Solve the Kovasznay flow using the incremental pressure correction scheme (IPCS)

Hi,
I am attempting to solve the Kovasznay flow in Divergence conforming discontinuous Galerkin method for the Navier–Stokes equations — DOLFINx 0.9.0 documentation using the incremental pressure correction scheme (IPCS) in Test problem 2: Flow past a cylinder (DFG 2D-3 benchmark) — FEniCSx tutorial.
But I can’t get the correct result. Could you help me find out where the problem lies? Thank you.
Here is my code:

from mpi4py import MPI
from petsc4py import PETSc
import numpy as np

import dolfinx
from basix.ufl import element
from dolfinx import fem,mesh
from dolfinx.fem import (Constant, Function, functionspace,
                         assemble_scalar, dirichletbc, form, locate_dofs_topological, set_bc)
from dolfinx.fem.petsc import (apply_lifting, assemble_matrix, assemble_vector,
                               create_vector, create_matrix, set_bc)
from dolfinx.io import (VTXWriter, distribute_entity_data, gmshio)
from ufl import (FacetNormal, Identity, Measure, TestFunction, TrialFunction,
                 as_vector, div, dot, ds, dx, inner, lhs, grad, nabla_grad, rhs, sym, system)

def norm_L2(comm, v):
    """Compute the L2(Ω)-norm of v"""
    return np.sqrt(comm.allreduce(fem.assemble_scalar(fem.form(inner(v, v) * dx)), op=MPI.SUM))

n = 20
mesh = mesh.create_unit_square(MPI.COMM_WORLD, n, n)
gdim = mesh.geometry.dim

t = 0
T = 10                       # Final time
dt = 0.1                # Time step size
num_steps = int(T / dt)
k = Constant(mesh, PETSc.ScalarType(dt))
Re=100

v_cg2 = element("Lagrange", mesh.topology.cell_name(), 2, shape=(mesh.geometry.dim, ))
s_cg1 = element("Lagrange", mesh.topology.cell_name(), 1)
V = functionspace(mesh, v_cg2)
Q = functionspace(mesh, s_cg1)

fdim = mesh.topology.dim - 1

# Define boundary conditions
def u_e_expr(x):
    """Expression for the exact velocity solution to Kovasznay flow"""
    return np.vstack((1 - np.exp(
        (Re / 2 - np.sqrt(Re**2 / 4 + 4 * np.pi**2)) * x[0]) * np.cos(2 * np.pi * x[1]),
        (Re / 2 - np.sqrt(Re**2 / 4 + 4 * np.pi**2)) / (2 * np.pi) * np.exp(
            (Re / 2 - np.sqrt(Re**2 / 4 + 4 * np.pi**2)) * x[0])
        * np.sin(2 * np.pi * x[1])  ))


u_D = fem.Function(V)
u_D.interpolate(u_e_expr)


def boundary_marker(x):
    return np.isclose(x[0], 0.0)| np.isclose(x[0], 1.0) | np.isclose(x[1], 0.0) | np.isclose(x[1], 1.0)

boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, mesh.topology.dim - 1, boundary_marker)
boundary_vel_dofs = fem.locate_dofs_topological(V, mesh.topology.dim - 1, boundary_facets)
bc_u = fem.dirichletbc(u_D, boundary_vel_dofs)
bcu = [bc_u]

p_D=fem.Function(Q)
boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, mesh.topology.dim - 1, lambda x:np.isclose(x[0], 0.0) & np.isclose(x[1], 0.0))
boundary_p_dofs = fem.locate_dofs_topological(Q, mesh.topology.dim - 1, boundary_facets)
bc_p = fem.dirichletbc(p_D, boundary_p_dofs)
bcp=[bc_p]

u = TrialFunction(V)
v = TestFunction(V)
u_ = Function(V)
u_.name = "u"
u_s = Function(V)
u_n = Function(V)
u_n1 = Function(V)
p = TrialFunction(Q)
q = TestFunction(Q)
p_ = Function(Q)
p_.name = "p"
phi = Function(Q)

f = Constant(mesh, PETSc.ScalarType((0, 0)))
F1 = 1 / k * dot(u - u_n, v) * dx
F1 += inner(dot(1.5 * u_n - 0.5 * u_n1, 0.5 * nabla_grad(u_n1 + u_n)), v) * dx
F1 += 0.5 /Re * inner(grad(u + u_n), grad(v)) * dx - dot(p_, div(v)) * dx
F1 += dot(f, v) * dx
a1 = form(lhs(F1))
L1 = form(rhs(F1))
A1 = assemble_matrix(a1,bcs=bcu)
A1.assemble()
b1 = create_vector(L1)

a2 = form(dot(grad(p), grad(q)) * dx)
L2 = form(-1 / k * dot(div(u_s), q) * dx)
A2 = assemble_matrix(a2, bcs=bcp)
A2.assemble()
b2 = create_vector(L2)

a3 = form(dot(u, v) * dx)
L3 = form(dot(u_s, v) * dx - k * dot(nabla_grad(phi), v) * dx)
A3 = assemble_matrix(a3)
A3.assemble()
b3 = create_vector(L3)

# Solver for step 1
solver1 = PETSc.KSP().create(mesh.comm)
solver1.setOperators(A1)
solver1.setType(PETSc.KSP.Type.BCGS)
pc1 = solver1.getPC()
pc1.setType(PETSc.PC.Type.JACOBI)

# Solver for step 2
solver2 = PETSc.KSP().create(mesh.comm)
solver2.setOperators(A2)
solver2.setType(PETSc.KSP.Type.MINRES)
pc2 = solver2.getPC()
pc2.setType(PETSc.PC.Type.HYPRE)
pc2.setHYPREType("boomeramg")

# Solver for step 3
solver3 = PETSc.KSP().create(mesh.comm)
solver3.setOperators(A3)
solver3.setType(PETSc.KSP.Type.CG)
pc3 = solver3.getPC()
pc3.setType(PETSc.PC.Type.SOR)


vtx_u = VTXWriter(mesh.comm, "dfg2D-3-u.bp", [u_], engine="BP4")
vtx_p = VTXWriter(mesh.comm, "dfg2D-3-p.bp", [p_], engine="BP4")
vtx_u.write(t)
vtx_p.write(t)

for i in range(num_steps):
    
    # Update current time step
    t += dt


    # Step 1: Tentative velocity step
    A1.zeroEntries()
    assemble_matrix(A1, a1, bcs=bcu)
    A1.assemble()
    with b1.localForm() as loc:
        loc.set(0)
    assemble_vector(b1, L1)
    apply_lifting(b1, [a1], [bcu])
    b1.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
    set_bc(b1, bcu)
    solver1.solve(b1, u_s.x.petsc_vec)
    u_s.x.scatter_forward()

    # Step 2: Pressure corrrection step
    with b2.localForm() as loc:
        loc.set(0)
    assemble_vector(b2, L2)
    apply_lifting(b2, [a2], [bcp])
    b2.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
    set_bc(b2, bcp)
    solver2.solve(b2, phi.x.petsc_vec)
    phi.x.scatter_forward()

    p_.x.petsc_vec.axpy(1, phi.x.petsc_vec)
    p_.x.scatter_forward()

    # Step 3: Velocity correction step
    with b3.localForm() as loc:
        loc.set(0)
    assemble_vector(b3, L3)
    b3.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
    solver3.solve(b3, u_.x.petsc_vec)
    u_.x.scatter_forward()

    # Write solutions to file
    vtx_u.write(t)
    vtx_p.write(t)

    # Update variable with solution form this time step
    with u_.x.petsc_vec.localForm() as loc_, u_n.x.petsc_vec.localForm() as loc_n, u_n1.x.petsc_vec.localForm() as loc_n1:
        loc_n.copy(loc_n1)
        loc_.copy(loc_n)


vtx_u.close()
vtx_p.close()


div_u = norm_L2(mesh.comm, div(u_))
if mesh.comm.rank == 0:
    print(f"div_u = {div_u}")