The solution of the Navier-Stokes equations using FEniCS did not converge, and the initial pressure error is large

Hey guys.
I am solving the Navier-Stokes equations using FEniCS, with version 2019.2. The exact solution I am using is the Taylor-Green vortex, and the method I am referring to is Longfei Li’s ‘A Split-Step Finite-Element Method for Incompressible Navier-Stokes Equations with High-Order Accuracy up to the Boundary’, as shown in the figure below.

However, I am encountering issues with non-convergence for both velocity and pressure. According to the error outputs, even the initial pressure doesn’t converge. Could anyone help identify where the problem might lie? Thank you very much.
The output results from the previous runs are:

*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
t = 0.001: Velocity L2 error = 2.713e-04, max error = 3.274e-04
t = 0.001: Pressure L2 error = 2.972e-01, max error = 5.302e-01
*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
t = 0.002: Velocity L2 error = 9.010e-04, max error = 1.096e-03
t = 0.002: Pressure L2 error = 2.969e-01, max error = 5.299e-01
*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
*** Warning: Degree of exact solution may be inadequate for accurate result in errornorm.
t = 0.003: Velocity L2 error = 1.530e-03, max error = 1.852e-03
t = 0.003: Pressure L2 error = 2.968e-01, max error = 5.296e-01

Here is the complete code:

from fenics import *
import numpy as np

# Parameter settings
T = 10.0  # Final time
num_steps = 10000  # Number of time steps
dt = T / num_steps  # Time step size
mu = 0.01  # Kinematic viscosity
rho = 1.0  # Density
t = 0  # Initial time

# Create mesh and define function spaces
mesh = UnitSquareMesh(32, 32)  # Fine mesh
h_min = mesh.hmin()
alpha = Constant(0.01 * h_min ** -2)  # Set alpha inversely proportional to grid spacing squared

V = VectorFunctionSpace(mesh, 'P', 2)  # Velocity space
Q = FunctionSpace(mesh, 'P', 1)  # Pressure space
R = FunctionSpace(mesh, 'R', 0)  # Lagrange multiplier space (constant space)
P_element = MixedElement([Q.ufl_element(), R.ufl_element()])
W = FunctionSpace(mesh, P_element)
boundary = "on_boundary"

# Define Taylor-Green vortex analytical solution
ue = ("-cos(x[0])*sin(x[1])*exp(-2.0*mu*t)",
      "sin(x[0])*cos(x[1])*exp(-2.0*mu*t)",)
pe = "-0.25*(cos(2*x[0])+cos(2*x[1]))*exp(-4.0*mu*t)"
due_dt = ("2.0*mu*cos(x[0])*sin(x[1])*exp(-2.0*mu*t)",
          "-2.0*mu*sin(x[0])*cos(x[1])*exp(-2.0*mu*t)")

# Create expressions for analytical solutions
u_exact = Expression(ue, degree=2, t=t, mu=mu, rho=rho)
p_exact = Expression(pe, degree=2, t=t, mu=mu, rho=rho)
g = Expression(ue, degree=2, t=t, mu=mu, rho=rho)
dg_dt = Expression(due_dt, degree=2, t=t, mu=mu, rho=rho)
fe = ('0', '0')
f_exact = Expression(fe, degree=2, t=t, mu=mu, rho=rho, pi=np.pi)

# Use analytical solution as boundary condition
bcu = DirichletBC(V, u_exact, boundary)

# Define trial and test functions
u = TrialFunction(V)
v = TestFunction(V)
p_lambda_trial = TrialFunction(W)
q_mu = TestFunction(W)
p, lambda_ = split(p_lambda_trial)
q, miu = split(q_mu)

# Define solution functions for each step
u_n = Function(V)
u_0 = Function(V)
u_1 = Function(V)
u_p = Function(V)
p_n = Function(Q)
p_0 = Function(Q)
p_1 = Function(Q)
p_p = Function(Q)
f_n = Function(V)
f_0 = Function(V)
f_1 = Function(V)
p_lambda = Function(W)  # For mixed space solution (p, λ)

# Initialize conditions consistent with the analytical solution
u_exact.t = 0
p_exact.t = 0
f_exact.t = 0
u_0 = interpolate(u_exact, V)
p_0 = interpolate(p_exact, Q)
f_0 = interpolate(f_exact, V)
u_exact.t = dt
p_exact.t = dt
f_exact.t = dt
u_n = interpolate(u_exact, V)
p_n = interpolate(p_exact, Q)
f_n = interpolate(f_exact, V)

# Define some constants for the variational form
n = FacetNormal(mesh)
k = Constant(dt)
mu_const = Constant(mu)
rho_const = Constant(rho)

# Define L operator in the variational form
def L_operator(uu, pp, vv):
    return (-rho_const * inner(dot(uu, nabla_grad(uu)), vv)
            - inner(nabla_grad(pp), vv)
            - mu_const * inner(nabla_grad(uu), nabla_grad(vv)))

# Step 1: Temporary velocity step
F1 = rho_const * dot((u - u_n) / k, v) * dx - \
     Constant(1.5) * L_operator(u_n, p_n, v) * dx - \
     Constant(1.5) * inner(f_n, v) * dx + \
     Constant(0.5) * L_operator(u_0, p_0, v) * dx + \
     Constant(0.5) * inner(f_0, v) * dx

a1 = lhs(F1)
L1 = rhs(F1)

# Step 2: Pressure correction step
boundary_term1 = (
        inner(dot(n, -rho * (dg_dt - dot(g, nabla_grad(u_p))) + f_1), q) * ds +
        mu_const * curl(u_p) * (n[0]*nabla_grad(q)[1] - n[1]*nabla_grad(q)[0]) * ds
)
a2 = (-inner(nabla_grad(p), nabla_grad(q)) * dx
     + lambda_ * inner(Constant(1.0), q) * dx
     + inner(p, miu) * dx)
L2 = (inner(-rho * inner(nabla_grad(u_p), nabla_grad(u_p).T) + div(f_1) + alpha * div(u_p), q) * dx
     - boundary_term1)

# Step 3: Velocity correction step
F3 = rho_const * dot((u - u_n) / k, v) * dx - \
     Constant(0.5) * L_operator(u_n, p_n, v) * dx - \
     Constant(0.5) * inner(f_n, v) * dx - \
     Constant(0.5) * L_operator(u_p, p_p, v) * dx - \
     Constant(0.5) * inner(f_1, v) * dx

a3 = lhs(F3)
L3 = rhs(F3)

# Step 4: Pressure update
boundary_term2 = (
        inner(dot(n, -rho * (dg_dt - dot(g, nabla_grad(u_1))) + f_1), q) * ds +
        mu_const * curl(u_1) * (n[0]*nabla_grad(q)[1] - n[1]*nabla_grad(q)[0]) * ds
)
a4 = (-inner(nabla_grad(p), nabla_grad(q)) * dx
     + lambda_ * inner(Constant(1.0), q) * dx
     + inner(p, miu) * dx)
L4 = (inner(-rho * inner(nabla_grad(u_1), nabla_grad(u_1).T) + div(f_1) + alpha * div(u_1), q) * dx
     - boundary_term2)

# Assemble matrices
A1 = assemble(a1)
A2 = assemble(a2)
A3 = assemble(a3)
A4 = assemble(a4)

errors_u = []
errors_p = []
times = []

# Create file for saving results
vtkfile_u = File('taylor_green/velocity.pvd')
vtkfile_p = File('taylor_green/pressure.pvd')
vtkfile_error = File('taylor_green/error.pvd')

# Time-stepping loop
t=2*dt
while t < T - 1e-12:
    t += dt  # Update time
    u_exact.t = t
    p_exact.t = t
    f_exact.t = t
    g.t = t
    dg_dt.t = t
    f_1 = interpolate(f_exact, V)

    # Apply boundary conditions
    bcu.apply(A1)
    bcu.apply(A3)

    # Step 1: Temporary velocity step
    b1 = assemble(L1)
    bcu.apply(b1)
    solve(A1, u_p.vector(), b1)

    # Step 2: Pressure correction step
    b2 = assemble(L2)
    solve(A2, p_lambda.vector(), b2)
    p_tmp = p_lambda.sub(0)
    p_p = project(p_tmp, Q)

    # Step 3: Velocity correction step
    b3 = assemble(L3)
    bcu.apply(b3)
    solve(A3, u_1.vector(), b3)

    # Step 4: Pressure update
    b4 = assemble(L4)
    solve(A4, p_lambda.vector(), b4)
    p_tmp = p_lambda.sub(0)
    p_1 = project(p_tmp, Q)

    # Calculate errors
    u_e = interpolate(u_exact, V)
    p_e = interpolate(p_exact, Q)

    u_error = errornorm(u_e, u_1, 'L2')
    p_error = errornorm(p_e, p_1, 'L2')

    u_diff = Function(V)
    u_diff.vector()[:] = u_1.vector() - u_e.vector()
    u_error_max = np.abs(u_diff.vector().get_local()).max()

    p_diff = Function(Q)
    p_diff.vector()[:] = p_1.vector() - p_e.vector()
    p_error_max = np.abs(p_diff.vector().get_local()).max()

    # Log errors
    errors_u.append(u_error)
    errors_p.append(p_error)
    times.append(t)
    print(f't = {t:.3f}: Velocity L2 error = {u_error:.3e}, max error = {u_error_max:.3e}')
    print(f't = {t:.3f}: Pressure L2 error = {p_error:.3e}, max error = {p_error_max:.3e}')

    # 
    u_0.assign(u_n)
    p_0.assign(p_n)
    f_0.assign(f_n)
    u_n.assign(u_1)
    p_n.assign(p_1)
    f_n.assign(f_1)