Getting Problem to get good results in Cahn-Hiliard equation code for P2 elements

Hello, everyone, i have used the demo code ideas to write in code of the Cahn-Hilliard equation code but i am getting problem to get good results using P2 elements. But i am getting good results using P1 elements.
Here, is my full code>

import os

try:
    from petsc4py import PETSc

    import dolfinx

    if not dolfinx.has_petsc:
        print("This demo requires DOLFINx to be compiled with PETSc enabled.")
        exit(0)
except ModuleNotFoundError:
    print("This demo requires petsc4py.")
    exit(0)

from mpi4py import MPI

import numpy as np

import ufl
from basix.ufl import element, mixed_element
from dolfinx import default_real_type, log, plot
from dolfinx.fem import Function, functionspace
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.io import XDMFFile
from dolfinx.mesh import CellType, create_unit_square
from dolfinx import mesh, fem, io
from dolfinx.nls.petsc import NewtonSolver

try:
    import pyvista as pv
    import pyvistaqt as pvqt

    have_pyvista = True
    if pv.OFF_SCREEN:
        pv.start_xvfb(wait=0.5)
except ModuleNotFoundError:
    print("pyvista and pyvistaqt are required to visualise the solution")
    have_pyvista = False

# Save all logging to file
log.set_output_file("log.txt")
def run_simulation(N, dt, T=0.2, degree=2, p=1.0):
    msh = create_unit_square(MPI.COMM_WORLD, N, N, CellType.quadrilateral)
    #msh = create_unit_square(MPI.COMM_WORLD, N, N, CellType.triangle)
    n = ufl.FacetNormal(msh)  # Add this right after creating the mesh
    P2 = element("Lagrange", msh.basix_cell(), 2, dtype=default_real_type)
    ME = functionspace(msh, mixed_element([P2, P2]))
    V = functionspace(msh, P2)

    # Variational problem components
    phi, psi = ufl.TestFunctions(ME)
    
    
    
    class exact_solution_u:
        def __init__(self, t):
            self.t = t
        def __call__(self, x):
            return (np.exp(-self.t) * x[0]**2*x[1]**2*(1-x[0])**2*(1-x[1])**2)
     
    
    class exact_solution_w:
        def __init__(self, t):
            self.t = t
        def __call__(self, x):
            return (2*x[0]**2*x[1]**2*np.exp(-self.t)*(x[0] - 1)**2*(x[1] - 1)**2 
                   - 2*x[0]**2*x[1]**2*np.exp(-self.t)*(x[1] - 1)**2 
                   - 2*x[0]**2*np.exp(-self.t)*(x[0] - 1)**2*(x[1] - 1)**2 
                   - 2*x[1]**2*np.exp(-self.t)*(x[0] - 1)**2*(x[1] - 1)**2 
                   - 4*x[0]*x[1]**2*np.exp(-self.t)*(2*x[0] - 2)*(x[1] - 1)**2 
                   - 4*x[0]**2*x[1]*np.exp(-self.t)*(2*x[1] - 2)*(x[0] - 1)**2 
                   - 2*x[0]**2*x[1]**2*np.exp(-self.t)*(x[0] - 1)**2 
                   - 6*x[0]**4*x[1]**4*np.exp(-2*self.t)*(x[0] - 1)**4*(x[1] - 1)**4 
                   + 4*x[0]**6*x[1]**6*np.exp(-3*self.t)*(x[0] - 1)**6*(x[1] - 1)**6)
                

   

    

    def g_expr(x, t):
        return (8*np.exp(-t)*(x[0]-1)**2*(x[1]-1)**2 + 8*x[0]**2*x[1]**2*np.exp(-t) 
               +24*x[0]**2*np.exp(-t)*(x[0]-1)**2 + 8*x[0]**2*np.exp(-t)*(x[1]-1)**2  
               +8*x[1]**2*np.exp(-t)*(x[0]-1)**2 + 24*x[1]**2*np.exp(-t)*(x[1]-1)**2 
               +16*x[0]*x[1]**2*np.exp(-t)*(2*x[0]-2)+16*x[0]**2*x[1]*np.exp(-t)*(2*x[1]-2) 
               +16*x[0]*np.exp(-t)*(2*x[0]-2)*(x[1]-1)**2 - 4*x[0]**2*x[1]**2*np.exp(-t)*(x[0]-1)**2  
               +16*x[1]*np.exp(-t)*(2*x[1]-2)*(x[0]-1)**2 - 4*x[0]**2*x[1]**2*np.exp(-t)*(x[1]-1)**2 
               -4*x[0]**2*np.exp(-t)*(x[0]-1)**2*(x[1]-1)**2 - 4*x[1]**2*np.exp(-t)*(x[0]-1)**2*(x[1]-1)**2  
               -8*x[0]*x[1]**2*np.exp(-t)*(2*x[0]-2)*(x[1]-1)**2 - 8*x[0]**2*x[1]*np.exp(-t)*(2*x[1]-2)*(x[0]-1)**2 
               -x[0]**2*x[1]**2*np.exp(-t)*(x[0]-1)**2*(x[1]-1)**2 +72*x[0]**2*x[1]**4*np.exp(-2*t)*(x[0]-1)**4*(x[1]-1)**4 
               +192*x[0]**3*x[1]**4*np.exp(-2*t)*(x[0]-1)**3*(x[1]-1)**4 +72*x[0]**4*x[1]**2*np.exp(-2*t)*(x[0]-1)**4*(x[1]-1)**4 
               +192*x[0]**4*x[1]**3*np.exp(-2*t)*(x[0]-1)**4*(x[1]-1)**3 +72*x[0]**4*x[1]**4*np.exp(-2*t)*(x[0]-1)**2*(x[1]-1)**4 
               +72*x[0]**4*x[1]**4*np.exp(-2*t)*(x[0]-1)**4*(x[1]-1)**2 -120*x[0]**4*x[1]**6*np.exp(-3*t)*(x[0]-1)**6*(x[1]-1)**6 
               -288*x[0]**5*x[1]**6*np.exp(-3*t)*(x[0]-1)**5*(x[1]-1)**6-120*x[0]**6*x[1]**4*np.exp(-3*t)*(x[0]-1)**6*(x[1]-1)**6 
               -288*x[0]**6*x[1]**5*np.exp(-3*t)*(x[0]-1)**6*(x[1]-1)**5-120*x[0]**6*x[1]**6*np.exp(-3*t)*(x[0]-1)**4*(x[1]-1)**6 
               -120*x[0]**6*x[1]**6*np.exp(-3*t)*(x[0]-1)**6*(x[1]-1)**4+32*x[0]*x[1]*np.exp(-t)*(2*x[0]-2)*(2*x[1]-2))

    g =Function(V)
    
    
    
    t = 0
    u_exact = exact_solution_u(t)
    w_exact = exact_solution_w(t)
    
    
    #t = 0
    #u_exact = exact_solution(t)
   
    u_D = fem.Function(V)
    u_D.interpolate(u_exact)

    tdim = msh.topology.dim
    fdim = tdim - 1
    msh.topology.create_connectivity(fdim, tdim)
    boundary_facets = mesh.exterior_facet_indices(msh.topology)
    bc = fem.dirichletbc(u_D, fem.locate_dofs_topological(V, fdim, boundary_facets))
   
    
    


    
    
    
    # Initial conditions
    u_n = fem.Function(V)
    u_n.interpolate(u_exact)
    w_n = fem.Function(V)
    w_n.interpolate(w_exact)
    
    
    
    
    
    
    # Solution functions
    v = fem.Function(ME)
    v0 = fem.Function(ME)  # solution from previous converged step
    #u, w = vh.split()
    
    
    # Split mixed functions
    u, w = ufl.split(v)
    u0, w0 = ufl.split(v0)
    
    
    # Zero u
    v.x.array[:] = 0.0

    
    # The chemical potential $df/dc$ is computed using UFL automatic
    # differentiation:
    # Compute the chemical potential df/dc
    u = ufl.variable(u)
    f=u**2*(1-u)**2
    #f=(1.0/4)*(1-u**2)**(1-u**2)
    dfdu = ufl.diff(f, u)
    
    # It is convenient to introduce an expression for $w_{n+\theta}$:
    # w_(n+theta)
    w_mid =w 
    
    
        # which is then used in the definition of the variational forms:
    # Weak statement of the equations
    F0 = (ufl.inner(u, phi) * ufl.dx- ufl.inner(u_n, phi) * ufl.dx
         + dt * ufl.inner(ufl.grad(w_mid), ufl.grad(phi)) * ufl.dx-dt*ufl.inner(g, phi) * ufl.dx
         - dt * ufl.inner(ufl.dot(ufl.grad(w_mid), n), phi) * ufl.ds)  # Added boundary term
    F1 = (ufl.inner(w, psi) * ufl.dx- ufl.inner(dfdu, psi) * ufl.dx
         - p*ufl.inner(ufl.grad(u), ufl.grad(psi)) * ufl.dx-p*ufl.inner(ufl.dot(ufl.grad(u), n), psi)* ufl.ds)
    F = F0 + F1
    
    # Create nonlinear problem and Newton solver
    problem = NonlinearProblem(F, v)
    solver = NewtonSolver(MPI.COMM_WORLD, problem)
    solver.convergence_criterion = "incremental"
    solver.rtol = np.sqrt(np.finfo(default_real_type).eps) * 1e-1
    
    
    # We can customize the linear solver used inside the NewtonSolver by
    # modifying the PETSc options
    ksp = solver.krylov_solver
    opts = PETSc.Options()  # type: ignore
    option_prefix = ksp.getOptionsPrefix()
    opts[f"{option_prefix}ksp_type"] = "preonly"
    opts[f"{option_prefix}pc_type"] = "lu"
    sys = PETSc.Sys()  # type: ignore
    # For factorisation prefer superlu_dist, then MUMPS, then default
    if sys.hasExternalPackage("superlu_dist"):
        opts[f"{option_prefix}pc_factor_mat_solver_type"] = "superlu_dist"
    elif sys.hasExternalPackage("mumps"):
        opts[f"{option_prefix}pc_factor_mat_solver_type"] = "mumps"
    ksp.setFromOptions()
    
 
    # Time stepping
    num_steps = int(T / dt)
    for n in range(num_steps):
        t += dt
 
    
        # Assign the time parameter if needed (for time-dependent expressions)
        u_exact.t = t
        w_exact.t = t
        u_D.interpolate(u_exact)
        
        
        
        #g.interpolate(g_expr)
        #f.interpolate(lambda x: f_expr(x, t))  # Pass current time to f_expr
        g.interpolate(lambda x: g_expr(x, t))  # Evaluates at midpoint
        
        
        
        # Update previous solution
        #v_n.x.array[:] = v.x.array
        v0.x.array[:] = v.x.array
        
    
        # Solve
        #solver.solve(v)
        # Solve with BCs, boundary conditions are already handled by the NonlinearProblem
        solver.solve(v)
        u, w = v.split()
        
    
        
    
    V_ex = functionspace(msh, ("Lagrange", degree + 1))
    u_ex = Function(V_ex)
    w_ex = Function(V_ex)
    u_ex.interpolate(u_exact)
    w_ex.interpolate(w_exact)

    # Compute L2 errors
    erroru = np.sqrt(msh.comm.allreduce(
            fem.assemble_scalar(fem.form((v.sub(0) - u_ex)**2 * ufl.dx)), op=MPI.SUM))
    errorw = np.sqrt(msh.comm.allreduce(
            fem.assemble_scalar(fem.form((v.sub(1) - w_ex)**2 * ufl.dx)), op=MPI.SUM))

    h = 1.0 / N
    return h, dt, erroru, errorw  # Now inside a function
if __name__ == "__main__":
    #Ns=[8,16]
    Ns =[4, 8, 16, 32]
    #Ns =[8, 16, 32, 64]
    hs, dts, errorsu, errorsw = [], [], [], []

    for N in Ns:
        h = 1.0 / N
        dt=h**3
        #dt = h**1.5  # Less aggressive refinement for N≥32
        #dt = h**2
        h_val, dt_val, erroru, errorw = run_simulation(N, dt)
        hs.append(h_val)
        dts.append(dt_val)
        errorsu.append(erroru)
        errorsw.append(errorw)
        if MPI.COMM_WORLD.rank == 0:
            print(f"N={N:2d}, h={h_val:.3e}, dt={dt_val:.3e}, L2 erroru={erroru:.3e}, L2 errorw={errorw:.3e}")

    # Compute convergence rate of u
    if MPI.COMM_WORLD.rank == 0:
        print("\nConvergence rates (combined h and dt):")
        for i in range(1, len(errorsu)):
            ratespu = np.log(errorsu[i-1] / errorsu[i]) / np.log(hs[i-1] / hs[i])
            print(f"From N={Ns[i-1]} to N={Ns[i]}: ratespu ≈ {ratespu:.2f}")
            ratetimeu = np.log(errorsu[i-1] / errorsu[i]) / np.log(dts[i-1] / dts[i])
            print(f"From dt={dts[i-1]:.3e} to dt={dts[i]:.3e}: ratetimeu ≈ {ratetimeu:.2f}")
            
    # Compute convergence rate of w
    if MPI.COMM_WORLD.rank == 0:
        print("\nConvergence rates (combined h and dt):")
        for i in range(1, len(errorsw)):
            ratespw = np.log(errorsw[i-1] / errorsw[i]) / np.log(hs[i-1] / hs[i])
            print(f"From N={Ns[i-1]} to N={Ns[i]}: ratespw ≈ {ratespw:.2f}")
            ratetimew = np.log(errorsw[i-1] / errorsw[i]) / np.log(dts[i-1] / dts[i])
            print(f"From dt={dts[i-1]:.3e} to dt={dts[i]:.3e}: ratetimew ≈ {ratetimew:.2f}")

Results (using P1 elements):

Errors for P2 element:

( It is the same code. I have just change P1 to P2 but then this errors is coming).

Please help how to get good results using P2 element also.

Thanks in advance!.

(post deleted by author)