I have tried HHO implementation but not getting the superconvergnce as expected for each k

#HHO DIFFUSION

#Hybrid High-Order method for diffusion problem

from mpi4py import MPI

from dolfinx import mesh, fem, io, log

import ufl

import basix

import numpy as np

import matplotlib.pyplot as plt

from scipy.sparse import lil_matrix, csr_matrix, coo_matrix

from scipy.sparse.linalg import bicgstab, spsolve

from numpy.polynomial.legendre import leggauss

import time

import warnings

import itertools

warnings.filterwarnings(“ignore”)

# PROBLEM DEFINITION

def u_exact(x):

"""Exact solution"""

return np.sin(np.pi \* x\[0\]) \* np.sin(np.pi \* x\[1\])

def grad_exact(x):

"""Gradient of exact solution"""

return np.array(\[

    np.pi \* np.cos(np.pi \* x\[0\]) \* np.sin(np.pi \* x\[1\]),

    np.pi \* np.sin(np.pi \* x\[0\]) \* np.cos(np.pi \* x\[1\])

\])

def source(x):

"""Source term f(x)"""

return 2 \* np.pi\*\*2 \* np.sin(np.pi \* x\[0\]) \* np.sin(np.pi \* x\[1\])

def kappa(x):

"""Diffusion tensor (piecewise constant)"""

return np.eye(2)

def bc_dirichlet(x):

"""Dirichlet boundary condition"""

return u_exact(x)

def bc_neumann(x):

"""Neumann boundary condition (zero)"""

return 0.0

# ============================================================

# POLYNOMIAL UTILITIES - UPDATED WITH HIERARCHICAL BASIS

# ============================================================

def dim_poly(k, d=2):

if k < 0:

    return 0

if d == 2:

    return (k + 1) \* (k + 2) // 2

else:

    return (k + 1) \* (k + 2) \* (k + 3) // 6

def poly_basis_2d(k, x, y, hierarchical=False):

basis = \[\]

if hierarchical:

    \# Constant mode: 1

    basis.append(1.0)

    \# Gradient modes: polynomials with degree >= 1

    for degree in range(1, k + 1):

        for i in range(degree + 1):

            j = degree - i

            basis.append((x \*\* i) \* (y \*\* j))

else:

    \# Standard basis: all monomials (degree 0 to k)

    for degree in range(k + 1):

        for i in range(degree + 1):

            j = degree - i

            basis.append((x \*\* i) \* (y \*\* j))

return np.array(basis)

def poly_grad_basis_2d(k, x, y, hierarchical=False):

grad = \[\]

if hierarchical:

    \# Constant mode gradient: 0

    grad.append(np.array(\[0.0, 0.0\]))

    \# Gradient modes: gradients of monomials with degree >= 1

    for degree in range(1, k + 1):

        for i in range(degree + 1):

            j = degree - i

            if i > 0:

                dx = i \* (x \*\* (i - 1)) \* (y \*\* j)

            else:

                dx = 0.0

            if j > 0:

                dy = j \* (x \*\* i) \* (y \*\* (j - 1))

            else:

                dy = 0.0

            grad.append(np.array(\[dx, dy\]))

else:

    \# Standard gradients: gradients of all monomials (degree 0 to k)

    for degree in range(k + 1):

        for i in range(degree + 1):

            j = degree - i

            if i > 0:

                dx = i \* (x \*\* (i - 1)) \* (y \*\* j)

            else:

                dx = 0.0

            if j > 0:

                dy = j \* (x \*\* i) \* (y \*\* (j - 1))

            else:

                dy = 0.0

            grad.append(np.array(\[dx, dy\]))

return np.array(grad)

def poly_basis_edge(k, s):

"""Evaluate polynomial basis P^k on edge at parameter s"""

return np.array(\[s \*\* i for i in range(k + 1)\])

# GAUSS-LEGENDRE QUADRATURE FOR EDGES (USING BASIX)

def gauss_legendre_1d_basix(n):

points, weights = basix.make_quadrature(

    basix.CellType.interval,  # 1st: CellType

    n                         # 2nd: degree (int)

)



\# Map from \[-1, 1\] to \[0, 1\]

points = 0.5 \* (points + 1.0)

weights = 0.5 \* weights

return points.flatten(), weights.flatten()

def quadrature_edge(n):

"""Quadrature for edge using Gauss-Legendre"""

return gauss_legendre_1d_basix(n)

def quadrature_triangle(degree):

\# Basix gives points and weights on the reference triangle

points, weights = basix.make_quadrature(

    basix.CellType.triangle,  # 2D triangle

    degree                    # quadrature degree

)

return points, weights

# Mesh utilities

def get_cell_vertices(domain, cell_idx):

"""Get vertices of a cell"""

connectivity = domain.topology.connectivity(domain.topology.dim, 0)

return connectivity.links(cell_idx)

def get_cell_edges(domain, cell_idx):

"""Get edges of a cell"""

connectivity = domain.topology.connectivity(domain.topology.dim, 1)

return connectivity.links(cell_idx)

def get_edge_vertices(domain, edge_idx):

"""Get vertices of an edge"""

connectivity = domain.topology.connectivity(1, 0)

return connectivity.links(edge_idx)

def get_edge_cells(domain, edge_idx):

"""Get cells adjacent to an edge"""

connectivity = domain.topology.connectivity(1, domain.topology.dim)

return connectivity.links(edge_idx)

def get_cell_center(domain, cell_idx):

"""Get center of a cell"""

vertices = get_cell_vertices(domain, cell_idx)

coords = domain.geometry.x

return np.mean(coords\[vertices\], axis=0)

def get_edge_center(domain, edge_idx):

"""Get center of an edge"""

vertices = get_edge_vertices(domain, edge_idx)

coords = domain.geometry.x

return np.mean(coords\[vertices\], axis=0)

def get_cell_measure(domain, cell_idx):

"""Get area of a cell"""

vertices = get_cell_vertices(domain, cell_idx)

coords = domain.geometry.x\[vertices\]\[:, :2\]

v0, v1, v2 = coords

return 0.5 \* abs(np.cross(v1 - v0, v2 - v0))

def get_edge_measure(domain, edge_idx):

"""Get length of an edge"""

vertices = get_edge_vertices(domain, edge_idx)

coords = domain.geometry.x\[vertices\]

return np.linalg.norm(coords\[1\] - coords\[0\])

def get_edge_normal(domain, cell_idx, edge_idx):

"""Get outward normal of an edge with respect to a cell"""

cell_center = get_cell_center(domain, cell_idx)

edge_verts = get_edge_vertices(domain, edge_idx)

coords = domain.geometry.x\[edge_verts\]

edge_vec = coords\[1\] - coords\[0\]

normal = np.array(\[-edge_vec\[1\], edge_vec\[0\]\])

normal = normal / (np.linalg.norm(normal) + 1e-12)

edge_center = get_edge_center(domain, edge_idx)

to_cell = cell_center - edge_center

if np.dot(normal, to_cell\[:2\]) < 0:

    normal = -normal

return normal

def point_on_edge(domain, edge_idx, s):

"""Get point on edge at parameter s ∈ \[0,1\]"""

vertices = get_edge_vertices(domain, edge_idx)

coords = domain.geometry.x\[vertices\]

return coords\[0\] + s \* (coords\[1\] - coords\[0\])

def is_boundary_edge(domain, edge_idx):

"""Check if an edge is on the boundary"""

cells_adjacent = get_edge_cells(domain, edge_idx)

return len(cells_adjacent) == 1

# ELEMENT QUADRATURE CLASS

class ElementQuad:

def \__init_\_(self, domain, cell_idx, K, L):

    self.domain = domain

    self.cell_idx = cell_idx

    self.K = K

    self.Ldeg = max(L, 0)

    

    \# Create topology if needed

    domain.topology.create_entities(0)

    domain.topology.create_entities(1)

    domain.topology.create_entities(2)

    domain.topology.create_connectivity(domain.topology.dim, 1)

    domain.topology.create_connectivity(1,0)

    self.edges = get_cell_edges(domain, cell_idx)

    self.n_edges = len(self.edges)

    \# Determine quadrature rule

    self.doeT = self.Ldeg + self.K+1 #Degree of exactness needed for cell integrals

    self.doeF = 2 \* self.K +1 # Degree of exactness needed for face integrals



    \# Minimum quadrature needed for stiffness and mass matrices

    self.quad_rule = max(self.doeT, self.doeF) #Take the maximum of cell and face requirements

    

    self.quad_rule = max(self.quad_rule, 2\*(self.K+1)) #Ensure sufficient accuracy for L² error computation



    self.quad_rule = min(self.quad_rule, 20) # Cap at Dunavant maximum (20)

    

    \# Cell quadrature using Dunavant rules

    self.quadT_nodes, self.quadT_weights_ref = quadrature_triangle(self.quad_rule)

    

    vertices = get_cell_vertices(domain, cell_idx)

    coords = domain.geometry.x\[vertices\]

    self.v0, self.v1, self.v2 = coords\[0\], coords\[1\], coords\[2\]

    

    \# Scale weights by cell area

    cell_area = get_cell_measure(domain, cell_idx)

    self.quadT_weights = self.quadT_weights_ref \* (cell_area / 0.5)

    

    \# Map from reference triangle to physical triangle

    self.quadT_physical = \[\]

    for ref_node in self.quadT_nodes:

        x = self.v0 + ref_node\[0\] \* (self.v1 - self.v0) + ref_node\[1\] \* (self.v2 - self.v0)

        self.quadT_physical.append(x\[:2\])

    self.quadT_physical = np.array(self.quadT_physical)

    self.quadT_size = len(self.quadT_physical)

    

    \# Edge quadrature (Gauss-Legendre)

    self.quadF_nodes = \[\]

    self.quadF_weights = \[\]

    self.quadF_physical = \[\]

    self.quadF_sizes = \[\]

    for edge_idx in self.edges:

        nodes, weights_ref = quadrature_edge(self.quad_rule)

        edge_len = get_edge_measure(domain, edge_idx)

        self.quadF_nodes.append(nodes)

        self.quadF_weights.append(weights_ref \* edge_len)

        phys = \[\]

        for s in nodes:

            pt = point_on_edge(domain, edge_idx, s)

            phys.append(pt\[:2\])

        self.quadF_physical.append(np.array(phys))

        self.quadF_sizes.append(len(nodes))

    

    \# Precompute basis function values with hierarchical basis

    self.\_precompute_basis()

def \_precompute_basis(self):

    self.phiT_quadT = \[\]

    self.dphiT_quadT = \[\]

    for pt in self.quadT_physical:

        self.phiT_quadT.append(poly_basis_2d(self.K + 1, pt\[0\], pt\[1\], hierarchical=True))

        self.dphiT_quadT.append(poly_grad_basis_2d(self.K + 1, pt\[0\], pt\[1\], hierarchical=True))

    self.phiT_quadT = np.array(self.phiT_quadT)

    self.dphiT_quadT = np.array(self.dphiT_quadT)

    

    self.phiL_quadT = \[\]

    for pt in self.quadT_physical:

        self.phiL_quadT.append(poly_basis_2d(self.Ldeg, pt\[0\], pt\[1\], hierarchical=False))

    self.phiL_quadT = np.array(self.phiL_quadT)

    

    \# Cell basis P^{K+1} at edge quadrature points (HIERARCHICAL)

    self.phiT_quadF = \[\]

    self.dphiT_quadF = \[\]

    for edge_idx in range(self.n_edges):

        phi_list = \[\]

        dphi_list = \[\]

        for pt in self.quadF_physical\[edge_idx\]:

            phi_list.append(poly_basis_2d(self.K + 1, pt\[0\], pt\[1\], hierarchical=True))

            dphi_list.append(poly_grad_basis_2d(self.K + 1, pt\[0\], pt\[1\], hierarchical=True))

        self.phiT_quadF.append(np.array(phi_list))

        self.dphiT_quadF.append(np.array(dphi_list))



    \# Edge basis P^K at edge quadrature points (STANDARD)

    self.phiF_quadF = \[\]

    for edge_idx in range(self.n_edges):

        phi_list = \[\]

        for s in self.quadF_nodes\[edge_idx\]:

            phi_list.append(poly_basis_edge(self.K, s))

        self.phiF_quadF.append(np.array(phi_list))

def get_quadT(self):

    """Return cell quadrature points and weights"""

    return self.quadT_physical, self.quadT_weights

def get_quadF(self, edge_idx):

    """Return edge quadrature points and weights"""

    return self.quadF_physical\[edge_idx\], self.quadF_weights\[edge_idx\]

def get_phiT_quadT(self):

    """Return cell basis P^{K+1} values at cell quadrature points (HIERARCHICAL)"""

    return self.phiT_quadT

def get_dphiT_quadT(self):

    """Return cell basis P^{K+1} gradients at cell quadrature points (HIERARCHICAL)"""

    return self.dphiT_quadT

def get_phiL_quadT(self):

    """Return cell basis P^L values at cell quadrature points (STANDARD)"""

    return self.phiL_quadT

def get_phiT_quadF(self, edge_idx):

    """Return cell basis P^{K+1} values at edge quadrature points (HIERARCHICAL)"""

    return self.phiT_quadF\[edge_idx\]

def get_dphiT_quadF(self, edge_idx):

    """Return cell basis P^{K+1} gradients at edge quadrature points (HIERARCHICAL)"""

    return self.dphiT_quadF\[edge_idx\]

def get_phiF_quadF(self, edge_idx):

    """Return edge basis P^K values at edge quadrature points (STANDARD)"""

    return self.phiF_quadF\[edge_idx\]

def get_phiL_quadF(self, edge_idx):

    """Return cell basis P^L values at edge quadrature points (STANDARD)"""

    phiL_quadF = \[\]

    for pt in self.quadF_physical\[edge_idx\]:

        phiL_quadF.append(poly_basis_2d(self.Ldeg, pt\[0\], pt\[1\], hierarchical=False))

    return np.array(phiL_quadF)

# HHO DIFFUSION CLASS - PRIMAL FORMULATION

class HHODiffusion:

def \__init_\_(self, domain, K, L, kappa_func, source_func, bc_type='Dirichlet',

             bc_dirichlet_func=None, bc_neumann_func=None,

             exact_solution=None, grad_exact=None,

             solver_type='bicgstab', use_scaling=True):



    self.domain = domain

    domain.topology.create_entities(0)

    domain.topology.create_entities(1)

    domain.topology.create_entities(2)

    

    \# Create connectivity

    domain.topology.create_connectivity(2, 1)

    domain.topology.create_connectivity(1, 2)

    self.cell_edge_connectivity = domain.topology.connectivity(2, 1)

    self.edge_cell_connectivity = domain.topology.connectivity(1, 2)

    

    self.K = K

    self.L = L

    self.Ldeg = max(L, 0)

    self.use_scaling = use_scaling

    

    self.kappa_func = kappa_func

    self.source_func = source_func

    self.bc_type = bc_type

    self.bc_dirichlet_func = bc_dirichlet_func

    self.bc_neumann_func = bc_neumann_func

    self.exact_solution = exact_solution

    self.grad_exact = grad_exact

    self.solver_type = solver_type

    

    \# DOF dimensions

    self.m_nlocal_cell_dofs = dim_poly(self.Ldeg)

    self.m_nlocal_edge_dofs = self.K + 1 



    \# High-order space dimensions:

    \# P^{K+1} full dimension

    self.m_nhighorder_dofs = dim_poly(self.K + 1) # This is N_high for P^{K+1}



    \# Gradient space (remove constant mode): dimension = full - 1

    self.m_nhighorder_grad = self.m_nhighorder_dofs - 1

    self.n_cells = domain.topology.index_map(domain.topology.dim).size_local

    self.n_edges = domain.topology.index_map(1).size_local

    

    self.m_ntotal_cell_dofs = self.n_cells \* self.m_nlocal_cell_dofs

    self.m_ntotal_edge_dofs = self.n_edges \* self.m_nlocal_edge_dofs

    

    \# Boundary edge counts

    self.n_dir_edges = 0

    self.n_nondir_edges = 0

    if bc_type == 'Dirichlet':

        for e in range(self.n_edges):

            if is_boundary_edge(domain, e):

                self.n_dir_edges += 1

            else:

                self.n_nondir_edges += 1

    else:

        self.n_nondir_edges = self.n_edges

    

    self.m_ndir_edge_dofs = self.n_dir_edges \* self.m_nlocal_edge_dofs

    self.m_nnondir_edge_dofs = self.n_nondir_edges \* self.m_nlocal_edge_dofs

    self.m_ntotal_dofs = self.m_ntotal_cell_dofs + self.m_ntotal_edge_dofs

    

    \# Storage

    self.aT = {}

    self.P_T_cache = {}  # Will store only gradient coefficients (bP_T)

    self.D_T_cache = {}

    self.D_TF_cache = {}

    self.GlobMat = None

    self.SysMat = None

    self.GlobRHS = None

    self.ScRHS = None

    self.A_sc = None

    self.invATT_ATF_cache = {}

    self.invATT_b_cache = {}

    self.Xh = None

    self.quad_cache = {}

    

    \# Timing

    self.\_assembly_time = 0

    self.\_solving_time = 0

    self.\_solving_error = 0    

    print(f"\[HHO_Diffusion\] Initialized: K={K}, L={L}, cells={self.n_cells}, edges={self.n_edges}")

    print(f"  Cell DOFs: {self.m_nlocal_cell_dofs}, Edge DOFs: {self.m_nlocal_edge_dofs}")

    print(f"  High-order space P^{K+1} full: {self.m_nhighorder_dofs},gradient: {self.m_nhighorder_grad}")

def get_element_quad(self, cell_idx):

    """Get or create ElementQuad for a cell"""

    if cell_idx not in self.quad_cache:

        self.quad_cache\[cell_idx\] = ElementQuad(self.domain, cell_idx, self.K, self.L)

    return self.quad_cache\[cell_idx\]

def \_get_kappa_at_point(self, x, cell):

    """Get diffusion tensor at point"""

    cell_center = get_cell_center(self.domain, cell)

    return self.kappa_func(cell_center)    

def diffusion_operator(self, cell_idx, elquad):

    edges = elquad.edges

    n_edges = len(edges)

    local_dofs = self.m_nlocal_cell_dofs + n_edges \* self.m_nlocal_edge_dofs

    

    N_cell = self.m_nlocal_cell_dofs

    N_edge = self.m_nlocal_edge_dofs

    N_high_full = self.m_nhighorder_dofs

    N_high_grad = self.m_nhighorder_grad      # Gradient modes dimension (N_high_full - 1)

    \# Get quadrature data

    quadT_physical = elquad.quadT_physical

    quadT_weights = elquad.quadT_weights

    n_quadT = len(quadT_physical)

    \# Hierarchical basis at cell quadrature points

    phiT_quadT = elquad.get_phiT_quadT()      # Full P^{K+1} basis \[constant, gradient modes\]

    dphiT_quadT = elquad.get_dphiT_quadT()    # Gradients: first row is zero

    phiL_quadT = elquad.get_phiL_quadT()      # P^L basis

    \# Compute S_T = \[(∇φ\_i, ∇φ\_j)\_T\]

    S_T_full = np.zeros((N_high_full, N_high_full))

    for i in range(N_high_full):

        for j in range(N_high_full):

            for q in range(n_quadT):

                x = quadT_physical\[q\]

                Kx = self.\_get_kappa_at_point(x, cell_idx)

                S_T_full\[i, j\] += quadT_weights\[q\] \* np.dot(dphiT_quadT\[q, i\], Kx @ dphiT_quadT\[q, j\])



    \# Extract reduced matrix eS_T (remove first row/col)

    eS_T = S_T_full\[1:, 1:\]            

    

    \# Compute L_T^{k+1} = \[(φ\_i, 1)\_T\] and L_T^k = \[(φ\_i^L, 1)\_T\]

    \# for constant recovery

    L_T_high = np.zeros(N_high_full)

    for i in range(N_high_full):

        for q in range(n_quadT):

            L_T_high\[i\] += quadT_weights\[q\] \* phiT_quadT\[q, i\]

    

    L_T_low = np.zeros(N_cell)

    for i in range(N_cell):

        for q in range(n_quadT):

            L_T_low\[i\] += quadT_weights\[q\] \* phiL_quadT\[q, i\]



    \# Compute M_TT^{k,k+1} = \[(φ\_i^L, φ\_j^{K+1})\_T\]

    \# This is the hierarchical mass matrix for P^L and P^{K+1}

    M_TT_k_k1 = np.zeros((N_cell, N_high_full))

    for i in range(N_cell):

        for j in range(N_high_full):

            for q in range(n_quadT):

                M_TT_k_k1\[i, j\] += quadT_weights\[q\] \* phiL_quadT\[q, i\] \* phiT_quadT\[q, j\]



    \# Extract M_TT^{k,k} as sub-matrix of M_TT^{k,k+1}

    \# Mk;k_TT is a sub-matrix of Mk;k+1_TT

    M_TT_kk = M_TT_k_k1\[:, :N_cell\]  # First N_cell columns correspond to P^L basis        



    #Compute bB\_{P,T} = \[(∇φ\_i, ∇φ\_j^L)\_T - Σ\_F (∇φ\_i·n, φ\_j^L)\_F\]

    B_P_T = np.zeros((N_high_full, N_cell))

    

    \# Cell contribution

    dphiL_quadT = np.zeros((n_quadT, N_cell, 2))

    for q in range(n_quadT):

        dphiL_quadT\[q\] = poly_grad_basis_2d(self.Ldeg, quadT_physical\[q\]\[0\], quadT_physical\[q\]\[1\],hierarchical=False)

    

    for i in range(N_high_full):

        for j in range(N_cell):

            for q in range(n_quadT):

                x = quadT_physical\[q\]

                Kx = self.\_get_kappa_at_point(x, cell_idx)

                B_P_T\[i, j\] += quadT_weights\[q\] \* np.dot(dphiT_quadT\[q, i\], Kx @ dphiL_quadT\[q, j\])

    

    #face contributions (substract)

    for e, edge_idx in enumerate(edges):

        normal = get_edge_normal(self.domain, cell_idx, edge_idx)

        quadF_physical = elquad.quadF_physical\[e\]

        quadF_weights = elquad.quadF_weights\[e\]

        n_quadF = len(quadF_physical)

        

        dphiT_quadF = elquad.get_dphiT_quadF(e)

        phiL_quadF = elquad.get_phiL_quadF(e)

        

        for i in range(N_high_full):

            for j in range(N_cell):

                for q in range(n_quadF):

                    x = quadF_physical\[q\]

                    Kx = self.\_get_kappa_at_point(x, cell_idx)

                    B_P_T\[i, j\] -= quadF_weights\[q\] \* np.dot(dphiT_quadF\[q, i\], Kx @ normal) \* phiL_quadF\[q, j\]

        

        \# Remove constant mode to get bB_P,T

        bB_P_T = B_P_T\[1:, :\] # N_high_grad x N_cell

    \#  Compute bB\_{P,F} = \[(∇φ\_i·n, φ\_j^F)\_F\]

    B_P_F = \[None\] \* n_edges

    bB_P_F = \[None\] \* n_edges



    for e, edge_idx in enumerate(edges):

        normal = get_edge_normal(self.domain, cell_idx, edge_idx)

        quadF_physical = elquad.quadF_physical\[e\]

        quadF_weights = elquad.quadF_weights\[e\]

        n_quadF = len(quadF_physical)

        

        dphiT_quadF = elquad.get_dphiT_quadF(e)

        phiF_quadF = elquad.get_phiF_quadF(e)

        

        B_P_F\[e\] = np.zeros((N_high_full, N_edge))

        for i in range(N_high_full):

            for j in range(N_edge):

                for q in range(n_quadF):

                    x = quadF_physical\[q\]

                    Kx = self.\_get_kappa_at_point(x, cell_idx)

                    B_P_F\[e\]\[i, j\] += quadF_weights\[q\] \* np.dot(dphiT_quadF\[q, i\], Kx @ normal) \* phiF_quadF\[q, j\]

        

        \# Remove constant mode

        bB_P_F\[e\] = B_P_F\[e\]\[1:, :\]  # N_high_grad x N_edge

    \# Assemble bB_P = \[bB_P,T, bB_P,F1, ..., bB_P,Fn\]

    bB_P = np.zeros((N_high_grad, local_dofs))

    bB_P\[:, :N_cell\] = bB_P_T 

    

    for e in range(n_edges):

        start = N_cell + e \* N_edge

        bB_P\[:, start:start + N_edge\] = bB_P_F\[e\]

    

    \# solve reduced system : bP_T = bB_P / es_T

    try:

        bP_T = np.linalg.solve(eS_T +  1e-12\*np.eye(N_high_grad), bB_P)

    except:

        bP_T = np.linalg.lstsq(eS_T + 1e-12\*np.eye(N_high_grad), bB_P, rcond=None)\[0\]

    

    self.P_T_cache\[cell_idx\] = bP_T

    P_T_full = np.zeros((N_high_full, local_dofs))



    cell_area = L_T_high\[0\]  # (φ\_T^1, 1)\_T = area of cell

    L_T_low_extended = np.zeros(local_dofs)

    L_T_low_extended\[:N_cell\] = L_T_low

    if cell_area > 1e-12:

        P_T_full\[0, :\] = (L_T_low_extended - L_T_high\[1:\] @ bP_T) / cell_area

    else:

        P_T_full\[0, :\] = 0.0

    

    \# Gradient mode coefficients (from reduced solve)

    P_T_full\[1:, :\] = bP_T

    \# Step 8: Compute consistent part A_T^cons = P_T^T S_T P_T

    A_cons = P_T_full.T @ S_T_full @ P_T_full

    \# Compute D_T using hierarchical mass matrices

    \# D_T = (M_TT^{k,k})^{-1} M_TT^{k,k+1} P_T - \[I 0 ... 0\]

    \# (M_TT^{k,k})^{-1} M_TT^{k,k+1} is the matrix of the linear operator

    \# (π\_T^{0,k})|P^{k+1}(T) : P^{k+1}(T) -> P^k(T)

    try:

       M_TT_inv = np.linalg.inv(M_TT_kk + 1e-12 \* np.eye(N_cell))

    except:

       M_TT_inv = np.linalg.lstsq(M_TT_kk + 1e-12 \* np.eye(N_cell), np.eye(N_cell), rcond=None)\[0\]



    D_T = M_TT_inv @ M_TT_k_k1 @ P_T_full

    D_T\[:, :N_cell\] -= np.eye(N_cell)

    \# Compute D_TF for each face using hierarchical mass matrices

    \# D_TF = (M_FF^{k,k})^{-1} M_FT^{k,k+1} P_T - \[0 ... I ... 0\]

    \# (M_FF^{k,k})^{-1} M_FT^{k,k+1} is the matrix of the linear operator

    \# π\_F^{0,k} ∘ γ\_TF^{k+1} : P^{k+1}(T) -> P^k(F)

    STF = np.zeros((local_dofs, local_dofs))

    D_TF_list = \[\]



    for e, edge_idx in enumerate(edges):

        hF = get_edge_measure(self.domain, edge_idx)

    

        quadF_physical = elquad.quadF_physical\[e\]

        quadF_weights = elquad.quadF_weights\[e\]

        n_quadF = len(quadF_physical)

    

        phiT_quadF = elquad.get_phiT_quadF(e)

        phiL_quadF = elquad.get_phiL_quadF(e)

        phiF_quadF = elquad.get_phiF_quadF(e)

    \# Compute M_FT^{k,k+1} = \[(φ\_i^F, φ\_j^{K+1})\_F\]

    M_FT_k_k1 = np.zeros((N_edge, N_high_full))

    for i in range(N_edge):

        for j in range(N_high_full):

            for q in range(n_quadF):

                M_FT_k_k1\[i, j\] += quadF_weights\[q\] \* phiF_quadF\[q, i\] \* phiT_quadF\[q, j\]

    

    \# Compute M_FF^{k,k} = \[(φ\_i^F, φ\_j^F)\_F\]

    M_FF_kk = np.zeros((N_edge, N_edge))

    for i in range(N_edge):

        for j in range(N_edge):

            for q in range(n_quadF):

                M_FF_kk\[i, j\] += quadF_weights\[q\] \* phiF_quadF\[q, i\] \* phiF_quadF\[q, j\]

    

    \# Compute M_FT^{k,k} = \[(φ\_i^F, φ\_j^L)\_F\]

    M_FT_k_k = np.zeros((N_edge, N_cell))

    for i in range(N_edge):

        for j in range(N_cell):

            for q in range(n_quadF):

                M_FT_k_k\[i, j\] += quadF_weights\[q\] \* phiF_quadF\[q, i\] \* phiL_quadF\[q, j\]

    

    try:

        M_FF_inv = np.linalg.inv(M_FF_kk + 1e-12 \* np.eye(N_edge))

    except:

        M_FF_inv = np.linalg.lstsq(M_FF_kk + 1e-12 \* np.eye(N_edge), np.eye(N_edge), rcond=None)\[0\]

    

    \# Compute D_TF

    \# D_TF = (M_FF^{k,k})^{-1} M_FT^{k,k+1} P_T - \[0 ... I ... 0\]

    D_TF = M_FF_inv @ M_FT_k_k1 @ P_T_full

    face_start = N_cell + e \* N_edge

    D_TF\[:, face_start:face_start + N_edge\] -= np.eye(N_edge)

    D_TF_list.append(D_TF)

    

    \# Compute (M_FF^{k,k})^{-1} M_FT^{k,k}

    M_FF_inv_M_FT = M_FF_inv @ M_FT_k_k

    

    \# Compute difference operator for stabilization

    diff_operator = D_TF - M_FF_inv_M_FT @ D_T

    

    \# Add to stabilization

    STF += (1.0 / hF) \* diff_operator.T @ M_FF_kk @ diff_operator

    

    \# Step 15: Assemble local matrix A_T = A_cons + STF

    A_T = A_cons + STF

    return A_T

def load_operator(self, cell_idx, elquad):

    edges = elquad.edges

    n_edges = len(edges)

    local_dofs = self.m_nlocal_cell_dofs + n_edges \* self.m_nlocal_edge_dofs

    

    b = np.zeros(local_dofs)

    \# Source term contribution to cell DOFs only

    quadT_physical = elquad.quadT_physical

    quadT_weights = elquad.quadT_weights

    phiL_quadT = elquad.get_phiL_quadT()  # P^L basis functions

    

    for i in range(self.m_nlocal_cell_dofs):

        for q in range(len(quadT_physical)):

            x = quadT_physical\[q\]

            b\[i\] += quadT_weights\[q\] \* self.source_func(x) \* phiL_quadT\[q, i\]



    return b

    \# Edge DOFs remain zero (no source term)

    \# The source term will be coupled to edge DOFs through static condensation

    \# in the assemble() method

def assemble(self):

    """

    Assemble the global system using hierarchical basis approach.

    """

    print("\[HHO_Diffusion\] Assembling...")

    total_start = time.time()

    

    self.aT = {}

    cell_source = {}

    

    triplets_GlobMat = \[\]

    triplets_ScBe = \[\]

    

    self.GlobRHS = np.zeros(self.m_ntotal_edge_dofs)

    self.ScRHS = np.zeros(self.m_ntotal_cell_dofs)

    

    self.invATT_ATF_cache = {}

    self.invATT_b_cache = {}

    

    for iT in range(self.n_cells):

        elquad = self.get_element_quad(iT)

        edges = elquad.edges

        n_edges = len(edges)

        

        \# Get Dirichlet edges

        dirichlet_edges = \[\]

        for e_idx, edge_idx in enumerate(edges):

            if is_boundary_edge(self.domain, edge_idx):

                dirichlet_edges.append(e_idx + 1)

        

        self.aT\[iT\] = self.diffusion_operator(iT, elquad)

        bT = self.load_operator(iT, elquad) # Load operator called here

        

        A_T = self.aT\[iT\]

        

        if self.L >= 0:



            \# Static condensation

            ATT = A_T\[:self.m_nlocal_cell_dofs , :self.m_nlocal_cell_dofs\]  # A₁₁  # N_cell × N_cell  # First N_cell rows, first N_cell columns

            ATF = A_T\[:self.m_nlocal_cell_dofs, self.m_nlocal_cell_dofs:\]  # A₁₂  # N_cell × N_face   # First N_cell rows, remaining columns

            AFF = A_T\[self.m_nlocal_cell_dofs:, self.m_nlocal_cell_dofs:\]  # A₂₂  # N_face × N_face   # Remaining rows, remaining columns

            

            \# Split load vector

            b_cell = bT\[:self.m_nlocal_cell_dofs\] # Load on cell DOFs # b₁

            b_face = bT\[self.m_nlocal_cell_dofs:\] # Load on edge DOFs (all zeros) # b₂

            

            ATT_inv = np.linalg.inv(ATT + 1e-12 \* np.eye(self.m_nlocal_cell_dofs))   # 1/A₁₁

            invATT_ATF = ATT_inv @ ATF                                               # A₁₂/A₁₁

            invATT_b = ATT_inv @ b_cell                                              # b₁/A₁₁

            

            """

            self.invATT_ATF_cache\[iT\] = invATT_ATF 

            self.invATT_b_cache\[iT\] = invATT_b               

            \[ ATT ATF \]\[UT\] = \[bT\]

            \[ AFT AFF \]\[UF\] = \[bF\]

            Thus  UT = ATT ^-1 \*( bT - UF\*ATF ) 

            and   UF = AFF^-1 \* (bF - UT\*AFT) now putting UT we get 

            UF = (AFF - AFT\*ATT^-1\*ATF) ^(-1) \* (bF - AFT \* ATT ^(-1)\* bT)

                 = S \* b̃

            where ScRHS =  ATT^(-1 )\*bT 

            and Asc =    ATT ^(-1 )  \* ATF 

            and UT = ScRHS - Asc \* UF



            """

            \# Schur complement for the matrix

            MatF = AFF - ATF.T @ invATT_ATF              # S = AFF - AFT \* ATT ^(-1 )  \* ATF (Local Schur complement)

            \# Effective load on edge DOFs (Schur complement of RHS)

            \# b_face is zero, so cell_source\[iT\] = -ATF.T @ invATT_b

            cell_source\[iT\] = b_face - ATF.T @ invATT_b   # b̃ = bF - ATF\* ATT^(-1 )\*bT  (Local effective RHS)

            \# Store cell DOF solution contribution

            cell_start = iT \* self.m_nlocal_cell_dofs

            self.ScRHS\[cell_start:cell_start + self.m_nlocal_cell_dofs\] = invATT_b  # ATT^(-1 )\*bT 

            \# Build A_sc matrix for cell-edge coupling

            for i in range(self.m_nlocal_cell_dofs): # For each cell DOF

                for j_edge, edge_idx in enumerate(edges): # For each edge in this cell

                    j_global = edge_idx \* self.m_nlocal_edge_dofs

                    j_local_start = j_edge \* self.m_nlocal_edge_dofs

                    ipos = cell_start + i               # Global position of cell DOF

                    for jk in range(self.m_nlocal_edge_dofs):  # For each DOF on edge

                        jpos = j_global + jk                   # Global position of edge DOF

                        j_local = j_local_start + jk

                        triplets_ScBe.append((ipos, jpos, invATT_ATF\[i, j_local\])) # A_Sc=(A_TT ^-1 \* A_TF )

        

        \# Triplets collect kar rahe hain (row, col, value)

        \# locally cell dofs ko remove kar diye hai sirf face dofs me equations bachi hai usko global matrix me rakh rhe jha par uska jageh ho

        \# Assemble local triplets for global matrix

        \# Yeh GLOBAL hai - sab cells ke contributions ko assemble karna

        for i_edge, edge_idx in enumerate(edges):

            is_dirichlet = (i_edge + 1) in dirichlet_edges

            

            i_global = edge_idx \* self.m_nlocal_edge_dofs

            i_local = i_edge \* self.m_nlocal_edge_dofs

            

            if not is_dirichlet:

                for i in range(self.m_nlocal_edge_dofs):

                    ipos = i_global + i

                    i_loc = i_local + i

                    

                    for j_edge, edge_idx2 in enumerate(edges):

                        j_global = edge_idx2 \* self.m_nlocal_edge_dofs

                        j_local = j_edge \* self.m_nlocal_edge_dofs

                        for j in range(self.m_nlocal_edge_dofs):

                            triplets_GlobMat.append((ipos, j_global + j, MatF\[i_loc, j_local + j\]))

                    



                    \# LOCAL RHS ko GLOBAL RHS mein daalo

                    \# Right-hand side contribution (source term coupled through static condensation)

                    self.GlobRHS\[ipos\] += cell_source\[iT\]\[i_loc\]



            else:

                \# Dirichlet edge pe boundary condition rakh rhe : set identity B\[dof\] = g_D(edge)  

                for i in range(self.m_nlocal_edge_dofs):

                    ipos = i_global + i

                    triplets_GlobMat.append((ipos, ipos, 1.0))

                    self.GlobRHS\[ipos\] = 0.0

    \# GlobMat = Σ S_T and  GlobRHS = Σ b̃\_T ko upar array me store kiye 

    \# Triplets collect kar rahe hain (row, col, value)

    \# Build global matrices (GlobMat \* U_F = GlobRHS )

    \# yha global system ki matrix ko store kar liye solve karne k liye

    \#  GlobMat = Σ S_T // GlobRHS = Σ b̃\_T  //  A_sc = Σ A_TT⁻¹ A_TF // ScRHS = Σ A_TT⁻¹ b_T

    \#   Σ S_T \* Global UT =  Σ b̃\_T

    self.GlobMat = coo_matrix(

        (np.array(\[t\[2\] for t in triplets_GlobMat\]),

         (np.array(\[t\[0\] for t in triplets_GlobMat\]),

          np.array(\[t\[1\] for t in triplets_GlobMat\]))),

        shape=(self.m_ntotal_edge_dofs, self.m_ntotal_edge_dofs)

    ).tocsr()

    

    \# yeh Global Asc matrix hai global UT  = scRHS - Asc \* XF ko recover karne k liye 

    \# A_sc = Σ A_TT⁻¹ A_TF ,isse hmme Global UT mil jayega 

    if triplets_ScBe:

        self.A_sc = coo_matrix(

            (np.array(\[t\[2\] for t in triplets_ScBe\]),

             (np.array(\[t\[0\] for t in triplets_ScBe\]),

              np.array(\[t\[1\] for t in triplets_ScBe\]))),

            shape=(self.m_ntotal_cell_dofs, self.m_ntotal_edge_dofs)

        ).tocsr()

    else:

        self.A_sc = csr_matrix((self.m_ntotal_cell_dofs, self.m_ntotal_edge_dofs))

    

    self.\_assembly_time = time.time() - total_start

    print(f"\[HHO_Diffusion\] Assembly time: {self.\_assembly_time:.4f}s")



def solve(self):

    print("\[HHO_Diffusion\] Solving...")

    solve_start = time.time()

    UDir = None

    dir_dofs = \[\]

    dir_values = \[\]

    if self.bc_type != 'Neumann':

        self.SysMat = self.GlobMat.copy()  # SysMat = S_global

        self.B = self.GlobRHS.copy()       # B = b̃\_global    

        dir_dofs = \[\]

        dir_values = \[\]

        for e in range(self.n_edges):

            if is_boundary_edge(self.domain, e):

                for i in range(self.m_nlocal_edge_dofs):

                    dof = e \* self.m_nlocal_edge_dofs + i

                    dir_dofs.append(dof)

                    edge_center = get_edge_center(self.domain, e)

                    dir_values.append(self.bc_dirichlet_func(edge_center))

                    

                    self.SysMat\[dof, :\] = 0

                    self.SysMat\[dof, dof\] = 1

                    self.B\[dof\] = dir_values\[-1\]

        

        UDir = np.array(dir_values)

    else:

        n_unknowns = self.m_ntotal_edge_dofs

        self.SysMat = self.GlobMat.copy()

        self.B = self.GlobRHS.copy()

        UDir = np.zeros(0)

    self.SysMat = self.SysMat.tocsr() # SysMat = S_global

    \# yhe pe hm xF = solve(SysMat, B), sari edge dofs nikal rhe solve karke

    \# xF = SysMat^(-1) \* B 

    \# Solve using BiCGSTAB or direct solver

    if self.solver_type == 'bicgstab':

        try:

            xF, info = bicgstab(self.SysMat, self.B, tol=1e-10, maxiter=10000)

            if info != 0:

                print(f"  BiCGSTAB did not converge (info={info}), using direct solver...")

                xF = spsolve(self.SysMat, self.B)

        except:

            xF = spsolve(self.SysMat, self.B)

    else:

        xF = spsolve(self.SysMat, self.B)

    \# xF nikal lie hm 

    self.\_solving_error = np.linalg.norm(self.SysMat @ xF - self.B) #  Check Error   

    print(f"  Solving error: {self.\_solving_error:.6e}")  # print error 

    

    self.Xh = np.zeros(self.m_ntotal_dofs) # Xh = \[U_T; xF\] 

    

    if self.L >= 0:

        xF_full = xF\[:self.m_ntotal_edge_dofs\]

        self.Xh\[:self.m_ntotal_cell_dofs\] = self.ScRHS - self.A_sc @ xF_full

    else:

        self.Xh\[:self.m_ntotal_cell_dofs\] = self.ScRHS - self.A_sc @ xF

    

    self.Xh\[self.m_ntotal_cell_dofs:\] = xF 

    

    if self.bc_type != 'Neumann':

        for idx, dof in enumerate(dir_dofs):

            self.Xh\[self.m_ntotal_cell_dofs + dof\] = UDir\[idx\]

    

    self.\_solving_time = time.time() - solve_start

    print(f"\[HHO_Diffusion\] Solving time: {self.\_solving_time:.4f}s")

    return self.Xh

#Xh=\[UT ; xF\] 

def compute_errors(self):

    if self.exact_solution is None:

        return None, None

    l2 = 0.0

    h1 = 0.0

    

    for iT in range(self.n_cells):

         elquad = self.get_element_quad(iT)

         edges = elquad.edges



         \# Get reduced reconstruction operator (gradient coefficients bP_T)

         bP_T = self.P_T_cache.get(iT)

         if bP_T is None:

             continue

            

         \# Build local DOFs for this cell

         cell_start = iT \* self.m_nlocal_cell_dofs

         cell_dofs = self.Xh\[cell_start:cell_start + self.m_nlocal_cell_dofs\]

        

         \# Collect edge DOFs for this cell

         edge_dofs = \[\]

         for e, edge_idx in enumerate(edges):

             for j in range(self.m_nlocal_edge_dofs):

                 glob_idx = self.m_ntotal_cell_dofs + edge_idx \* self.m_nlocal_edge_dofs + j

                 edge_dofs.append(self.Xh\[glob_idx\])

         edge_dofs = np.array(edge_dofs)

        

    \# Combine cell and edge DOFs

    local_dofs = np.concatenate(\[cell_dofs, edge_dofs\])

    \# Reconstruct full P^{K+1} coefficients

    N_high_full = self.m_nhighorder_dofs    

    \# Step 1: Compute gradient coefficients: bP_T @ local_dofs

    grad_coeffs = bP_T @ local_dofs  # N_high_grad

    \# Step 2: Recover constant coefficient via closure condition

    quadT_physical = elquad.quadT_physical

    quadT_weights = elquad.quadT_weights

    n_quadT = len(quadT_physical)

    phiT_quadT = elquad.get_phiT_quadT()

    phiL_quadT = elquad.get_phiL_quadT()

    L_T_low = np.zeros(self.m_nlocal_cell_dofs)

    for i in range(self.m_nlocal_cell_dofs):

        for q in range(n_quadT):

            L_T_low\[i\] += quadT_weights\[q\] \* phiL_quadT\[q, i\]

        

    L_T_high = np.zeros(N_high_full)

    for i in range(N_high_full):

        for q in range(n_quadT):

            L_T_high\[i\] += quadT_weights\[q\] \* phiT_quadT\[q, i\]

        

    cell_area = L_T_high\[0\]

    \# Constant coefficient from closure condition

    if cell_area > 1e-12:

        const_coeff = (np.dot(L_T_low, cell_dofs) - np.dot(L_T_high\[1:\], grad_coeffs)) / cell_area

    else:

        const_coeff = 0.0 

    \# Full coefficients: \[constant, gradient_coeffs\]

    \# reconstruted solution in P(K+1) space 

    w_T = np.concatenate(\[\[const_coeff\], grad_coeffs\])

    \# Get higher-order quadrature points

    for q in range(len(elquad.quadT_physical)):

        x = elquad.quadT_physical\[q\]

        w = elquad.quadT_weights\[q\]    

        \# Evaluate reconstructed solution using high-order basis P^{K+1}

        phi_high = poly_basis_2d(self.K + 1, x\[0\], x\[1\],hierarchical=True)

        u_h = np.dot(w_T, phi_high)

        \# Evaluate gradient using high-order basis P^{K+1}

        grad_phi_high = poly_grad_basis_2d(self.K +1, x\[0\], x\[1\],hierarchical=True)

        grad_u_h = np.zeros(2)

        for i in range(len(w_T)):

            grad_u_h += w_T\[i\] \* grad_phi_high\[i\]                

        u_ex = self.exact_solution(x)

        g_ex = self.grad_exact(x)

        l2 += w\* (u_h - u_ex)\*\*2

        h1 += w\* (np.linalg.norm(grad_u_h - g_ex)\*\*2)

    l2_errors = np.sqrt(l2)

    H1_errors = np.sqrt(h1)

    return l2_errors, H1_errors

# CONVERGENCE STUDY

def convergence_study():

print("\\n" + "="\*70)

print("HHO CONVERGENCE STUDY")

print("="\*70)

K = 2

L = 2

levels = \[4, 8, 16, 32\] 

l2_err = \[\]

H1_err = \[\]

h_values = \[\]

times = \[\]

print(f"\\nK={K}, L={L}, Levels: {levels}")

print("="\*70)

print("Theoretical rates for hierarchical HHO (Remark B.2):")

print(f"{'Level':>6} {'Mesh':>10} {'h':>10} {'L2 Error':>14} {'Rate':>8} {'H1 Error':>14} {'Rate':>8} {'Time':>10}")

print("="\*70)

\# LOOP OVER EACH MESH

for n, N in enumerate(levels):

    print(f"\\n\[Level {n}\] Mesh {N}x{N}")

    

    domain = mesh.create_unit_square(MPI.COMM_WORLD, N, N)

    

    h = 1.0 / N

    h_values.append(h)

    

    \# Use HIERARCHICAL class

    hho = HHODiffusion(domain, K, L, kappa, source, 'Dirichlet',

                      bc_dirichlet_func=bc_dirichlet,

                      exact_solution=u_exact, grad_exact=grad_exact,

                      use_scaling=True)

    

    \# Assemble and solve

    start_time = time.time()

    hho.assemble()

    hho.solve()

    total_time = time.time() - start_time

    times.append(total_time)

    

    \# Compute errors using hierarchical reconstruction

    l2_errors, H1_errors = hho.compute_errors()

    l2_err.append(l2_errors)

    H1_err.append(H1_errors)

    

    \# Print results with rates

    if n > 0:

        rate_l2 = np.log(l2_err\[n-1\] / l2_err\[n\]) / np.log(2.0)

        rate_h1 = np.log(H1_err\[n-1\] / H1_err\[n\]) / np.log(2.0)

        print(f"  L2: {l2_errors:.6e}, Rate: {rate_l2:.4f}")

        print(f"  H1: {H1_errors:.6e}, Rate: {rate_h1:.4f}")

        print(f"  Time: {total_time:.4f}s")

    else:

        print(f"  L2: {l2_errors:.6e}")

        print(f"  H1: {H1_errors:.6e}")

        print(f"  Time: {total_time:.4f}s")

    

\# Print summary table

print("\\n" + "="\*70)

print("SUMMARY TABLE")

print("="\*70)

print(f"{'N':>6} {'h':>12} {'L2 Error':>14} {'Rate':>10} {'H1 Error':>14} {'Rate':>10} {'Time (s)':>12}")

print("-"\*70)



for n, N in enumerate(levels):

    if n == 0:

        print(f"{N:>6} {h_values\[n\]:>12.6f} {l2_err\[n\]:>14.6e} {'---':>10} {H1_err\[n\]:>14.6e} {'---':>10} {times\[n\]:>12.4f}")

    else:

        rate_l2 = np.log(l2_err\[n-1\] / l2_err\[n\]) / np.log(2.0)

        rate_H1 = np.log(H1_err\[n-1\] / H1_err\[n\]) / np.log(2.0)

        print(f"{N:>6} {h_values\[n\]:>12.6f} {l2_err\[n\]:>14.6e} {rate_l2:>10.4f} {H1_err\[n\]:>14.6e} {rate_h1:>10.4f} {times\[n\]:>12.4f}")



print("="\*70)

print("="\*70)

return l2_err, H1_err

# MAIN

if _name_ == “_main_”:

print("\\n" + "="\*70)

print("HHO DIFFUSION - PRIMAL FORMULATION")

print("Hybrid High-Order method")

print("="\*70)

print("\\nSolving: -∇·(κ∇u) = f in Ω")

print("         u = g on ∂Ω")

print("         Ω = (0,1)²")

print("\\nFeatures:")

print("  - Primal HHO formulation (exact from theory)")

print("  - Dunavant quadrature rules for triangles")

print("  - Gauss-Legendre quadrature for edges")

print("  - Static condensation")

print("  - Stabilization scaling")

print("  - L2 and H1 error computation")

print("  - Solution visualization for each mesh level")

print("="\*70)



\# Run convergence study with solution plotting

convergence_study()