Poisson solver with big field error

Hello everyone,

I have a numerical model consisting of a grounded metallic outer shell enclosing a uniformly charged spherical particle distribution to solve the internal electric field. The simulated electric field results are compared with the analytical solutions. At present, the maximum deviation between numerical and theoretical results is approximately 5%. This noticeable error is likely introduced by the charge deposition scheme, although the exact source cannot be fully confirmed. I would appreciate suggestions on feasible approaches to reduce this numerical error. The relevant code and simulation results are attached for reference. Thank you very much.

track_inject_iter.py:

import numba
import numpy as np
import dolfinx
from dolfinx import fem, geometry
import ufl
from petsc4py import PETSc
import dolfinx.la as la

@numba.njit(parallel=True, fastmath=True)
def numba_deposit_kernel(
    alive_x,
    alive_q,
    cell_ids,
    invT_arr,
    origin_arr,
    cell_dofs_arr,
    num_dofs
):
    n_particles = alive_x.shape[0]
    n_threads = numba.get_num_threads()
  
    # 1. Thread-isolated buffer to avoid race conditions without locks
    thread_buf = np.zeros((n_threads, num_dofs), dtype=np.float64)
    thread_outside_counts = np.zeros(n_threads, dtype=np.int64)



    chunk_size = (n_particles + n_threads - 1) // n_threads

    for tid in numba.prange(n_threads):
        start_idx = tid * chunk_size
        end_idx = min(start_idx + chunk_size, n_particles)
  
        buf = thread_buf[tid]
        outside_cnt = 0
  
        for i in range(start_idx, end_idx):
            cell = cell_ids[i]
            if cell < 0:
                outside_cnt += 1
                continue

            invT = invT_arr[cell]
            orig = origin_arr[cell]

            dx = alive_x[i, 0] - orig[0]
            dy = alive_x[i, 1] - orig[1]
            dz = alive_x[i, 2] - orig[2]

            # Strict calculation of tetrahedral barycentric coordinates
            l0 = invT[0, 0]*dx + invT[0, 1]*dy + invT[0, 2]*dz
            l1 = invT[1, 0]*dx + invT[1, 1]*dy + invT[1, 2]*dz
            l2 = invT[2, 0]*dx + invT[2, 1]*dy + invT[2, 2]*dz
            l3 = 1.0 - (l0 + l1 + l2)

            # =========================================================
            # Tolerance judgment: allow tiny floating-point out-of-bounds to avoid falsely removing particles near cell boundaries
            # =========================================================
            eps = 1e-9
            if l0 < -eps or l1 < -eps or l2 < -eps or l3 < -eps:
                outside_cnt += 1
                continue  # Only discard particles far outside the mesh

            # 1. Truncate minor negative values to eliminate negative charge contamination
            l0 = l0 if l0 > 0.0 else 0.0
            l1 = l1 if l1 > 0.0 else 0.0
            l2 = l2 if l2 > 0.0 else 0.0
            l3 = l3 if l3 > 0.0 else 0.0

            # 2. Local shape function renormalization: ensure full charge conservation across the 4 tetrahedron nodes
            l_sum = l0 + l1 + l2 + l3
            if l_sum > 0.0:
                inv_sum = 1.0 / l_sum
                l0 *= inv_sum
                l1 *= inv_sum
                l2 *= inv_sum
                l3 *= inv_sum

            q = alive_q[i]
            dofs = cell_dofs_arr[cell]

            # 3. Thread-safe accumulation into local thread buffer
            buf[dofs[0]] += q * l0
            buf[dofs[1]] += q * l1
            buf[dofs[2]] += q * l2
            buf[dofs[3]] += q * l3

        thread_outside_counts[tid] = outside_cnt

    # 2. Cross-thread reduction to sum contributions over all threads
    result = np.zeros(num_dofs, dtype=np.float64)
    for d in numba.prange(num_dofs):
        s = 0.0
        for t in range(n_threads):
            s += thread_buf[t, d]
        result[d] = s

    # 3. Sum total count of particles out of mesh bounds
    total_outside = 0
    for t in range(n_threads):
        total_outside += thread_outside_counts[t]

    return result, total_outside

def numba_deposit_core_adapter(ps, alive_idx, cell_ids_alive, geo_cache, num_dofs):
    """Wrapper interface for argument conversion"""
    return numba_deposit_kernel(
       ps.x[alive_idx],
       ps.q[alive_idx],
        cell_ids_alive,
       geo_cache.invT_arr,
       geo_cache.origin_arr,
       geo_cache.cell_dofs_arr,
        num_dofs
    )

def deposit_charge(ps, V_rho, geo_cache):
    """
    Charge deposition kernel with strict dual-dimensional charge conservation (external entry point for Dolfinx)
    """
    domain = V_rho.mesh
    rho = fem.Function(V_rho)
    rho.x.array[:] = 0.0

    alive_mask = ps.alive
    alive_idx = np.where(alive_mask)[0]
    if len(alive_idx) == 0:
        return rho

    # 1. Batch collision detection to locate particle-cell intersections via geometry tree
    points = ps.x[alive_idx]
    candidates = geometry.compute_collisions_points(geo_cache.bb_tree, points)
    colliding = geometry.compute_colliding_cells(domain, candidates, points)

    # Update cell ID index for each living particle
    cell_ids_alive = np.full(len(alive_idx), -1, dtype=np.int32)
    for local_i, pidx in enumerate(alive_idx):
        links = colliding.links(local_i)
        if len(links) == 0:
            ps.alive[pidx] = False  # Mark particles outside local mesh as dead
            continue
        cell = links[0]
        ps.cell_id[pidx] = cell
        cell_ids_alive[local_i] = cell

    # 2. Calculate total DoF count including local DoFs and cross-process ghost DoFs
    num_dofs = V_rho.dofmap.index_map.size_local + V_rho.dofmap.index_map.num_ghosts
  
    # 3. Execute high-performance Numba charge deposition core
    Q_arr, total_outside = numba_deposit_core_adapter(ps, alive_idx, cell_ids_alive, geo_cache, num_dofs)

    # 4. Populate Dolfinx vector with charge array and perform bidirectional cross-process reduction
    Q = fem.Function(V_rho)
    Q.x.array[:len(Q_arr)] = Q_arr[:len(Q.x.array)]
  
    # Critical synchronization step: reverse scatter ghost contributions back to owning ranks first, then forward sync for global consistency
    Q.x.scatter_reverse(la.InsertMode.add)
    Q.x.scatter_forward()

    # 5. Compute nodal control volumes via mass lumping (convert total nodal charge Q to charge density rho)
    vtest = ufl.TestFunction(V_rho)
    lump_form = fem.form(vtest * ufl.dx)
    lump = fem.Function(V_rho)
    fem.petsc.assemble_vector(lump.x.petsc_vec, lump_form)
    lump.x.scatter_reverse(la.InsertMode.add)
    lump.x.scatter_forward()

    # 6. Compute charge density rho = Q / control_volume
    vol = lump.x.array
    mask = vol > 1e-30
    rho.x.array[mask] = Q.x.array[mask] / vol[mask]
  
    # Final forward synchronization to ensure consistent source term for Poisson solver
    rho.x.scatter_forward()
    return rho

electric_field.py:

import numpy as np

from mpi4py import MPI

import dolfinx

import dolfinx.fem as fem

import dolfinx.geometry as geometry

import ufl

from petsc4py import PETSc

import dolfinx.la as la

import gmsh

from datetime import datetime



# ====================== 你的高性能模块 ======================

# 请确保这些函数已在 track_fast.py 中定义

from track_inject_iter import ParticleSystemSoA, deposit_charge, MeshGeometryCache



EPS0 = 8.8541878128e-12

def create_spherical_mesh(comm, R_wall=0.020, cell_size=0.00025):

    """生成球体网格"""

    gmsh.initialize()

    if comm.rank != 0:

        gmsh.option.setNumber("General.Terminal", 0)

    

    model = gmsh.model

    model.add("sphere_cavity")

    

    sphere = model.occ.addSphere(0.0, 0.0, 0.0, R_wall)

    model.occ.synchronize()

    

    model.mesh.setSize(model.getEntities(0), cell_size)

    

    volumes = [e[1] for e in model.getEntities(3)]

    model.addPhysicalGroup(3, volumes, tag=1, name="volume")

    

    surfaces = [e[1] for e in model.getEntities(2)]

    model.addPhysicalGroup(2, surfaces, tag=100, name="metal_wall")

    

    model.mesh.generate(3)

    gmsh.write("spherical_cavity.msh")

    

    from dolfinx.io import gmshio

    domain, cell_tags, facet_tags = gmshio.model_to_mesh(model, comm, rank=0, gdim=3)

 

    gmsh.finalize()

    return domain, facet_tags



class FastPoissonSolver:

    def __init__(self, domain, facet_tags, wall_tag=100):

        self.domain = domain

        self.facet_tags = facet_tags

        # ------------------------------------------------

        # PIC推荐配置

        # ------------------------------------------------

        self.V_rho = fem.functionspace(

            domain,

            ("Lagrange", 1)

        )

        self.V_phi = fem.functionspace(

            domain,

            ("Lagrange", 2)

        )

        self.W = fem.functionspace(

            domain,

            ("DG", 1, (3,))

        )

        # ------------------------------------------------

        # Poisson

        # ------------------------------------------------

        u = ufl.TrialFunction(self.V_phi)

        v = ufl.TestFunction(self.V_phi)

        self.a_form = fem.form(

            ufl.inner(

                ufl.grad(u),

                ufl.grad(v)

            ) * ufl.dx

        )

        self.rho_func = fem.Function(self.V_rho)

        self.L_form = fem.form(

            (1.0 / EPS0)

            * self.rho_func

            * v

            * ufl.dx

        )

        self.phi = fem.Function(self.V_phi)

        # ------------------------------------------------

        # E = -grad(phi)

        # L2 projection

        # ------------------------------------------------

        E_trial = ufl.TrialFunction(self.W)

        E_test = ufl.TestFunction(self.W)

        self.aE = fem.form(

            ufl.inner(E_trial, E_test)

            * ufl.dx

        )

        self.LE = fem.form(

            ufl.inner(

                -ufl.grad(self.phi),

                E_test

            )

            * ufl.dx

        )

        self.E_field = fem.Function(self.W)

        # ------------------------------------------------

        # 边界条件

        # ------------------------------------------------

        facets = facet_tags.indices[

            facet_tags.values == wall_tag

        ]

        self.dofs_wall = fem.locate_dofs_topological(

            self.V_phi,

            domain.topology.dim - 1,

            facets

        )

        # ------------------------------------------------

        # Poisson Solver

        # ------------------------------------------------

        self.ksp = PETSc.KSP().create(domain.comm)

        self.ksp.setType("cg")

        pc = self.ksp.getPC()

        pc.setType("hypre")

        pc.setHYPREType("boomeramg")

        self.ksp.setInitialGuessNonzero(True)

        self.ksp.setTolerances(

            rtol=1e-8,

            atol=1e-12

        )

        # ------------------------------------------------

        # E Projection Solver

        # ------------------------------------------------

        self.kspE = PETSc.KSP().create(domain.comm)

        self.kspE.setType("cg")

        pcE = self.kspE.getPC()

        pcE.setType("jacobi")

    def solve(self, v_wall=0.0):

        # ==========================================

        # Dirichlet BC

        # ==========================================

        bc = fem.dirichletbc(

            PETSc.ScalarType(v_wall),

            self.dofs_wall,

            self.V_phi

        )

        bcs = [bc]

        # ==========================================

        # Assemble Poisson

        # ==========================================

        A = fem.petsc.create_matrix(

            self.a_form

        )

        fem.petsc.assemble_matrix(

            A,

            self.a_form,

            bcs=bcs

        )

        A.assemble()

        self.ksp.setOperators(A)

        b = fem.petsc.create_vector(

            self.L_form

        )

        fem.petsc.assemble_vector(

            b,

            self.L_form

        )

        fem.petsc.apply_lifting(

            b,

            [self.a_form],

            [bcs]

        )

        b.ghostUpdate(

            addv=PETSc.InsertMode.ADD,

            mode=PETSc.ScatterMode.REVERSE

        )

        fem.petsc.set_bc(

            b,

            bcs

        )

        # ==========================================

        # Solve phi

        # ==========================================

        self.ksp.solve(

            b,

            self.phi.x.petsc_vec

        )

        self.phi.x.scatter_forward()

        # ==========================================

        # L2 Projection of E

        # ==========================================

        AE = fem.petsc.create_matrix(

            self.aE

        )

        fem.petsc.assemble_matrix(

            AE,

            self.aE

        )

        AE.assemble()

        bE = fem.petsc.create_vector(

            self.LE

        )

        fem.petsc.assemble_vector(

            bE,

            self.LE

        )

        bE.ghostUpdate(

            addv=PETSc.InsertMode.ADD,

            mode=PETSc.ScatterMode.REVERSE

        )

        self.kspE.setOperators(AE)

        self.kspE.solve(

            bE,

            self.E_field.x.petsc_vec

        )

        self.E_field.x.scatter_forward()

        A.destroy()

        b.destroy()

        AE.destroy()

        bE.destroy()

        return self.phi, self.E_field

    def destroy(self):

        self.ksp.destroy()

        self.kspE.destroy()

def verify_grounded_sphere_system():

    comm = MPI.COMM_WORLD

    rank = comm.rank

    R_wall = 0.010

    R_beam = 0.005

    Q_total = 1e-9

    num_particles = 60000

    # =========================

    # mesh

    # =========================

    domain, facet_tags = create_spherical_mesh(

        comm,

        R_wall,

        cell_size=0.00025

    )

    # =========================

    # particles

    # =========================

    ps = ParticleSystemSoA(num_particles)

    np.random.seed(42)

    if rank == 0:

        count = 0

        while count < num_particles:

            xyz = np.random.uniform(-R_beam, R_beam, 3)

            if np.linalg.norm(xyz) <= R_beam:

                ps.x[count] = xyz

                ps.q[count] = Q_total / num_particles

                ps.alive[count] = True

                count += 1

    comm.Bcast(ps.x, root=0)

    comm.Bcast(ps.q, root=0)

    comm.Bcast(ps.alive, root=0)

    # =========================

    # solver + cache

    # =========================

    poisson = FastPoissonSolver(domain, facet_tags, wall_tag=100)

    geo_cache = MeshGeometryCache(

        domain,

        poisson.V_rho

    )

    # =========================

    # 1. particle charge check

    # =========================

    alive_idx = np.where(ps.alive)[0]

    particle_Q = np.sum(ps.q[alive_idx])

    if rank == 0:

        print("particle_Q =", particle_Q)

        print("target_Q   =", Q_total)

    # =========================

    # 2. deposit

    # =========================

    rho = deposit_charge(ps, poisson.V_rho, geo_cache)

    alive_after = np.sum(ps.alive)

    charge_after = np.sum(ps.q[ps.alive])

    if rank == 0:

        print("alive_after =", alive_after)

        print("charge_after =", charge_after)

    poisson.rho_func.x.array[:] = rho.x.array

    poisson.rho_func.x.scatter_forward()

    # =========================

    # 3. FEM integrated charge

    # =========================

    Q_form = fem.form(rho * ufl.dx)

    Q_local = fem.assemble_scalar(Q_form)

    Q_num = comm.allreduce(Q_local, op=MPI.SUM)

    if rank == 0:

        print(f"FEM ∫rho dx = {Q_num:.6e}")

    # =========================

    # 4. MASS LUMPED CHECK (关键守恒标准)

    # =========================

    v = ufl.TestFunction(poisson.V_rho)

    lump_form = fem.form(v * ufl.dx)

    lump = fem.Function(poisson.V_rho)

    fem.petsc.assemble_vector(lump.x.petsc_vec, lump_form)

    lump.x.scatter_forward()

    Q_lumped = np.sum(rho.x.array * lump.x.array)

    if rank == 0:

        print(f"Lumped Q     = {Q_lumped:.6e}")

    # =========================

    # 5. solve

    # =========================

    phi, E_field = poisson.solve(v_wall=0.0)

    # =========================

    # 6. field validation

    # =========================

    if rank == 0:

        print("\n====== 球体场验证 ======")

        print(f"{'r(mm)':<12}{'E_num':<18}{'E_exact':<18}{'err%':<10}")

        print("-" * 60)

        alive_idx = np.where(ps.alive)[0]

        test_idx = np.random.choice(alive_idx, 8, replace=False)

        for idx in test_idx:

            p = ps.x[idx]

            r = np.linalg.norm(p)

            if r <= R_beam:

                E_exact = (

                    Q_total /

                    (4 * np.pi * EPS0 * R_beam**3)

                ) * r

            else:

                E_exact = (

                    Q_total /

                    (4 * np.pi * EPS0 * r**2)

                )

            points = np.array([p], dtype=np.float64)

            cell_cand = geometry.compute_collisions_points(

                geo_cache.bb_tree,

                points

            )

            colliding = geometry.compute_colliding_cells(

                domain,

                cell_cand,

                points

            )

            if len(colliding.links(0)) > 0:

                E_num_vec = E_field.eval(

                    p,

                    [colliding.links(0)[0]]

                )

                E_num = np.linalg.norm(E_num_vec)

                err = abs(E_num - E_exact) / E_exact * 100

                print(f"{r*1000:<12.3f}{E_num:<18.4f}{E_exact:<18.4f}{err:<10.3f}")

    poisson.destroy()

if __name__ == "__main__":

    verify_grounded_sphere_system()

The simulation results are:
particle_Q = 1.0000000000000005e-09
target_Q = 1e-09
alive_after = 60000
charge_after = 1.0000000000000005e-09
FEM ∫rho dx = 9.759833e-10
Lumped Q = 9.759833e-10

====== Spherical Field Validation ======

r(mm) E_num E_exact err%

4.519 319018.8044 324914.4684 1.815
4.595 324181.0148 330373.3166 1.874
4.819 337208.7909 346496.2446 2.680
4.819 331767.5544 346494.7841 4.250
3.677 264973.1326 264347.6137 0.237
3.394 245362.6843 244064.4941 0.532
4.023 282003.2988 289252.0044 2.506
3.324 232931.4091 239022.3070 2.548