Hello everyone,
I have a numerical model consisting of a grounded metallic outer shell enclosing a uniformly charged spherical particle distribution to solve the internal electric field. The simulated electric field results are compared with the analytical solutions. At present, the maximum deviation between numerical and theoretical results is approximately 5%. This noticeable error is likely introduced by the charge deposition scheme, although the exact source cannot be fully confirmed. I would appreciate suggestions on feasible approaches to reduce this numerical error. The relevant code and simulation results are attached for reference. Thank you very much.
track_inject_iter.py:
import numba
import numpy as np
import dolfinx
from dolfinx import fem, geometry
import ufl
from petsc4py import PETSc
import dolfinx.la as la
@numba.njit(parallel=True, fastmath=True)
def numba_deposit_kernel(
alive_x,
alive_q,
cell_ids,
invT_arr,
origin_arr,
cell_dofs_arr,
num_dofs
):
n_particles = alive_x.shape[0]
n_threads = numba.get_num_threads()
# 1. Thread-isolated buffer to avoid race conditions without locks
thread_buf = np.zeros((n_threads, num_dofs), dtype=np.float64)
thread_outside_counts = np.zeros(n_threads, dtype=np.int64)
chunk_size = (n_particles + n_threads - 1) // n_threads
for tid in numba.prange(n_threads):
start_idx = tid * chunk_size
end_idx = min(start_idx + chunk_size, n_particles)
buf = thread_buf[tid]
outside_cnt = 0
for i in range(start_idx, end_idx):
cell = cell_ids[i]
if cell < 0:
outside_cnt += 1
continue
invT = invT_arr[cell]
orig = origin_arr[cell]
dx = alive_x[i, 0] - orig[0]
dy = alive_x[i, 1] - orig[1]
dz = alive_x[i, 2] - orig[2]
# Strict calculation of tetrahedral barycentric coordinates
l0 = invT[0, 0]*dx + invT[0, 1]*dy + invT[0, 2]*dz
l1 = invT[1, 0]*dx + invT[1, 1]*dy + invT[1, 2]*dz
l2 = invT[2, 0]*dx + invT[2, 1]*dy + invT[2, 2]*dz
l3 = 1.0 - (l0 + l1 + l2)
# =========================================================
# Tolerance judgment: allow tiny floating-point out-of-bounds to avoid falsely removing particles near cell boundaries
# =========================================================
eps = 1e-9
if l0 < -eps or l1 < -eps or l2 < -eps or l3 < -eps:
outside_cnt += 1
continue # Only discard particles far outside the mesh
# 1. Truncate minor negative values to eliminate negative charge contamination
l0 = l0 if l0 > 0.0 else 0.0
l1 = l1 if l1 > 0.0 else 0.0
l2 = l2 if l2 > 0.0 else 0.0
l3 = l3 if l3 > 0.0 else 0.0
# 2. Local shape function renormalization: ensure full charge conservation across the 4 tetrahedron nodes
l_sum = l0 + l1 + l2 + l3
if l_sum > 0.0:
inv_sum = 1.0 / l_sum
l0 *= inv_sum
l1 *= inv_sum
l2 *= inv_sum
l3 *= inv_sum
q = alive_q[i]
dofs = cell_dofs_arr[cell]
# 3. Thread-safe accumulation into local thread buffer
buf[dofs[0]] += q * l0
buf[dofs[1]] += q * l1
buf[dofs[2]] += q * l2
buf[dofs[3]] += q * l3
thread_outside_counts[tid] = outside_cnt
# 2. Cross-thread reduction to sum contributions over all threads
result = np.zeros(num_dofs, dtype=np.float64)
for d in numba.prange(num_dofs):
s = 0.0
for t in range(n_threads):
s += thread_buf[t, d]
result[d] = s
# 3. Sum total count of particles out of mesh bounds
total_outside = 0
for t in range(n_threads):
total_outside += thread_outside_counts[t]
return result, total_outside
def numba_deposit_core_adapter(ps, alive_idx, cell_ids_alive, geo_cache, num_dofs):
"""Wrapper interface for argument conversion"""
return numba_deposit_kernel(
ps.x[alive_idx],
ps.q[alive_idx],
cell_ids_alive,
geo_cache.invT_arr,
geo_cache.origin_arr,
geo_cache.cell_dofs_arr,
num_dofs
)
def deposit_charge(ps, V_rho, geo_cache):
"""
Charge deposition kernel with strict dual-dimensional charge conservation (external entry point for Dolfinx)
"""
domain = V_rho.mesh
rho = fem.Function(V_rho)
rho.x.array[:] = 0.0
alive_mask = ps.alive
alive_idx = np.where(alive_mask)[0]
if len(alive_idx) == 0:
return rho
# 1. Batch collision detection to locate particle-cell intersections via geometry tree
points = ps.x[alive_idx]
candidates = geometry.compute_collisions_points(geo_cache.bb_tree, points)
colliding = geometry.compute_colliding_cells(domain, candidates, points)
# Update cell ID index for each living particle
cell_ids_alive = np.full(len(alive_idx), -1, dtype=np.int32)
for local_i, pidx in enumerate(alive_idx):
links = colliding.links(local_i)
if len(links) == 0:
ps.alive[pidx] = False # Mark particles outside local mesh as dead
continue
cell = links[0]
ps.cell_id[pidx] = cell
cell_ids_alive[local_i] = cell
# 2. Calculate total DoF count including local DoFs and cross-process ghost DoFs
num_dofs = V_rho.dofmap.index_map.size_local + V_rho.dofmap.index_map.num_ghosts
# 3. Execute high-performance Numba charge deposition core
Q_arr, total_outside = numba_deposit_core_adapter(ps, alive_idx, cell_ids_alive, geo_cache, num_dofs)
# 4. Populate Dolfinx vector with charge array and perform bidirectional cross-process reduction
Q = fem.Function(V_rho)
Q.x.array[:len(Q_arr)] = Q_arr[:len(Q.x.array)]
# Critical synchronization step: reverse scatter ghost contributions back to owning ranks first, then forward sync for global consistency
Q.x.scatter_reverse(la.InsertMode.add)
Q.x.scatter_forward()
# 5. Compute nodal control volumes via mass lumping (convert total nodal charge Q to charge density rho)
vtest = ufl.TestFunction(V_rho)
lump_form = fem.form(vtest * ufl.dx)
lump = fem.Function(V_rho)
fem.petsc.assemble_vector(lump.x.petsc_vec, lump_form)
lump.x.scatter_reverse(la.InsertMode.add)
lump.x.scatter_forward()
# 6. Compute charge density rho = Q / control_volume
vol = lump.x.array
mask = vol > 1e-30
rho.x.array[mask] = Q.x.array[mask] / vol[mask]
# Final forward synchronization to ensure consistent source term for Poisson solver
rho.x.scatter_forward()
return rho
electric_field.py:
import numpy as np
from mpi4py import MPI
import dolfinx
import dolfinx.fem as fem
import dolfinx.geometry as geometry
import ufl
from petsc4py import PETSc
import dolfinx.la as la
import gmsh
from datetime import datetime
# ====================== 你的高性能模块 ======================
# 请确保这些函数已在 track_fast.py 中定义
from track_inject_iter import ParticleSystemSoA, deposit_charge, MeshGeometryCache
EPS0 = 8.8541878128e-12
def create_spherical_mesh(comm, R_wall=0.020, cell_size=0.00025):
"""生成球体网格"""
gmsh.initialize()
if comm.rank != 0:
gmsh.option.setNumber("General.Terminal", 0)
model = gmsh.model
model.add("sphere_cavity")
sphere = model.occ.addSphere(0.0, 0.0, 0.0, R_wall)
model.occ.synchronize()
model.mesh.setSize(model.getEntities(0), cell_size)
volumes = [e[1] for e in model.getEntities(3)]
model.addPhysicalGroup(3, volumes, tag=1, name="volume")
surfaces = [e[1] for e in model.getEntities(2)]
model.addPhysicalGroup(2, surfaces, tag=100, name="metal_wall")
model.mesh.generate(3)
gmsh.write("spherical_cavity.msh")
from dolfinx.io import gmshio
domain, cell_tags, facet_tags = gmshio.model_to_mesh(model, comm, rank=0, gdim=3)
gmsh.finalize()
return domain, facet_tags
class FastPoissonSolver:
def __init__(self, domain, facet_tags, wall_tag=100):
self.domain = domain
self.facet_tags = facet_tags
# ------------------------------------------------
# PIC推荐配置
# ------------------------------------------------
self.V_rho = fem.functionspace(
domain,
("Lagrange", 1)
)
self.V_phi = fem.functionspace(
domain,
("Lagrange", 2)
)
self.W = fem.functionspace(
domain,
("DG", 1, (3,))
)
# ------------------------------------------------
# Poisson
# ------------------------------------------------
u = ufl.TrialFunction(self.V_phi)
v = ufl.TestFunction(self.V_phi)
self.a_form = fem.form(
ufl.inner(
ufl.grad(u),
ufl.grad(v)
) * ufl.dx
)
self.rho_func = fem.Function(self.V_rho)
self.L_form = fem.form(
(1.0 / EPS0)
* self.rho_func
* v
* ufl.dx
)
self.phi = fem.Function(self.V_phi)
# ------------------------------------------------
# E = -grad(phi)
# L2 projection
# ------------------------------------------------
E_trial = ufl.TrialFunction(self.W)
E_test = ufl.TestFunction(self.W)
self.aE = fem.form(
ufl.inner(E_trial, E_test)
* ufl.dx
)
self.LE = fem.form(
ufl.inner(
-ufl.grad(self.phi),
E_test
)
* ufl.dx
)
self.E_field = fem.Function(self.W)
# ------------------------------------------------
# 边界条件
# ------------------------------------------------
facets = facet_tags.indices[
facet_tags.values == wall_tag
]
self.dofs_wall = fem.locate_dofs_topological(
self.V_phi,
domain.topology.dim - 1,
facets
)
# ------------------------------------------------
# Poisson Solver
# ------------------------------------------------
self.ksp = PETSc.KSP().create(domain.comm)
self.ksp.setType("cg")
pc = self.ksp.getPC()
pc.setType("hypre")
pc.setHYPREType("boomeramg")
self.ksp.setInitialGuessNonzero(True)
self.ksp.setTolerances(
rtol=1e-8,
atol=1e-12
)
# ------------------------------------------------
# E Projection Solver
# ------------------------------------------------
self.kspE = PETSc.KSP().create(domain.comm)
self.kspE.setType("cg")
pcE = self.kspE.getPC()
pcE.setType("jacobi")
def solve(self, v_wall=0.0):
# ==========================================
# Dirichlet BC
# ==========================================
bc = fem.dirichletbc(
PETSc.ScalarType(v_wall),
self.dofs_wall,
self.V_phi
)
bcs = [bc]
# ==========================================
# Assemble Poisson
# ==========================================
A = fem.petsc.create_matrix(
self.a_form
)
fem.petsc.assemble_matrix(
A,
self.a_form,
bcs=bcs
)
A.assemble()
self.ksp.setOperators(A)
b = fem.petsc.create_vector(
self.L_form
)
fem.petsc.assemble_vector(
b,
self.L_form
)
fem.petsc.apply_lifting(
b,
[self.a_form],
[bcs]
)
b.ghostUpdate(
addv=PETSc.InsertMode.ADD,
mode=PETSc.ScatterMode.REVERSE
)
fem.petsc.set_bc(
b,
bcs
)
# ==========================================
# Solve phi
# ==========================================
self.ksp.solve(
b,
self.phi.x.petsc_vec
)
self.phi.x.scatter_forward()
# ==========================================
# L2 Projection of E
# ==========================================
AE = fem.petsc.create_matrix(
self.aE
)
fem.petsc.assemble_matrix(
AE,
self.aE
)
AE.assemble()
bE = fem.petsc.create_vector(
self.LE
)
fem.petsc.assemble_vector(
bE,
self.LE
)
bE.ghostUpdate(
addv=PETSc.InsertMode.ADD,
mode=PETSc.ScatterMode.REVERSE
)
self.kspE.setOperators(AE)
self.kspE.solve(
bE,
self.E_field.x.petsc_vec
)
self.E_field.x.scatter_forward()
A.destroy()
b.destroy()
AE.destroy()
bE.destroy()
return self.phi, self.E_field
def destroy(self):
self.ksp.destroy()
self.kspE.destroy()
def verify_grounded_sphere_system():
comm = MPI.COMM_WORLD
rank = comm.rank
R_wall = 0.010
R_beam = 0.005
Q_total = 1e-9
num_particles = 60000
# =========================
# mesh
# =========================
domain, facet_tags = create_spherical_mesh(
comm,
R_wall,
cell_size=0.00025
)
# =========================
# particles
# =========================
ps = ParticleSystemSoA(num_particles)
np.random.seed(42)
if rank == 0:
count = 0
while count < num_particles:
xyz = np.random.uniform(-R_beam, R_beam, 3)
if np.linalg.norm(xyz) <= R_beam:
ps.x[count] = xyz
ps.q[count] = Q_total / num_particles
ps.alive[count] = True
count += 1
comm.Bcast(ps.x, root=0)
comm.Bcast(ps.q, root=0)
comm.Bcast(ps.alive, root=0)
# =========================
# solver + cache
# =========================
poisson = FastPoissonSolver(domain, facet_tags, wall_tag=100)
geo_cache = MeshGeometryCache(
domain,
poisson.V_rho
)
# =========================
# 1. particle charge check
# =========================
alive_idx = np.where(ps.alive)[0]
particle_Q = np.sum(ps.q[alive_idx])
if rank == 0:
print("particle_Q =", particle_Q)
print("target_Q =", Q_total)
# =========================
# 2. deposit
# =========================
rho = deposit_charge(ps, poisson.V_rho, geo_cache)
alive_after = np.sum(ps.alive)
charge_after = np.sum(ps.q[ps.alive])
if rank == 0:
print("alive_after =", alive_after)
print("charge_after =", charge_after)
poisson.rho_func.x.array[:] = rho.x.array
poisson.rho_func.x.scatter_forward()
# =========================
# 3. FEM integrated charge
# =========================
Q_form = fem.form(rho * ufl.dx)
Q_local = fem.assemble_scalar(Q_form)
Q_num = comm.allreduce(Q_local, op=MPI.SUM)
if rank == 0:
print(f"FEM ∫rho dx = {Q_num:.6e}")
# =========================
# 4. MASS LUMPED CHECK (关键守恒标准)
# =========================
v = ufl.TestFunction(poisson.V_rho)
lump_form = fem.form(v * ufl.dx)
lump = fem.Function(poisson.V_rho)
fem.petsc.assemble_vector(lump.x.petsc_vec, lump_form)
lump.x.scatter_forward()
Q_lumped = np.sum(rho.x.array * lump.x.array)
if rank == 0:
print(f"Lumped Q = {Q_lumped:.6e}")
# =========================
# 5. solve
# =========================
phi, E_field = poisson.solve(v_wall=0.0)
# =========================
# 6. field validation
# =========================
if rank == 0:
print("\n====== 球体场验证 ======")
print(f"{'r(mm)':<12}{'E_num':<18}{'E_exact':<18}{'err%':<10}")
print("-" * 60)
alive_idx = np.where(ps.alive)[0]
test_idx = np.random.choice(alive_idx, 8, replace=False)
for idx in test_idx:
p = ps.x[idx]
r = np.linalg.norm(p)
if r <= R_beam:
E_exact = (
Q_total /
(4 * np.pi * EPS0 * R_beam**3)
) * r
else:
E_exact = (
Q_total /
(4 * np.pi * EPS0 * r**2)
)
points = np.array([p], dtype=np.float64)
cell_cand = geometry.compute_collisions_points(
geo_cache.bb_tree,
points
)
colliding = geometry.compute_colliding_cells(
domain,
cell_cand,
points
)
if len(colliding.links(0)) > 0:
E_num_vec = E_field.eval(
p,
[colliding.links(0)[0]]
)
E_num = np.linalg.norm(E_num_vec)
err = abs(E_num - E_exact) / E_exact * 100
print(f"{r*1000:<12.3f}{E_num:<18.4f}{E_exact:<18.4f}{err:<10.3f}")
poisson.destroy()
if __name__ == "__main__":
verify_grounded_sphere_system()
The simulation results are:
particle_Q = 1.0000000000000005e-09
target_Q = 1e-09
alive_after = 60000
charge_after = 1.0000000000000005e-09
FEM ∫rho dx = 9.759833e-10
Lumped Q = 9.759833e-10
====== Spherical Field Validation ======
r(mm) E_num E_exact err%
4.519 319018.8044 324914.4684 1.815
4.595 324181.0148 330373.3166 1.874
4.819 337208.7909 346496.2446 2.680
4.819 331767.5544 346494.7841 4.250
3.677 264973.1326 264347.6137 0.237
3.394 245362.6843 244064.4941 0.532
4.023 282003.2988 289252.0044 2.506
3.324 232931.4091 239022.3070 2.548