I am running a phase-field simulation in FEniCSx with mesh refinement,
using SNES (newtonls) + GMRES + hypre boomeramg.
I observe that the code becomes slower when increasing MPI ranks.
Heres is my code :
import os
from pathlib import Path
from mpi4py import MPI
from petsc4py import PETSc
import dolfinx
import numpy as np
import ngsPETSc.utils.fenicsx as ngfx
from packaging.version import Version
from netgen.geom2d import SplineGeometry
from ufl import (
FacetNormal,
Identity,
TestFunction,
TrialFunction,
div,
dot,
ds,
dx,
inner,
lhs,
nabla_grad,
rhs,
sym,
TestFunctions,
split,
variable,
diff,
inner,
grad,
as_vector,
sqrt
)
from basix.ufl import element, mixed_element
from dolfinx import default_real_type, log, plot,mesh,geometry
from dolfinx.fem import(
Function,
functionspace,
Expression,
Constant,
Function,
extract_function_spaces,
functionspace,
assemble_scalar,
dirichletbc,
form,
locate_dofs_geometrical,
)
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.io import XDMFFile
from dolfinx.plot import vtk_mesh
from dolfinx import fem
import ufl
# Save all logging to file
log.set_output_file("log.txt")
rank = MPI.COMM_WORLD.rank
######ORIZW TIS PARAMETROUS POU EINAI STATHERES
r0=5
e4=0.05
Delta=0.65
W0=1
D=3
tau0=1
l=1.5957*D
lamda =1.5957*D
dt =0.01 ## XRONIKO BIMA
t=0
T= 285 # XRONIKI PERIODOS
######################### MESH NETGEN
geo=SplineGeometry()
pnts= [(-800,-800),(800,-800),(800,800),(-800,800)]
p1,p2,p3,p4 = [geo.AppendPoint(*pnt) for pnt in pnts]
curves = [
[["line",p1,p2],"line"],
[["line",p2,p3],"line"],
[["line",p3,p4],"line"],
[["line",p4,p1],"line"],
]
for c,bc in curves :
geo.Append(c,bc=bc)
#####################################
geoModel = ngfx.GeometricModel(geo, MPI.COMM_WORLD)
domain, (ct, ft), region_map = geoModel.model_to_mesh(gdim=2, hmax=100)
########################################################
def mark_cells_radius_DG(domain, R_ref, center=(0.0, 0.0)):
W = fem.functionspace(domain, ("DG", 0))
x = ufl.SpatialCoordinate(domain)
cx, cy = center
r = sqrt((x[0] - cx)**2 + (x[1] - cy)**2)
indicator = ufl.conditional(ufl.le(r, R_ref), 1.0, 0.0)
markers = fem.Function(W)
ip = W.element.interpolation_points
if Version(dolfinx.__version__) < Version("0.10.0"):
ip = ip()
markers.interpolate(fem.Expression(indicator, ip))
# --- IMPORTANT: keep only owned cells (exclude ghosts) ---
tdim = domain.topology.dim
n_owned = domain.topology.index_map(tdim).size_local
arr_owned = markers.x.array[:n_owned]
cells = np.flatnonzero(arr_owned > 0.5).astype(np.int32)
return cells
mesh_id = 0
out_folder = Path("code_4_8cores")
out_folder.mkdir(parents=True, exist_ok=True)
# γράψε αρχικό mesh
# geometric initial refinement
n_init_ref = 7
R_ref = 185.0
tdim = domain.topology.dim
# καλό να υπάρχει connectivity που χρειάζεται το compute_incident_entities
domain.topology.create_connectivity(tdim, tdim-1)
for k in range(n_init_ref):
cells_local = mark_cells_radius_DG(domain, R_ref, center=(0.0, 0.0))
# 100% ασφάλεια: μόνο owned (όχι ghost) ΚΑΙ σωστό dtype
n_owned = domain.topology.index_map(tdim).size_local
cells_local = cells_local[cells_local < n_owned].astype(np.int32, copy=False)
# Debug check (αν θες)
# assert cells_local.size == 0 or (cells_local.min() >= 0 and cells_local.max() < n_owned)
domain, (ct, ft) = geoModel.refineMarkedElements(tdim, cells_local)
###################### dokimazw na plotarw to mesh na dw pws fainetai pyhto
######################################
import numpy as np
from dolfinx import geometry
from mpi4py import MPI
class TipTracker:
def __init__(self, mesh, x_min, x_max=0.0, y0=0.0, npts=5000,
out_path="tip_velocity.csv", comm=None):
self.mesh = mesh
self.comm = comm if comm is not None else MPI.COMM_WORLD
self.rank = self.comm.rank
self.x_min = float(x_min)
self.x_max = float(x_max)
self.y0 = float(y0)
self.npts = int(npts)
# sampling points (ίδια σε όλους)
self.xs = np.linspace(self.x_min, self.x_max, self.npts)
self.pts = np.zeros((self.npts, 3), dtype=np.float64)
self.pts[:, 0] = self.xs
self.pts[:, 1] = self.y0
self.pts[:, 2] = 0.0
# local bb_tree
self.bb = geometry.bb_tree(self.mesh, self.mesh.topology.dim)
# state
self.x_tip_old = None
# file (μόνο rank 0)
self.f = None
if self.rank == 0:
self.f = open(out_path, "w", buffering=1)
self.f.write("step,t,x_tip,v_tip\n")
def _local_eval_on_points(self, phi):
"""
Κάθε rank αξιολογεί φ μόνο για points που πέφτουν στα local cells του.
Επιστρέφει (idx_local, vals_local) όπου idx_local είναι indices στο [0..npts-1].
"""
candidates = geometry.compute_collisions_points(self.bb, self.pts)
colliding = geometry.compute_colliding_cells(self.mesh, candidates, self.pts)
cells = np.full(self.npts, -1, dtype=np.int32)
for i in range(self.npts):
links = colliding.links(i)
if len(links) > 0:
cells[i] = links[0]
valid = cells >= 0
idx = np.where(valid)[0]
if idx.size == 0:
return idx.astype(np.int32), np.array([], dtype=np.float64)
vals = phi.eval(self.pts[valid], cells[valid]).reshape(-1).astype(np.float64)
return idx.astype(np.int32), vals
@staticmethod
def _compute_tip_from_line(xs, vals):
"""
xs: (N,) x grid
vals: (N,) φ(x,0) (με NaN σε σημεία που δεν αξιολογήθηκαν)
Επιστρέφει x_tip από ΠΡΩΤΟ zero-crossing (πιο αριστερό). Αν δεν υπάρχει, min|φ|.
"""
ok = np.isfinite(vals)
if np.count_nonzero(ok) < 2:
return None
xv = xs[ok]
v = vals[ok]
s = np.sign(v)
idx = np.where(s[:-1] * s[1:] < 0)[0]
if idx.size > 0:
j = idx[0]
x0, x1 = xv[j], xv[j + 1]
f0, f1 = v[j], v[j + 1]
return float(x0 - f0 * (x1 - x0) / (f1 - f0))
j = int(np.argmin(np.abs(v)))
return float(xv[j])
def update(self, phi, step, t, dt):
# 1) local evaluations
idx_local, vals_local = self._local_eval_on_points(phi)
# 2) gather indices + values to rank 0
all_idx = self.comm.gather(idx_local, root=0)
all_vals = self.comm.gather(vals_local, root=0)
x_tip = None
v_tip = 0.0
if self.rank == 0:
# 3) stitch full line values
vals = np.full(self.npts, np.nan, dtype=np.float64)
for idx, vv in zip(all_idx, all_vals):
vals[idx] = vv # κάθε point πρέπει να “ανήκει” σε ένα rank
# 4) compute tip from the full line
x_tip = self._compute_tip_from_line(self.xs, vals)
# 5) velocity
if x_tip is None or self.x_tip_old is None:
v_tip = 0.0
else:
v_tip = (x_tip - self.x_tip_old) / dt
self.x_tip_old = x_tip
# 6) write
if self.f is not None:
self.f.write(f"{step},{t},{x_tip},{v_tip}\n")
# 7) broadcast x_tip ώστε όλοι να έχουν ίδιο state (προαιρετικό αλλά καλό)
x_tip = self.comm.bcast(x_tip, root=0)
v_tip = self.comm.bcast(v_tip, root=0)
self.x_tip_old = x_tip
return x_tip, v_tip
def close(self):
if self.rank == 0 and self.f is not None:
self.f.close()
##################################
def tau(phi):
gradphi=grad(phi)
absgradphi = sqrt(inner( gradphi, gradphi)+1e-12)
abs4gradphi=absgradphi**4
sum_grad_phi4= gradphi[0]**4 + gradphi[1]**4
whole = sum_grad_phi4/abs4gradphi
a=(1-3*e4)*(1+((4*e4)/(1-3*e4))*whole)
return tau0*(a)**2
def W(phi):
gradphi=grad(phi)
absgradphi = sqrt(inner( gradphi, gradphi)+1e-12)
abs4gradphi=absgradphi**4
sum_grad_phi4= gradphi[0]**4 + gradphi[1]**4
whole = sum_grad_phi4/abs4gradphi
a=(1-3*e4)*(1+((4*e4)/(1-3*e4))*whole)
return W0*a
def omega(phi):
gradphi=grad(phi)
absgradphi = sqrt(inner( gradphi, gradphi)+1e-12)
abs4gradphi=absgradphi**4
V1= gradphi[0]*(( gradphi[1]**2)*(( gradphi[0]**2)-( gradphi[1]**2)))
V2= gradphi[1]*(( gradphi[0]**2)*(( gradphi[1]**2)-( gradphi[0]**2)))
V=as_vector([V1,V2])
return ((16*e4*W0)/abs4gradphi)*V
def f(phi,u):
return (phi-l*u*(1-phi**2))*(1-phi**2)
def build_system(domain, dt, D, petsc_options,
old_sol=None, old_sol0=None,
init_phi=None, init_u=None,
petsc_prefix="Crystal_growth"):
"""
Χτίζει: ME, sol, sol0, problem πάνω στο current domain (mesh).
- An old_sol/old_sol0 δοθούν: μεταφέρει λύση nonmatching (για mesh change).
- Αλλιώς, an init_phi/init_u δοθούν: βάζει τα initial initial conditions .
"""
# 1) Spaces + functions
P1 = element("Lagrange", domain.basix_cell(), 1, dtype=default_real_type)
ME = fem.functionspace(domain, mixed_element([P1, P1]))
sol = fem.Function(ME)
sol0 = fem.Function(ME)
# 2) είτε μεταφορά λύσης (mesh changed) είτε initial conditions
if old_sol is not None and old_sol0 is not None:
# Old components
old_phi = old_sol.sub(0)
old_u = old_sol.sub(1)
old_phi0 = old_sol0.sub(0)
old_u0 = old_sol0.sub(1)
# Collapsed spaces + maps
Vphi_new, map_phi = ME.sub(0).collapse()
Vu_new, map_u = ME.sub(1).collapse()
phi_new = fem.Function(Vphi_new)
u_new = fem.Function(Vu_new)
phi0_new = fem.Function(Vphi_new)
u0_new = fem.Function(Vu_new)
# Nonmatching interpolation data
tdim = domain.topology.dim
num_local_cells = domain.topology.index_map(tdim).size_local
new_cells = np.arange(num_local_cells, dtype=np.int32)
idata_phi = fem.create_interpolation_data(Vphi_new, old_phi.function_space, new_cells)
idata_u = fem.create_interpolation_data(Vu_new, old_u.function_space, new_cells)
# Interpolate old -> new
phi_new.interpolate_nonmatching(old_phi, new_cells, idata_phi)
phi0_new.interpolate_nonmatching(old_phi0, new_cells, idata_phi)
u_new.interpolate_nonmatching(old_u, new_cells, idata_u)
u0_new.interpolate_nonmatching(old_u0, new_cells, idata_u)
# Write back into mixed sol/sol0
sol.sub(0).x.array[map_phi] = phi_new.x.array
sol.sub(1).x.array[map_u] = u_new.x.array
sol0.sub(0).x.array[map_phi] = phi0_new.x.array
sol0.sub(1).x.array[map_u] = u0_new.x.array
sol.x.scatter_forward()
sol0.x.scatter_forward()
else:
# initial conditions (στην αρχή)
if init_phi is not None:
sol.sub(0).interpolate(init_phi)
if init_u is not None:
sol.sub(1).interpolate(init_u)
sol.x.scatter_forward()
sol0.x.array[:] = sol.x.array
sol0.x.scatter_forward()
# 3) Weak form (πάντα ξαναχτίζεται στο νέο ME/sol/sol0)
q, v = ufl.TestFunctions(ME)
phi, u = ufl.split(sol)
phi0, u0 = ufl.split(sol0)
F1 = (
ufl.inner(u, v) * ufl.dx
- ufl.inner(u0, v) * ufl.dx
+ dt * D * ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
- 0.5 * ufl.inner(phi, v) * ufl.dx
+ 0.5 * ufl.inner(phi0, v) * ufl.dx
)
F2 = (
ufl.inner(tau(phi0) * phi, q) * ufl.dx
- ufl.inner(tau(phi0) * phi0, q) * ufl.dx
+ dt * ufl.inner(W(phi0)**2 * ufl.grad(phi), ufl.grad(q)) * ufl.dx
- dt * ufl.inner(f(phi0, u0), q) * ufl.dx
+ dt * ufl.inner(W(phi0) * omega(phi0), ufl.grad(q)) * ufl.dx
)
F = F1 + F2
# 4) Problem
problem = NonlinearProblem(F, sol,
petsc_options_prefix=petsc_prefix,
petsc_options=petsc_options)
return ME, sol, sol0, problem
def mark_cells_phi_grad(phi: fem.Function, theta: float = 0.5):
mesh = phi.function_space.mesh
W = fem.functionspace(mesh, ("DG", 0))
eta = fem.Function(W)
ip = W.element.interpolation_points
if Version(dolfinx.__version__) < Version("0.10.0"):
ip = ip()
eta.interpolate(fem.Expression(ufl.inner(ufl.grad(phi), ufl.grad(phi)), ip))
eta_max = eta.x.petsc_vec.max()[1]
should_refine = ufl.conditional(ufl.gt(eta, theta * eta_max), 1, 0)
markers = fem.Function(W)
markers.interpolate(fem.Expression(should_refine, ip))
return np.flatnonzero(markers.x.array.astype(np.int32) == 1)
#ΔΗΛΩΣΗ ΤΟΥ ΤΙ OPTIONS ΘΑ ΕΧΕΙ Ο SOLVER #######
use_superlu = PETSc.IntType == np.int64
sys = PETSc.Sys()
if sys.hasExternalPackage("mumps") and not use_superlu:
linear_solver = "mumps"
elif sys.hasExternalPackage("superlu_dist"):
linear_solver = "superlu_dist"
else:
linear_solver = "petsc"
petsc_options = {
"snes_type": "newtonls",
"snes_linesearch_type": "basic",
"snes_atol": 1e-6,
"snes_rtol": 1e-6,
"snes_monitor": None,
"ksp_error_if_not_converged": True,
"ksp_type": "gmres",
"ksp_rtol": 1e-8,
"ksp_monitor": None,
"pc_type": "hypre",
"pc_hypre_type": "boomeramg",
"pc_hypre_boomeramg_max_iter": 1,
"pc_hypre_boomeramg_cycle_type": "v",
}
###################################################ARXIKI SINTHIKI GIA TO PHI PARAGONTAS PHI ######
def Initial_condition_PHI(x):
r1 = np.sqrt(x[0]**2 + x[1]**2)
return -np.tanh((r1 - r0) / np.sqrt(2))
##############ARXIKI ADIASTATIS SIGKENTRWSIS%%%%%%%%%%%%%%%%%
def Initial_condition_U(x):
r2 = np.sqrt(x[0]**2 + x[1]**2)
u_init = np.where(r2<=r0,
0,
-Delta * (1 - np.exp(-(r2 -r0)/2.5))
)
return u_init
######################################################ARXIKI DILWSI TOY PROBLIMATOS
ME, sol, sol0, problem = build_system(
domain, dt, D, petsc_options,
init_phi=Initial_condition_PHI,
init_u=Initial_condition_U
)
out_folder = Path("code_3_8cores")
out_folder.mkdir(parents=True, exist_ok=True)
file = XDMFFile(MPI.COMM_WORLD,out_folder / f"output_mesh{mesh_id}.xdmf", "w")
file.write_mesh(domain)
log_path = out_folder / "residuals.txt"
if rank == 0:
f_log = open(log_path, "w", buffering=1) # line-buffered
f_log.write("step,t,snes_reason,snes_its,snes_norm,ksp_its,ksp_rnorm\n")
else:
f_log = None
###############################
step = 0
# --- Initial refinement passes (before time loop) ---
##############################arxiko refinement###############
gdim = domain.geometry.dim
Vscal = fem.functionspace(domain, ("CG", 1))
Vvec = fem.functionspace(domain, ("CG", 1, (gdim,))) # vector space
# SubFunction is ok for UFL
# ∇phi (vector)
tip_tracker = TipTracker(
mesh=domain,
x_min=-200.0,
x_max=0.0,
y0=0.0,
npts=2000,
out_path=str(out_folder / "tip_velocity.csv"),
comm=MPI.COMM_WORLD
)
while t < T:
t += dt
step += 1
problem.solve()
sol.x.scatter_forward()
# diagnostics
converged_reason = problem.solver.getConvergedReason()
num_iterations = problem.solver.getIterationNumber()
snes = problem.solver
ksp = snes.getKSP()
snes_reason = snes.getConvergedReason()
snes_its = snes.getIterationNumber()
# SNES residual norm
snes_norm = snes.getFunctionNorm()
# KSP stats (τελευταίο linear solve)
ksp_its = ksp.getIterationNumber()
ksp_rnorm = ksp.getResidualNorm()
# Γράψιμο ΜΕΣΑ στο loop
if rank == 0:
f_log.write(f"{step},{t},{snes_reason},{snes_its},{snes_norm},{ksp_its},{ksp_rnorm}\n")
# 1) διάνυσμα gradient
phi = sol.sub(0)
tip_tracker.update(phi, step=step, t=t, dt=dt)
grad_phi = fem.Function(Vvec, name="grad_phi")
ip_vec = Vvec.element.interpolation_points
if Version(dolfinx.__version__) < Version("0.10.0"):
ip_vec = ip_vec()
grad_phi.interpolate(fem.Expression(ufl.grad(phi), ip_vec))
# |∇phi|^2 (scalar)
grad2 = fem.Function(Vscal, name="grad_phi_sq")
ip_s = Vscal.element.interpolation_points
if Version(dolfinx.__version__) < Version("0.10.0"):
ip_s = ip_s()
grad2.interpolate(fem.Expression(ufl.inner(ufl.grad(phi), ufl.grad(phi)), ip_s))
sol0.x.array[:] = sol.x.array
sol0.x.scatter_forward()
# Write output
if step % 150==0:
phi_out = sol.sub(0)
u_out = sol.sub(1)
file.write_function(phi_out, t)
file.write_function(u_out, t)
file.write_function(grad_phi,t)
file.flush()
tip_tracker.close()
file.close()
if rank == 0 and f_log is not None:
f_log.close()
if rank == 0:
print(f"Simulation completed: {step} steps, final time t={t}")








