Segmentation fault while running on parallel cores

Hello, I am trying to run a simulation of a nonlinear variational problem using FEniCS 2019.1.0 on parallel cores. With the superlu_dist linear solver I get a segmentation fault after two or three Newton iterations. I could not pinpoint the exact issue. Could anyone check whether my method is correct, as I am new to FEniCS?

from fenics import *
from datetime import datetime
import os

# Import MPI.
# Legacy FEniCS exposes the MPI wrapper through the star import above; fall
# back to a serial configuration (comm=None, rank=0) when it is unavailable.
try:
    comm = MPI.comm_world
    rank = MPI.rank(comm)
except (NameError, AttributeError):
    # Narrowed from a bare `except:`, which would also have swallowed
    # KeyboardInterrupt and unrelated programming errors.
    comm = None
    rank = 0  # Fallback for non-MPI runs


# Start message (printed once, from the root process only).
if rank == 0:
    # datetime.fromtimestamp(datetime.timestamp(datetime.now())) was a lossy
    # no-op round-trip; printing datetime.now() directly is equivalent.
    print("Starting dendrite_example.py at ", datetime.now())


"""
SELECT SIMULATION  0..2
"""
n_sim = 0



"""
PARAMETERS
"""

# Mesh dimension and size
lox= 200
loy= 100
nx, ny = 400, 200

# Applied voltage
phie = -0.45

# Normalized Parameters 
L = Constant(6.25)
kappa = Constant(0.3)
Ls = Constant(0.001)
alpha = Constant(0.5)
AA = Constant(38.69)   # nF/RT
W = Constant(2.4)
es = Constant(-13.8)
el = Constant(2.631)
A = Constant(1.0)      # R*T/R*T
c0 = Constant(1.0/14.89)
dv = Constant(5.5)     # Csm/Clm
M0 = Constant(317.9)
S1 = Constant(1000000)
S2 = Constant(1.19)
ft2 = Constant(0.0074)



"""
MESH AND FUNCTIONS
"""

# Create mesh and define function space
mesh = RectangleMesh(comm, Point(0, 0), Point(lox, loy), nx, ny)
P1 = FiniteElement('P', triangle, 1)
V = FunctionSpace(mesh, MixedElement([P1,P1,P1]))

# Define trial and  test functions
v_1, v_2, v_3 = TestFunctions(V)

# Define functions for solutions at previous and at current time
u = Function(V)                # At current time
u_n = Function(V)

# Split system function to access the components
xi, w, phi = split(u)
xi_n, w_n, phi_n = split(u_n)



"""
INITIAL CONDITIONS
"""

# Create initial conditions
if n_sim == 2:
    ruido = Expression('0.04*sin(0.0628*(x[1]-25))', degree=3)
elif n_sim == 1:
    ruido = Expression('0.175*exp(-0.1*pow(x[1]-50.,2.0))', degree=3)
else:
    ruido = Expression('0.04*sin(x[1])', degree=3)
u_init = Expression(('0.5*(1.0-1.0*tanh((x[0]-20.0+ruido)*2))','x[0] < (20.0) ? -10.0 : 0.0','-0.225*(1.0-tanh((x[0]-20.0+ruido)*2))'), degree=3, ruido=ruido)
u.interpolate(u_init)
u_n.interpolate(u_init)


"""
BOUNDARY CONDITIONS
"""

# Define boundary conditions

# Boundaries x=0, x=lox (the functions below test x[0], the x-coordinate)
def boundary0(x, on_boundary):
    """Return True on the left edge of the domain (x[0] == 0)."""
    if not on_boundary:
        return False
    return near(x[0], 0)
def boundaryL(x, on_boundary):
    """Return True on the right edge of the domain (x[0] == lox)."""
    if not on_boundary:
        return False
    return near(x[0], lox)

# Boundary conditions for xi (phase field): xi = 1 at x = 0, xi = 0 at x = lox
bc_xi1 = DirichletBC(V.sub(0), Constant(1.0), boundary0)
bc_xi2 = DirichletBC(V.sub(0), Constant(0.0), boundaryL)

# Boundary condition for w (called "mu" here) — imposed at x = lox only
bc_c2 = DirichletBC(V.sub(1), Constant(0.0), boundaryL)

# Boundary conditions for phi: applied voltage phie at x = 0, grounded at x = lox
bc_phi1 = DirichletBC(V.sub(2), Constant(phie), boundary0)
bc_phi2 = DirichletBC(V.sub(2), Constant(0.0), boundaryL)

# Gather all boundary conditions in a variable
bcs = [bc_xi1, bc_xi2, bc_c2, bc_phi1, bc_phi2 ] # Dirichlet



"""
VARIATIONAL PROBLEM
"""

# Switching Function Material
def h(_x):
    """Smooth switching polynomial h(x) = x^3 (6x^2 - 15x + 10)."""
    quad = Constant(6.0)*_x**2 - Constant(15.0)*_x + Constant(10.0)
    return _x**3 * quad
def dh(_x):
    """Derivative of h: 30 x^2 (x - 1)^2."""
    shifted = _x - Constant(1.0)
    return Constant(30.0)*_x*_x*shifted*shifted

# Barrier Function Material
def g(_x):
    """Double-well barrier g(x) = W x^2 (1 - x)^2."""
    well = (Constant(1.0) - _x)**2
    return W*_x**2.0*well
def dg(_x):
    """Derivative of g: 2W (x (1-x)^2 - (1-x) x^2)."""
    one_minus = Constant(1.0) - _x
    return W*Constant(2.0)*(_x*one_minus**2 - one_minus*_x**2)

# Concentration
def cl(_x):
    """Sigmoid cl(w) = e^{(w-el)/A} / (1 + e^{(w-el)/A})."""
    boltz = exp((_x - el)/A)
    return boltz/(Constant(1.0) + boltz)
def dcldw(_x):
    """Analytic derivative of cl with respect to w.

    Algebraically equal to exp((w-el)/A) / (A*(1 + exp((w-el)/A))**2); here
    numerator and denominator were multiplied through by exp(2*el/A).
    """
    return exp((_x+el)/A)/(A*(exp(_x/A)+exp(el/A))**2)
def cs(_x):
    """Sigmoid cs(w) = e^{(w-es)/A} / (1 + e^{(w-es)/A})."""
    boltz = exp((_x - es)/A)
    return boltz/(Constant(1.0) + boltz)
def dcsdw(_x):
    """Analytic derivative of cs with respect to w.

    Algebraically equal to exp((w-es)/A) / (A*(1 + exp((w-es)/A))**2); here
    numerator and denominator were multiplied through by exp(2*es/A).
    """
    return exp((_x+es)/A)/(A*(exp(_x/A)+exp(es/A))**2)

# Susceptibility factor
def chi(_xi,_w):
    """Susceptibility: dcl/dw weighted by (1-h) plus dv*dcs/dw weighted by h."""
    hs = h(_xi)
    return dcldw(_w)*(Constant(1.0) - hs) + dcsdw(_w)*hs*dv

# Mobility defined by D*c/(R*T), where R*T is normalized by the chemical potential
# M0*(1-h) is the effective diffusion coefficient; cl*(1-h) is the ion concentration
def D(_xi,_w):
    """Mobility D*c/(R*T): M0*(1-h) effective diffusivity times cl*(1-h)."""
    liquid_frac = Constant(1.0) - h(_xi)
    return M0*liquid_frac*cl(_w)*liquid_frac

# Feature of diffusion
def ft(_w):
    """Concentration difference cs*dv - cl between the two phases."""
    solid_part = cs(_w)*dv
    return solid_part - cl(_w)

# Effective conductivity, Derivative Parsed Material
def Le1(_xi):
    """Effective conductivity: blends S1 (where h=1) and S2 (where h=0)."""
    hs = h(_xi)
    return S1*hs + S2*(Constant(1.0) - hs)


# Numerical Parameters (time for evolution)
t = 0.0        # current simulation time
Tf = 108.0     # final time
dt = 0.02      # time-step size
# round() instead of int(): int() truncates toward zero, so any downward
# floating-point rounding in Tf/dt (e.g. Tf=81.0, dt=0.03 -> 2699.999...)
# would silently drop the final step. round() gives the exact step count
# whenever Tf is an integer multiple of dt; here it is 5400 either way.
num_steps = round(Tf/dt)
k = Constant(dt)  # time step as a UFL Constant


# Define variational problem (backward Euler in time, tested with v_1..v_3).
# F1: phase-field equation; the Ls*(exp(...) - 14.89*cl*(1-h)*exp(-...)) term
#     looks Butler-Volmer-like (note 14.89 == 1/c0 as defined above) —
#     TODO confirm against the model reference.
F1 = (xi-xi_n)/k*v_1*dx + L*kappa*dot(grad(xi),grad(v_1))*dx + L*dg(xi)*v_1*dx + Ls*(exp(phi*AA/Constant(2.0))-Constant(14.89)*cl(w)*(Constant(1.0)-h(xi))*exp(-phi*AA/Constant(2.0)))*dh(xi)*v_1*dx
# F2: species transport — diffusion (grad(w)), migration (grad(phi)), and a
#     source from the moving interface ((h(xi)-h(xi_n))/k).
F2 = chi(xi,w)*(w-w_n)/k*v_2*dx + D(xi, w)*dot(grad(w),grad(v_2))*dx + D(xi, w)*AA*dot(grad(phi),grad(v_2))*dx + (h(xi)-h(xi_n))/k*ft(w)*v_2*dx
# F3: potential equation with effective conductivity Le1 and an
#     interface-motion source scaled by ft2.
F3 = Le1(xi)*dot(grad(phi),grad(v_3))*dx + ft2*(xi-xi_n)/k*v_3*dx
F = F1 + F2 + F3

# Forced parameter
# Simulation 2 only: add an advection-like forcing -Lg * d(xi)/dx tested
# against v_1, with the same wavelength as the n_sim == 2 noise.
if n_sim == 2:
    Lg = Expression('-0.2*(1.+sin(0.0628*(x[1]-25)))', element = V.ufl_element().sub_elements()[0])
    F = F-Lg*xi.dx(0)*v_1*dx

# Jacobian
# Gateaux derivative of F with respect to u, used by the Newton solver.
J = derivative(F,u)



"""
SOLVE AND SAVE SOLUTIONS
"""

# Create the save directory if it doesn't exist
if rank == 0:
    os.makedirs(f"saved/sim{n_sim+1}", exist_ok=True)
# All processes wait here until rank 0 is done
if comm is not None:
    comm.Barrier()

# Use XDMFFile for parallel-safe I/O.
xdmffile = XDMFFile(comm, f"saved/sim{n_sim+1}/solution.xdmf")
xdmffile.parameters["flush_output"] = True
xdmffile.parameters["functions_share_mesh"] = True


# Define parallel solver parameters.
solver_params = {
    "newton_solver": {
        "absolute_tolerance": 1.0e-6,
        "relative_tolerance": 1.0e-6,
        "maximum_iterations": 100,
        "linear_solver": "superlu_dist",     
        "preconditioner": "default"
    }
}


# Create progress bar
if rank == 0:
    progress = Progress('Time-stepping', num_steps)
else:
    # Set log level to ERROR for other processes to minimize spam
    set_log_level(LogLevel.ERROR)
    

# Time step
# Snapshot interval expressed in whole steps: the original test
# `t % 5.0 < 0.001 or t % 5.0 > 4.999` relied on a float-modulo tolerance and
# breaks for dt values whose accumulated rounding error exceeds 1e-3; exact
# integer arithmetic saves at the same steps (every 5.0 time units) robustly.
save_every = max(1, round(5.0 / dt))

# Time step
for n in range(num_steps):

    # Update time
    t += dt

    # Solve the nonlinear problem at the new time level (Newton's method with
    # the analytic Jacobian J computed above).
    solve(F == 0, u, bcs, J=J, solver_parameters=solver_params)

    # Update previous solution
    u_n.assign(u)

    if rank == 0:
        # Timestamp — datetime.now() directly; the previous
        # fromtimestamp(timestamp(now())) round-trip was a lossy no-op.
        print("step = ", n, "timestamp =", datetime.now())

    # Save solution every 5 time units of simulation (every save_every steps)
    if (n + 1) % save_every == 0:

        xdmffile.write(u, t)

        if rank == 0:
            # Timestamp
            fsolution = f"saved/sim{n_sim+1}/solution.xdmf"  # Just for the print message
            print("Solution saved to ", fsolution, " at time t=", t, " at ", datetime.now())

    # Update progress bar
    if rank == 0:
        set_log_level(LogLevel.PROGRESS)
        progress += 1
        set_log_level(LogLevel.ERROR)  # Set back to ERROR to avoid solver spam

# Save last solution
xdmffile.write(u, t)
xdmffile.close()  # Close the file


# End message (root process only).
# datetime.now() directly replaces the redundant and lossy
# fromtimestamp(timestamp(now())) round-trip used previously.
if rank == 0:
    print("Completed at ", datetime.now())

These are the logs:

Starting dendrite_example.py at  2025-11-01 18:55:14.227334
Process 0: Solving nonlinear variational problem.
  Process 0: Newton iteration 0: r (abs) = 2.056e+06 (tol = 1.000e-06) r (rel) = 1.000e+00 (tol = 1.000e-06)
  Process 0: Newton iteration 1: r (abs) = 1.167e+04 (tol = 1.000e-06) r (rel) = 5.675e-03 (tol = 1.000e-06)
--------------------------------------------------------------------------
prterun noticed that process rank 1 with PID 1057381 on node n226 exited on
signal 11 (Segmentation fault).
--------------------------------------------------------------------------

For what it’s worth, your test case converges successfully for me (using debian unstable, openmpi with 4 processes)

Starting dendrite_example.py at  2025-11-01 19:31:01.685950
Calling FFC just-in-time (JIT) compiler, this may take some time.
Calling FFC just-in-time (JIT) compiler, this may take some time.
  WARNING: The number of integration points for each cell will be: 100
           Consider using the option 'quadrature_degree' to reduce the number of points
Calling FFC just-in-time (JIT) compiler, this may take some time.
  WARNING: The number of integration points for each cell will be: 225
           Consider using the option 'quadrature_degree' to reduce the number of points
Process 0: Solving nonlinear variational problem.
  Process 0: Newton iteration 0: r (abs) = 2.056e+06 (tol = 1.000e-06) r (rel) = 1.000e+00 (tol = 1.000e-06)
  Process 0: Newton iteration 1: r (abs) = 1.152e+04 (tol = 1.000e-06) r (rel) = 5.604e-03 (tol = 1.000e-06)
  Process 0: Newton iteration 2: r (abs) = 1.152e+03 (tol = 1.000e-06) r (rel) = 5.604e-04 (tol = 1.000e-06)
  Process 0: Newton iteration 3: r (abs) = 3.571e+01 (tol = 1.000e-06) r (rel) = 1.737e-05 (tol = 1.000e-06)
  Process 0: Newton iteration 4: r (abs) = 1.636e+00 (tol = 1.000e-06) r (rel) = 7.960e-07 (tol = 1.000e-06)
  Process 0: Newton solver finished in 4 iterations and 4 linear solver iterations.
step =  0 timestamp = 2025-11-01 19:41:40.577844

FEniCS uses MPI parallelisation, while the lower-level linear algebra libraries ultimately use BLAS. BLAS might use threading (OpenMP or pthreads), depending on the implementation. The two kinds of parallelisation, MPI vs threading, tend to cause problems when run together; joint usage needs to be managed. The simplest way to manage BLAS with MPI is to permit only one BLAS thread per MPI process by setting the environment variable OMP_NUM_THREADS=1. Try it and see if it helps.

If BLAS is the problem, then you’d also want to check you’ve installed an optimised implementation such as OpenBLAS (it might be available in serial, openmp or pthread variants).

1 Like