Dolfinx seems much slower than dolfin in solving nonlinear mechanics

I am currently migrating a significant amount of code to DOLFINx. Meanwhile, I have developed two minimal examples—one for DOLFINx and another for DOLFIN—to compare their performance in solving nonlinear mechanics problems.

Preliminary observations suggest that DOLFINx runs considerably slower than DOLFIN, even though both were installed using Spack.

Using dolfinx

spack:
  # add package specs to the `specs` list
  specs:
  - python
  - adios2+python
  - fenics-dolfinx+adios2+petsc  cflags=-O3 fflags=-O3
  - py-fenics-dolfinx cflags=-O3 fflags=-O3
  - py-pip
  - python +optimizations
  view: true
  concretizer:
    unify: true
  packages:
    gcc:
      externals:
      - spec: gcc@13.3.0 languages:='c,c++,fortran'
        prefix: /usr
        extra_attributes:
          compilers:
            c: /usr/bin/gcc
            cxx: /usr/bin/g++
            fortran: /usr/bin/gfortran
from dolfinx import log, default_scalar_type
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
import numpy as np
import ufl

from mpi4py import MPI
from dolfinx import fem, mesh



########################################################################
## 1. 定义网格与函数空间
########################################################################
L = 20.0
domain = mesh.create_box(MPI.COMM_WORLD, [[0.0, 0.0, 0.0], [L, 1, 1]], [
                         40, 10, 10], mesh.CellType.tetrahedron)
V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim, )))


########################################################################
## 2. 定义边界
########################################################################
def left(x):
    return np.isclose(x[0], 0)

def right(x):
    return np.isclose(x[0], L)

V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim, )))
fdim = domain.topology.dim - 1
left_facets = mesh.locate_entities_boundary(domain, fdim, left)
right_facets = mesh.locate_entities_boundary(domain, fdim, right)
marked_facets = np.hstack([left_facets, right_facets])
marked_values = np.hstack(
    [np.full_like(left_facets, 1), np.full_like(right_facets, 2)])
sorted_facets = np.argsort(marked_facets)
facet_tag = mesh.meshtags(
    domain, fdim, marked_facets[sorted_facets], marked_values[sorted_facets])
u_bc = np.array((0,) * domain.geometry.dim, dtype=default_scalar_type)
left_dofs = fem.locate_dofs_topological(V, facet_tag.dim, facet_tag.find(1))
bcs = [fem.dirichletbc(u_bc, left_dofs, V)]


########################################################################
## 3. 定义本构关系
########################################################################
B = fem.Constant(domain, default_scalar_type((0, 0, 0)))
T = fem.Constant(domain, default_scalar_type((0, 0, 0)))

v = ufl.TestFunction(V)
disp = fem.Function(V)

d = len(disp)
I = ufl.variable(ufl.Identity(d))
F = ufl.variable(I + ufl.grad(disp))
C = ufl.variable(F.T * F)
Ic = ufl.variable(ufl.tr(C))
J = ufl.variable(ufl.det(F))

E = default_scalar_type(1.0e4)
nu = default_scalar_type(0.3)
mu = fem.Constant(domain, E / (2 * (1 + nu)))
lmbda = fem.Constant(domain, E * nu / ((1 + nu) * (1 - 2 * nu)))
psi = (mu / 2) * (Ic - 3) - mu * ufl.ln(J) + (lmbda / 2) * (ufl.ln(J))**2
P = ufl.diff(psi, F)

########################################################################
## 4. 定义弱形式
########################################################################
metadata = {"quadrature_degree": 4}
ds = ufl.Measure('ds', domain=domain,
                 subdomain_data=facet_tag, metadata=metadata)
dx = ufl.Measure("dx", domain=domain, metadata=metadata)

F = ufl.inner(ufl.grad(v), P) * dx - ufl.inner(v, B) * \
    dx - ufl.inner(v, T) * ds(2)

########################################################################
## 5. 定义非线性求解器
########################################################################
problem = NonlinearProblem(F, disp, bcs)
solver = NewtonSolver(domain.comm, problem)
solver.atol = 1e-7
solver.rtol = 1e-6
solver.convergence_criterion = "incremental"

########################################################################
## 6. 求解
########################################################################
tval0 = -1.5
from dolfinx.geometry import bb_tree, compute_colliding_cells, compute_collisions_points
tree = bb_tree(domain, domain.geometry.dim)
x0 = np.array([L/2, 0.5, 0.5], dtype=default_scalar_type)
cell_candidates = compute_collisions_points(tree, x0)
cell = compute_colliding_cells(domain, cell_candidates, x0).array
first_cell = cell[0]
log.set_log_level(log.LogLevel.INFO)
for n in range(1, 3):
    T.value[2] = n * tval0
    num_its, converged = solver.solve(disp)
    assert (converged)
    disp.x.scatter_forward()
    print(disp.eval(x0, first_cell)[:3])
time python3 quick-start.py 
[2025-06-13 15:58:21.265] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:22.893] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:24.225] [info] Newton iteration 2: r (abs) = 12.565869115552855 (tol = 1e-07), r (rel) = 0.1002671312122483 (tol = 1e-06)
[2025-06-13 15:58:24.387] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:25.693] [info] Newton iteration 3: r (abs) = 0.8785434206693661 (tol = 1e-07), r (rel) = 0.00701018191625794 (tol = 1e-06)
[2025-06-13 15:58:25.854] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:27.160] [info] Newton iteration 4: r (abs) = 0.39482612960830427 (tol = 1e-07), r (rel) = 0.0031504453038159992 (tol = 1e-06)
[2025-06-13 15:58:27.322] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:28.645] [info] Newton iteration 5: r (abs) = 0.0036403735824912563 (tol = 1e-07), r (rel) = 2.904771745596795e-05 (tol = 1e-06)
[2025-06-13 15:58:28.807] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:30.146] [info] Newton iteration 6: r (abs) = 1.783858658903483e-05 (tol = 1e-07), r (rel) = 1.4233984818049848e-07 (tol = 1e-06)
[2025-06-13 15:58:30.146] [info] Newton solver finished in 6 iterations and 6 linear solver iterations.
[-0.07666793  0.01112278 -1.0976045 ]
[2025-06-13 15:58:30.325] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:31.831] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:33.183] [info] Newton iteration 2: r (abs) = 10.852971025363265 (tol = 1e-07), r (rel) = 0.09295613408995217 (tol = 1e-06)
[2025-06-13 15:58:33.344] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:34.667] [info] Newton iteration 3: r (abs) = 1.6749227835683338 (tol = 1e-07), r (rel) = 0.014345781122591971 (tol = 1e-06)
[2025-06-13 15:58:34.827] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:36.178] [info] Newton iteration 4: r (abs) = 8.261046727151854 (tol = 1e-07), r (rel) = 0.07075619804916829 (tol = 1e-06)
[2025-06-13 15:58:36.340] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:37.664] [info] Newton iteration 5: r (abs) = 0.34888363707070985 (tol = 1e-07), r (rel) = 0.002988202407759547 (tol = 1e-06)
[2025-06-13 15:58:37.825] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:39.147] [info] Newton iteration 6: r (abs) = 0.2895785528927952 (tol = 1e-07), r (rel) = 0.002480251972420242 (tol = 1e-06)
[2025-06-13 15:58:39.308] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:40.632] [info] Newton iteration 7: r (abs) = 0.0004160114978664066 (tol = 1e-07), r (rel) = 3.5631552400037086e-06 (tol = 1e-06)
[2025-06-13 15:58:40.792] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 15:58:42.115] [info] Newton iteration 8: r (abs) = 3.864113793945916e-07 (tol = 1e-07), r (rel) = 3.3096290327269865e-09 (tol = 1e-06)
[2025-06-13 15:58:42.115] [info] Newton solver finished in 8 iterations and 8 linear solver iterations.
[-0.27184167  0.02014685 -2.05597763]

real    0m21.484s
user    0m24.166s
sys     0m0.655s

Using dolfin

spack:
  specs:
  - kokkos-kernels@4.3.00 +cuda cuda_arch=89 %gcc@11.4.0
  - kokkos@4.3.00 ~openmp +cuda +cuda_lambda +cuda_constexpr +wrapper cuda_arch=89
    cxxstd=20
  # FEniCS
  - fenics@2019.1.0.post0 +zlib build_type=Release cflags="-O3"
  - petsc@3.14.0 +mumps +hdf5
  - boost@1.71.0
  - openmpi@4.0.6 +cuda
  - python@3.10.10 +optimizations
  - py-matplotlib
  - py-cython@0.29.36
  # NPUHeart
  - muparser # 2.3.4
  - fmt@10.1.1
  - nlohmann-json@3.11.2
  - py-pip
  - spdlog
  - pugixml
  view: true
  concretizer:
    unify: true

  packages:
    all:
      compiler: [gcc@11.4.0]
from fenics import *


########################################################################
## 1. 定义网格与函数空间
########################################################################
L = 20.0
domain = BoxMesh(Point(0.0, 0.0, 0.0), Point(L, 1.0, 1.0), 40, 10, 10)
V = VectorFunctionSpace(domain, "Lagrange", 1)

# ########################################################################
# ## 2. 定义边界
# ########################################################################
class Left(SubDomain):
    def inside(self, x, on_boundary):
        return x[0] < DOLFIN_EPS and on_boundary

class Right(SubDomain):
    def inside(self, x, on_boundary):
        return near(x[0], L) and on_boundary

boundaries = MeshFunction("size_t", domain, domain.topology().dim()-1)
boundaries.set_all(0)
Left().mark(boundaries, 1)
Right().mark(boundaries, 2)

noslip = Constant((0.0, 0.0, 0.0))
bc0 = DirichletBC(V, noslip, boundaries, 1)
bcs = [bc0]

########################################################################
## 3. 定义本构关系
########################################################################

v = TestFunction(V)
u = TrialFunction(V)
disp = Function(V, name="displacement")


F = variable(grad(disp) + Identity(len(disp)))
C = F.T * F
Ic = tr(C)
J = det(F)

E = 1.0e4
nu = 0.3
mu = Constant(E / (2 * (1 + nu)))
lmbda = Constant(E * nu / ((1 + nu) * (1 - 2 * nu)))
psi = (mu / 2) * (Ic - 3) - mu * ln(J) + (lmbda / 2) * (ln(J))**2
P = diff(psi, F)

########################################################################
## 4. 定义弱形式
########################################################################
ds = Measure('ds', domain=domain, subdomain_data=boundaries)
T = Expression(("0.0", "0.0", "data"), degree=1, domain=domain, data = 0.0)
H = inner(P, grad(v))*dx(degree=5) - inner(v, T)*ds(subdomain_id=2, degree=5)
J = derivative(H, disp, u)

########################################################################
## 5. 定义非线性求解器
########################################################################
solver_parameters={
    "newton_solver": {
        "linear_solver": "lu", 
        "absolute_tolerance": 1.0e-7, 
        "relative_tolerance": 1e-6
    }
}


########################################################################
## 6. 求解
########################################################################
tval0 = -1.5
for n in range(1, 3):
    T.data = n * tval0
    solve(H == 0, disp, J=J, bcs=bcs, solver_parameters=solver_parameters)
    print(disp(L/2, 0.5, 0.5))

time python3 quick_start.py 
Solving nonlinear variational problem.
  Newton iteration 0: r (abs) = 1.425e-01 (tol = 1.000e-07) r (rel) = 1.000e+00 (tol = 1.000e-06)
  Newton iteration 1: r (abs) = 6.930e+02 (tol = 1.000e-07) r (rel) = 4.863e+03 (tol = 1.000e-06)
  Newton iteration 2: r (abs) = 2.177e+01 (tol = 1.000e-07) r (rel) = 1.528e+02 (tol = 1.000e-06)
  Newton iteration 3: r (abs) = 4.303e-02 (tol = 1.000e-07) r (rel) = 3.019e-01 (tol = 1.000e-06)
  Newton iteration 4: r (abs) = 2.350e-02 (tol = 1.000e-07) r (rel) = 1.649e-01 (tol = 1.000e-06)
  Newton iteration 5: r (abs) = 8.959e-07 (tol = 1.000e-07) r (rel) = 6.286e-06 (tol = 1.000e-06)
  Newton iteration 6: r (abs) = 9.850e-11 (tol = 1.000e-07) r (rel) = 6.911e-10 (tol = 1.000e-06)
  Newton solver finished in 6 iterations and 6 linear solver iterations.
[-0.07666793  0.01112278 -1.0976045 ]
Solving nonlinear variational problem.
  Newton iteration 0: r (abs) = 1.425e-01 (tol = 1.000e-07) r (rel) = 1.000e+00 (tol = 1.000e-06)
  Newton iteration 1: r (abs) = 6.010e+02 (tol = 1.000e-07) r (rel) = 4.217e+03 (tol = 1.000e-06)
  Newton iteration 2: r (abs) = 1.618e+01 (tol = 1.000e-07) r (rel) = 1.135e+02 (tol = 1.000e-06)
  Newton iteration 3: r (abs) = 4.238e-01 (tol = 1.000e-07) r (rel) = 2.974e+00 (tol = 1.000e-06)
  Newton iteration 4: r (abs) = 4.155e+00 (tol = 1.000e-07) r (rel) = 2.915e+01 (tol = 1.000e-06)
  Newton iteration 5: r (abs) = 7.550e-03 (tol = 1.000e-07) r (rel) = 5.298e-02 (tol = 1.000e-06)
  Newton iteration 6: r (abs) = 4.902e-03 (tol = 1.000e-07) r (rel) = 3.440e-02 (tol = 1.000e-06)
  Newton iteration 7: r (abs) = 9.470e-09 (tol = 1.000e-07) r (rel) = 6.644e-08 (tol = 1.000e-06)
  Newton solver finished in 7 iterations and 7 linear solver iterations.
[-0.27184167  0.02014685 -2.05597763]

real    0m4.896s
user    0m3.837s
sys     0m1.053s

Modifying the options of the solver can make it much faster, but still slower than legacy dolfin.

# solver.convergence_criterion = "incremental"
ksp = solver.krylov_solver
opts = PETSc.Options()  # type: ignore
option_prefix = ksp.getOptionsPrefix()
opts[f"{option_prefix}ksp_type"] = "preonly"
opts[f"{option_prefix}pc_type"] = "lu"
opts[f"{option_prefix}pc_factor_mat_solver_type"] = "superlu_dist"
ksp.setFromOptions()
time python3 quick-start.py 
[2025-06-13 16:25:10.533] [info] Newton iteration 0: r (abs) = 0.14252192813739212 (tol = 1e-07), r (rel) = inf (tol = 1e-06)
[2025-06-13 16:25:10.692] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:11.136] [info] Newton iteration 1: r (abs) = 693.030452495085 (tol = 1e-07), r (rel) = 5.529913961969366 (tol = 1e-06)
[2025-06-13 16:25:11.298] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:11.660] [info] Newton iteration 2: r (abs) = 21.77428901424835 (tol = 1e-07), r (rel) = 0.17374408930854607 (tol = 1e-06)
[2025-06-13 16:25:11.830] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:12.219] [info] Newton iteration 3: r (abs) = 0.04302579173660394 (tol = 1e-07), r (rel) = 0.00034331669783402443 (tol = 1e-06)
[2025-06-13 16:25:12.382] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:12.745] [info] Newton iteration 4: r (abs) = 0.02349683992978127 (tol = 1e-07), r (rel) = 0.00018748887977729705 (tol = 1e-06)
[2025-06-13 16:25:12.906] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:13.260] [info] Newton iteration 5: r (abs) = 8.959412247456329e-07 (tol = 1e-07), r (rel) = 7.149004592781508e-09 (tol = 1e-06)
[2025-06-13 16:25:13.260] [info] Newton solver finished in 5 iterations and 5 linear solver iterations.
[-0.07666791  0.01112276 -1.09760436]
[2025-06-13 16:25:13.271] [info] Newton iteration 0: r (abs) = 0.14252193470066854 (tol = 1e-07), r (rel) = 0.001137228578846184 (tol = 1e-06)
[2025-06-13 16:25:13.431] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:13.787] [info] Newton iteration 1: r (abs) = 601.0374604496251 (tol = 1e-07), r (rel) = 5.147910045543624 (tol = 1e-06)
[2025-06-13 16:25:13.947] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:14.302] [info] Newton iteration 2: r (abs) = 16.182101246180874 (tol = 1e-07), r (rel) = 0.13860034863866943 (tol = 1e-06)
[2025-06-13 16:25:14.462] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:14.817] [info] Newton iteration 3: r (abs) = 0.4238223468828708 (tol = 1e-07), r (rel) = 0.0036300554634515476 (tol = 1e-06)
[2025-06-13 16:25:14.978] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:15.336] [info] Newton iteration 4: r (abs) = 4.15512332972571 (tol = 1e-07), r (rel) = 0.03558879859762159 (tol = 1e-06)
[2025-06-13 16:25:15.502] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:15.896] [info] Newton iteration 5: r (abs) = 0.0075500412066756995 (tol = 1e-07), r (rel) = 6.466640688758152e-05 (tol = 1e-06)
[2025-06-13 16:25:16.057] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:16.414] [info] Newton iteration 6: r (abs) = 0.0049019810507217616 (tol = 1e-07), r (rel) = 4.198566504523235e-05 (tol = 1e-06)
[2025-06-13 16:25:16.575] [info] PETSc Krylov solver starting to solve system.
[2025-06-13 16:25:16.936] [info] Newton iteration 7: r (abs) = 9.47133706298621e-09 (tol = 1e-07), r (rel) = 8.112238324513367e-11 (tol = 1e-06)
[2025-06-13 16:25:16.936] [info] Newton solver finished in 7 iterations and 7 linear solver iterations.
[-0.27184167  0.02014685 -2.05597763]

real    0m6.885s
user    0m9.189s
sys     0m0.853s

I can’t reproduce your timings.
Using "mumps" rather than superlu yields the following timings with dolfinx (v0.9.0, through docker):

real    0m5.314s
user    0m4.648s
sys     0m0.429s

and the following with legacy:

real    0m5.225s
user    0m4.309s
sys     0m0.483s

If you change to 5th order quadrature (same as you use in legacy) in DOLFINx, and use residual rather than incremental, I get the following timings in DOLFINx:

real    0m4.698s
user    0m4.106s
sys     0m0.353s

Full code is then:

from petsc4py import PETSc
from dolfinx import log, default_scalar_type
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
import numpy as np
import ufl

from mpi4py import MPI
from dolfinx import fem, mesh


########################################################################
## 1. 定义网格与函数空间
########################################################################
L = 20.0
domain = mesh.create_box(
    MPI.COMM_WORLD,
    [[0.0, 0.0, 0.0], [L, 1, 1]],
    [40, 10, 10],
    mesh.CellType.tetrahedron,
)
V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim,)))


########################################################################
## 2. 定义边界
########################################################################
def left(x):
    return np.isclose(x[0], 0)


def right(x):
    return np.isclose(x[0], L)


V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim,)))
fdim = domain.topology.dim - 1
left_facets = mesh.locate_entities_boundary(domain, fdim, left)
right_facets = mesh.locate_entities_boundary(domain, fdim, right)
marked_facets = np.hstack([left_facets, right_facets])
marked_values = np.hstack([np.full_like(left_facets, 1), np.full_like(right_facets, 2)])
sorted_facets = np.argsort(marked_facets)
facet_tag = mesh.meshtags(
    domain, fdim, marked_facets[sorted_facets], marked_values[sorted_facets]
)
u_bc = np.array((0,) * domain.geometry.dim, dtype=default_scalar_type)
left_dofs = fem.locate_dofs_topological(V, facet_tag.dim, facet_tag.find(1))
bcs = [fem.dirichletbc(u_bc, left_dofs, V)]


########################################################################
## 3. 定义本构关系
########################################################################
B = fem.Constant(domain, default_scalar_type((0, 0, 0)))
T = fem.Constant(domain, default_scalar_type((0, 0, 0)))

v = ufl.TestFunction(V)
disp = fem.Function(V)

d = len(disp)
I = ufl.variable(ufl.Identity(d))
F = ufl.variable(I + ufl.grad(disp))
C = ufl.variable(F.T * F)
Ic = ufl.variable(ufl.tr(C))
J = ufl.variable(ufl.det(F))

E = default_scalar_type(1.0e4)
nu = default_scalar_type(0.3)
mu = fem.Constant(domain, E / (2 * (1 + nu)))
lmbda = fem.Constant(domain, E * nu / ((1 + nu) * (1 - 2 * nu)))
psi = (mu / 2) * (Ic - 3) - mu * ufl.ln(J) + (lmbda / 2) * (ufl.ln(J)) ** 2
P = ufl.diff(psi, F)

########################################################################
## 4. 定义弱形式
########################################################################
metadata = {"quadrature_degree": 5}
ds = ufl.Measure("ds", domain=domain, subdomain_data=facet_tag, metadata=metadata)
dx = ufl.Measure("dx", domain=domain, metadata=metadata)

F = ufl.inner(ufl.grad(v), P) * dx - ufl.inner(v, B) * dx - ufl.inner(v, T) * ds(2)

########################################################################
## 5. 定义非线性求解器
########################################################################
problem = NonlinearProblem(F, disp, bcs)
solver = NewtonSolver(domain.comm, problem)
solver.atol = 1e-7
solver.rtol = 1e-6
solver.convergence_criterion = "residual"
opts = PETSc.Options()  # type: ignore
ksp = solver.krylov_solver
option_prefix = ksp.getOptionsPrefix()
opts[f"{option_prefix}ksp_type"] = "preonly"
opts[f"{option_prefix}pc_type"] = "lu"
opts[f"{option_prefix}pc_factor_mat_solver_type"] = "mumps"
ksp.setFromOptions()

########################################################################
## 6. 求解
########################################################################
tval0 = -1.5
from dolfinx.geometry import bb_tree, compute_colliding_cells, compute_collisions_points

tree = bb_tree(domain, domain.geometry.dim)
x0 = np.array([L / 2, 0.5, 0.5], dtype=default_scalar_type)
cell_candidates = compute_collisions_points(tree, x0)
cell = compute_colliding_cells(domain, cell_candidates, x0).array
first_cell = cell[0]
log.set_log_level(log.LogLevel.INFO)
for n in range(1, 3):
    T.value[2] = n * tval0
    num_its, converged = solver.solve(disp)
    assert converged
    disp.x.scatter_forward()
    print(disp.eval(x0, first_cell)[:3])

Thank you very much! I think I should install mumps as backend.

By the way, I have another question about computational efficiency. How does the docker version of DOLFINx utilize so many resources when running in parallel?

When I run simulations using 1 core, 2 cores, and 4 cores in parallel, each process consumes much more than 100% CPU. The fact is that it runs much faster!

image
image

However, when using the Spack-installed version, each process is limited to 100% CPU usage per processor.

Why is there such a difference in resource utilization between the two versions?

To compare the performance, I reinstall the FEniCSx as the Dockerfile in the repository. I found recent few commits will result in compatibility problems.

solver = NewtonSolver(domain.comm, problem)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "/home/kokkos_4/software/dolfinx-real/lib/python3.12/dist-packages/dolfinx/nls/petsc.py", line 57, in __init__
    self._A = create_matrix(problem.a)
                            ^^^^^^^^^
AttributeError: 'NonlinearProblem' object has no attribute 'a'. Did you mean: 'A'?
>>> solver.atol = 1e-7
Exception ignored in: <function NewtonSolver.__del__ at 0x79ab025396c0>
Traceback (most recent call last):
  File "/home/kokkos_4/software/dolfinx-real/lib/python3.12/dist-packages/dolfinx/nls/petsc.py", line 64, in __del__
    self._A.destroy()

# sudo apt-get update
# sudo apt-get dist-upgrade -y
# sudo apt-get install \
#     cmake \
#     g++ \
#     gfortran \
#     libboost-dev \
#     liblapack-dev \
#     libopenblas-dev \
#     libpugixml-dev \
#     ninja-build \
#     pkg-config \
#     python3-dev \
#     python3-pip \
#     python3-venv \
#     catch2 \
#     git \
#     graphviz \
#     libeigen3-dev \
#     valgrind \
#     wget \
#     libglu1 \
#     libxcursor-dev \
#     libxft2 \
#     libxinerama1 \
#     libfltk1.3-dev \
#     libfreetype6-dev  \
#     libgl1-mesa-dev \
#     libocct-foundation-dev \
#     libocct-data-exchange-dev \
#     bison flex

# su kokkos_5
export https_proxy=http://127.0.0.1:7890 http_proxy=http://127.0.0.1:7890 all_proxy=socks5://127.0.0.1:7890
export PYVISTA_VERSION=0.45.2
export PYTHON_VERSION=3.12
export ADIOS2_VERSION=2.10.2
export DOXYGEN_VERSION=1_13_2
export GMSH_VERSION=4_13_1
export HDF5_VERSION=1.14.6
export KAHIP_VERSION=3.18
export NUMPY_VERSION=2.1.3
export PETSC_VERSION=3.23.2
export SLEPC_VERSION=3.23.1
export SPDLOG_VERSION=1.15.1
export PETSC_SLEPC_OPTFLAGS="-O3 -march=native -mtune=native"
export PETSC_SLEPC_DEBUGGING="no"
export BUILD_NP=8
export OPENBLAS_NUM_THREADS=1
export OPENBLAS_VERBOSE=0
export VIRTUAL_ENV=$HOME/dolfinx-env
export PATH=$HOME/dolfinx-env/bin:$PATH

export INSTALL_DIR=$HOME/software
export PATH=$INSTALL_DIR/bin:$PATH
export LD_LIBRARY_PATH=$INSTALL_DIR/lib:$LD_LIBRARY_PATH
export MANPATH=$INSTALL_DIR/share/man:$MANPATH
export PETSC_DIR=$INSTALL_DIR/petsc
export SLEPC_DIR=$INSTALL_DIR/slepc
export PYTHONPATH=$INSTALL_DIR/lib:$PYTHONPATH
export CMAKE_PREFIX_PATH=$INSTALL_DIR/cmake:$CMAKE_PREFIX_PATH
export CMAKE_PREFIX_PATH=$INSTALL_DIR/lib/cmake:$CMAKE_PREFIX_PATH
export PETSC_ARCH=linux-gnu-real32-32:linux-gnu-complex64-32:linux-gnu-real64-32:linux-gnu-complex128-32:linux-gnu-real64-64:linux-gnu-complex128-64
export DOLFINX_CMAKE_BUILD_TYPE="Release"
export KAHIP_DIR=$HOME/software

# Real by default.
export PKG_CONFIG_PATH=$HOME/software/dolfinx-real/lib/pkgconfig:$PKG_CONFIG_PATH
export CMAKE_PREFIX_PATH=$HOME/software/dolfinx-real/lib/cmake:$CMAKE_PREFIX_PATH
export PETSC_ARCH=linux-gnu-real64-32
export PYTHONPATH=$HOME/software/dolfinx-real/lib/python${PYTHON_VERSION}/dist-packages:$PYTHONPATH
export LD_LIBRARY_PATH=$HOME/software/dolfinx-real/lib:$LD_LIBRARY_PATH

mkdir -p ~/tmp &&cd ~/tmp
wget -nc https://github.com/gabime/spdlog/archive/refs/tags/v${SPDLOG_VERSION}.tar.gz
wget -nc https://github.com/kahip/kahip/archive/v${KAHIP_VERSION}.tar.gz
wget -nc https://github.com/doxygen/doxygen/archive/refs/tags/Release_${DOXYGEN_VERSION}.tar.gz
wget https://github.com/HDFGroup/hdf5/archive/refs/tags/hdf5_${HDF5_VERSION}.tar.gz 
wget -nc https://github.com/ornladios/ADIOS2/archive/v${ADIOS2_VERSION}.tar.gz -O adios2-v${ADIOS2_VERSION}.tar.gz
git clone -b gmsh_${GMSH_VERSION} --single-branch --depth 1 https://gitlab.onelab.info/gmsh/gmsh.git
git clone --depth=1 -b v${PETSC_VERSION} https://gitlab.com/petsc/petsc.git ${PETSC_DIR}
git clone --depth=1 -b v${SLEPC_VERSION} https://gitlab.com/slepc/slepc.git ${SLEPC_DIR}
git clone https://github.com/FEniCS/dolfinx.git
git clone https://github.com/FEniCS/basix.git
git clone https://github.com/FEniCS/ufl.git
git clone https://github.com/FEniCS/ffcx.git

cd ~/tmp
tar xfz v${SPDLOG_VERSION}.tar.gz && \
    cd spdlog-${SPDLOG_VERSION} && \
    cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DSPDLOG_BUILD_SHARED=ON -DSPDLOG_BUILD_PIC=ON -DCMAKE_INSTALL_PREFIX=$HOME/software -B build-dir . && \
    cmake --build build-dir && \
    cmake --install build-dir
    rm -rf build-dir

cd ~/tmp
tar xfz Release_${DOXYGEN_VERSION}.tar.gz && \
    cd doxygen-Release_${DOXYGEN_VERSION} && \
    cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$HOME/software -B build-dir . && \
    cmake --build build-dir && \
    cmake --install build-dir && \
    rm -rf build-dir

cd ~/tmp
python3 -m venv ${VIRTUAL_ENV}
pip install --no-cache-dir --upgrade pip setuptools wheel && \
pip install --no-cache-dir cython numpy==${NUMPY_VERSION} && \
pip install --no-cache-dir mpi4py

cd ~/tmp
tar -xf v${KAHIP_VERSION}.tar.gz
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DNONATIVEOPTIMIZATIONS=on -DCMAKE_INSTALL_PREFIX=$HOME/software -B build-dir -S KaHIP-${KAHIP_VERSION}
cmake --build build-dir
cmake --install build-dir
rm -r build-dir

tar xfz hdf5_${HDF5_VERSION}.tar.gz
cmake -G Ninja -DCMAKE_INSTALL_PREFIX=$HOME/software -DCMAKE_BUILD_TYPE=Release -DHDF5_ENABLE_PARALLEL=on -DHDF5_ENABLE_Z_LIB_SUPPORT=on -B build-dir -S hdf5-hdf5_${HDF5_VERSION}
cmake --build build-dir -j${BUILD_NP}
cmake --install build-dir
rm -r build-dir

cd ~/tmp
mkdir -p adios2-v${ADIOS2_VERSION} && tar -xf adios2-v${ADIOS2_VERSION}.tar.gz -C adios2-v${ADIOS2_VERSION} --strip-components 1 
cmake -G Ninja -DADIOS2_USE_HDF5=on -DCMAKE_INSTALL_PREFIX=$HOME/software -DCMAKE_INSTALL_PYTHONDIR=$HOME/software/lib/ -DADIOS2_USE_Fortran=off -DBUILD_TESTING=off -DADIOS2_BUILD_EXAMPLES=off -DADIOS2_USE_ZeroMQ=off -B build-dir-adios -S ./adios2-v${ADIOS2_VERSION}
cmake --build build-dir-adios -j${BUILD_NP} && cmake --install build-dir-adios
rm -r build-dir-adios

cd ~/tmp
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -DENABLE_BUILD_DYNAMIC=1 -DCMAKE_INSTALL_PREFIX=$HOME/software -DCMAKE_INSTALL_PYTHONDIR=$HOME/software/lib/ -DENABLE_OPENMP=1 -B build-dir -S gmsh
cmake --build build-dir -j32 && cmake --install build-dir
rm -r build-dir

cd ${PETSC_DIR}
./configure \
    PETSC_ARCH=linux-gnu-real64-32 \
    --COPTFLAGS="${PETSC_SLEPC_OPTFLAGS}" \
    --CXXOPTFLAGS="${PETSC_SLEPC_OPTFLAGS}" \
    --FOPTFLAGS="${PETSC_SLEPC_OPTFLAGS}" \
    --with-64-bit-indices=no \
    --with-debugging=${PETSC_SLEPC_DEBUGGING} \
    --with-fortran-bindings=no \
    --with-shared-libraries \
    --download-hypre \
    --download-metis \
    --download-mumps-avoid-mpi-in-place \
    --download-mumps \
    --download-ptscotch \
    --download-scalapack \
    --download-spai \
    --download-suitesparse \
    --download-superlu \
    --download-superlu_dist \
    --with-scalar-type=real \
    --with-precision=double

make PETSC_ARCH=linux-gnu-real64-32 ${MAKEFLAGS} all -j${BUILD_NP}
make PETSC_DIR=/home/pengfei/software/petsc PETSC_ARCH=linux-gnu-real64-32 check 
cd src/binding/petsc4py   &&  pip -v install --no-cache-dir --no-build-isolation . 

cd ${SLEPC_DIR}
export PETSC_ARCH=linux-gnu-real64-32 && ./configure && make -j${BUILD_NP}
cd src/binding/slepc4py && pip -v install --no-cache-dir --no-build-isolation . 

cd ~/tmp
pip install --no-cache-dir -r dolfinx/python/build-requirements.txt
pip install --no-cache-dir pyamg pytest scipy matplotlib numba

cd ~/tmp/basix && cmake -G Ninja -DCMAKE_BUILD_TYPE=${DOLFINX_CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=$HOME/software -B build-dir -S ./cpp
cmake --build build-dir -j${BUILD_NP} && cmake --install build-dir

pip install ./python && \
    cd ../ufl && pip install --no-cache-dir . && \
    cd ../ffcx && pip install --no-cache-dir . && \
    cd ../ && pip install --no-cache-dir ipython

cd ~/tmp/dolfinx && mkdir -p build-real && cd build-real
PETSC_ARCH=linux-gnu-real64-32 cmake -G Ninja -DCMAKE_INSTALL_PREFIX=$HOME/software/dolfinx-real -DCMAKE_BUILD_TYPE=${DOLFINX_CMAKE_BUILD_TYPE} -DDOLFINX_ENABLE_PETSC=ON -DDOLFINX_ENABLE_SLEPC=ON -DDOLFINX_ENABLE_SCOTCH=ON -DDOLFINX_ENABLE_KAHIP=ON -DDOLFINX_ENABLE_ADIOS2=ON ../cpp 
ninja install -j ${BUILD_NP} && cd ../python
PETSC_ARCH=linux-gnu-real64-32 pip -v install --config-settings=cmake.build-type="${DOLFINX_CMAKE_BUILD_TYPE}" --config-settings=install.strip=false --no-build-isolation --check-build-dependencies --target $HOME/software/dolfinx-real/lib/python${PYTHON_VERSION}/dist-packages --no-dependencies --no-cache-dir '.'

@shaoyaoqian The previous implementation of NonlinearProblem has been deprecated. The new one relies on the SNES interface. See the commit for more. You can either come back to the deprecated implementation wrapped by NewtonSolverNonlinearProblem for now or adapt your code according to the current implementation of NonlinearProblem.

1 Like

This is an updated version of the script:

from mpi4py import MPI

from dolfinx import log, default_scalar_type
from dolfinx.fem.petsc import NonlinearProblem
import numpy as np
import ufl

from dolfinx import fem, mesh


########################################################################
## 1. 定义网格与函数空间
########################################################################
L = 20.0
domain = mesh.create_box(
    MPI.COMM_WORLD,
    [[0.0, 0.0, 0.0], [L, 1, 1]],
    [40, 10, 10],
    mesh.CellType.tetrahedron,
)
V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim,)))


########################################################################
## 2. 定义边界
########################################################################
def left(x):
    return np.isclose(x[0], 0)


def right(x):
    return np.isclose(x[0], L)


V = fem.functionspace(domain, ("Lagrange", 1, (domain.geometry.dim,)))
fdim = domain.topology.dim - 1
left_facets = mesh.locate_entities_boundary(domain, fdim, left)
right_facets = mesh.locate_entities_boundary(domain, fdim, right)
marked_facets = np.hstack([left_facets, right_facets])
marked_values = np.hstack([np.full_like(left_facets, 1), np.full_like(right_facets, 2)])
sorted_facets = np.argsort(marked_facets)
facet_tag = mesh.meshtags(
    domain, fdim, marked_facets[sorted_facets], marked_values[sorted_facets]
)
u_bc = np.array((0,) * domain.geometry.dim, dtype=default_scalar_type)
left_dofs = fem.locate_dofs_topological(V, facet_tag.dim, facet_tag.find(1))
bcs = [fem.dirichletbc(u_bc, left_dofs, V)]


########################################################################
## 3. 定义本构关系
########################################################################
B = fem.Constant(domain, default_scalar_type((0, 0, 0)))
T = fem.Constant(domain, default_scalar_type((0, 0, 0)))

v = ufl.TestFunction(V)
disp = fem.Function(V)

d = len(disp)
I = ufl.variable(ufl.Identity(d))
F = ufl.variable(I + ufl.grad(disp))
C = ufl.variable(F.T * F)
Ic = ufl.variable(ufl.tr(C))
J = ufl.variable(ufl.det(F))

E = default_scalar_type(1.0e4)
nu = default_scalar_type(0.3)
mu = fem.Constant(domain, E / (2 * (1 + nu)))
lmbda = fem.Constant(domain, E * nu / ((1 + nu) * (1 - 2 * nu)))
psi = (mu / 2) * (Ic - 3) - mu * ufl.ln(J) + (lmbda / 2) * (ufl.ln(J)) ** 2
P = ufl.diff(psi, F)

########################################################################
## 4. 定义弱形式
########################################################################
metadata = {"quadrature_degree": 5}
ds = ufl.Measure("ds", domain=domain, subdomain_data=facet_tag, metadata=metadata)
dx = ufl.Measure("dx", domain=domain, metadata=metadata)

cffi_options = ["-Ofast", "-march=native"]
jit_options = {
    "cffi_extra_compile_args": cffi_options,
    "cffi_libraries": ["m"],
}
F = ufl.inner(ufl.grad(v), P) * dx - ufl.inner(v, B) * dx - ufl.inner(v, T) * ds(2)

########################################################################
## 5. 定义非线性求解器
########################################################################

petsc_options = {
    "ksp_type": "preonly",
    "pc_type": "lu",
    "pc_factor_mat_solver_type": "mumps",
    "snes_type": "newtonls",
    "snes_linesearch_type": "none",
    "snes_atol": 1e-7,
    "snes_rtol": 1e-6,
    # "snes_error_if_not_converged": True,
    # "ksp_error_if_not_converged": True,
    "snes_monitor": None,
}
problem = NonlinearProblem(
    F, disp, bcs, petsc_options=petsc_options, jit_options=jit_options
)

########################################################################
## 6. 求解
########################################################################
tval0 = -1.5
from dolfinx.geometry import bb_tree, compute_colliding_cells, compute_collisions_points

tree = bb_tree(domain, domain.geometry.dim)
x0 = np.array([L / 2, 0.5, 0.5], dtype=default_scalar_type)
cell_candidates = compute_collisions_points(tree, x0)
cell = compute_colliding_cells(domain, cell_candidates, x0).array
first_cell = cell[0]
log.set_log_level(log.LogLevel.INFO)
for n in range(1, 3):
    T.value[2] = n * tval0
    _, converged_reason, num_iterations = problem.solve()
    print(f"Step {n}: {converged_reason=} {num_iterations=}")
    print(disp.eval(x0, first_cell)[:3])
#problem._snes.view()