Hello,
I observed a significant slowdown in FFCx compilation speed when using the latest nightly dolfinx compared with the Docker image from Oct. 2nd. Consider this nonlinear elasticity MWE:
#!/usr/bin/env python3
import time, sys
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np
from dolfinx import fem, mesh
import ufl
# Minimal working example: build the residual and Jacobian forms of a
# nonlinear (hyperelastic) elasticity problem on a small tetrahedral box
# mesh, assemble both, and report the total wall-clock time. The forms are
# deliberately complex so that form compilation time becomes measurable.
start = time.time()

comm = MPI.COMM_WORLD

# Box [0, 2] x [0, 1] x [0, 1], 5 subdivisions per direction, no ghosting.
msh = mesh.create_box(
    comm,
    [np.array([0.0, 0.0, 0.0]), np.array([2.0, 1.0, 1.0])],
    [5, 5, 5],
    mesh.CellType.tetrahedron,
    mesh.GhostMode.none,
)
dim = msh.geometry.dim

# Vector-valued, degree-1 "CG" element and the associated function space.
P = ufl.VectorElement("CG", msh.ufl_cell(), 1)
V = fem.FunctionSpace(msh, P)

u = fem.Function(V)            # displacement (the unknown)
du = ufl.TrialFunction(V)      # increment used for the linearization
var_u = ufl.TestFunction(V)    # virtual displacement

# Kinematics: F = I + grad(u) and C = F^T F. C is wrapped in ufl.variable
# so that Psi and S can be differentiated with respect to it below.
F = ufl.Identity(dim) + ufl.grad(u)
C = ufl.variable(F.T*F)

# Invariants of C and their isochoric (volume-scaled) counterparts.
Ic = ufl.tr(C)
IIc = 0.5*(ufl.tr(C)**2. - ufl.tr(C*C))
IIIc = ufl.det(C)
Ic_bar = IIIc**(-1./3.) * Ic
IIc_bar = IIIc**(-2./3.) * IIc

# dev Mooney-Rivlin + vol Ogden
Psi = 1. * (Ic_bar - 3.) + 1. * (IIc_bar - 3.) + (1./4.) * (-2.*ufl.ln(ufl.sqrt(IIIc)) + ufl.sqrt(IIIc)**(2.) - 1.)

# Stress as twice the derivative of the strain energy with respect to C.
S = 2.*ufl.diff(Psi, C)

# inner virtual work
var_C = ufl.grad(var_u).T * F + F.T * ufl.grad(var_u)
dW_int = ufl.inner(S, 0.5*var_C)*ufl.dx

# jacobian - either via ufl's derivative or directly written as form
# (alternative: jac = ufl.derivative(dW_int, u, du))
Cmat = 2.*ufl.diff(S, C)
# Only four free indices are needed for the contraction below; the original
# script requested six and left some of them unused.
i, j, k, l = ufl.indices(4)
# Contract the 4th-order tangent with the linearization of C over (k, l);
# (i, j) remain as the free indices of the resulting 2nd-order tensor.
Ctang = ufl.as_tensor(Cmat[i, j, k, l]*ufl.derivative(C, u, du)[k, l], (i, j))
jac = (ufl.inner(0.5*Ctang, 0.5*var_C) + ufl.inner(S, ufl.derivative(0.5*var_C, u, du)))*ufl.dx

# assemble
# fem.form(...) is where the UFL forms are handed over for compilation;
# presumably this is the step dominated by FFCx - confirm by timing it
# separately from the assembly calls.
r = fem.petsc.assemble_vector(fem.form(dW_int))
# Reverse scatter in ADD mode on the assembled vector's ghost entries.
r.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
# The empty list is the second positional argument (no boundary conditions
# are supplied in this example).
K = fem.petsc.assemble_matrix(fem.form(jac), [])
K.assemble()

if comm.rank == 0:  # only proc 0 should print this
    # Take the elapsed time once so that the seconds and minutes figures
    # refer to the same instant.
    elapsed = time.time() - start
    print('Time needed: %.4f s (= %.2f min)' % (elapsed, elapsed/60.))
    sys.stdout.flush()
This takes 2.2 seconds with the Oct. 2nd Docker image and 21.0 seconds with the latest nightly (1 core).
Is there anything specific to set to speed up FFCx?
(Sorry that it’s a bit lengthy but I wanted to create sufficiently complex forms so that reasonable timings can be measured…)
Thanks!
Best,
Marc