Hi,
I was testing timings for assembling a hyperelastic form (in serial) and noticed that the timings for hex and tet elements (simple first order trilinear and linear elements respectively) were significantly different. While the number of degrees of freedom are the same (in this case) the assembly timings aren’t. Is such a big difference expected?
Note that I’m yet to try in parallel, but I was a bit thrown off with this big a difference. Anything I missed below that is causing such a huge difference in the run-times?
This is on dolfinx:nightly
image from docker with commit-hash: 9d4002772ed9650a6720cf092948a2fd5f2f20cb
Timings
# Assembly Runtimes (unit-cube)
| DOF (tet) | DOF (hex) | tet | hex |
| --------- | ------- | -------------------------------- | ------------------------------- |
| 81 | 81 | 0.00061 | 0.01417 |
| 375 | 375 | 0.00222 | 0.10319 |
| 2187 | 2187 | 0.01302 | 0.85903 |
| 14739 | 14739 | 0.11389 | 7.12959 |
Testing script
from timeit import timeit
import matplotlib.pyplot as plt
import numpy as np
from mpi4py import MPI
from dolfinx.mesh import create_unit_cube, CellType
from ufl import (
Measure,
grad,
tr,
Identity,
det,
TrialFunction,
TestFunction,
ln,
derivative,
)
from basix.ufl import element
from dolfinx.log import LogLevel, set_output_file, set_log_level
from dolfinx import default_scalar_type
from dolfinx import git_commit_hash
from dolfinx.fem import (
form,
petsc,
Function,
functionspace,
)
import os
set_log_level(LogLevel.WARNING)
print(f"hash: {git_commit_hash}")
results_dir = os.path.join(os.getcwd(), "timingsDolfinxAssembly")
os.makedirs(results_dir, exist_ok=True)
set_output_file(os.path.join(results_dir, "logTimingsHex.txt"))
def load_mesh(n, cell_type):
mesh_dolfinx = create_unit_cube(MPI.COMM_WORLD, n, n, n, cell_type=cell_type)
return mesh_dolfinx
def pre_hyperelastic_dolfinx(mesh, **kwargs):
quadrature_degree = 1
ord = 1
elemu = element("P", mesh.basix_cell(), ord, shape=(3,))
V = functionspace(
mesh, elemu, form_compiler_options={"quadrature_degree": quadrature_degree}
)
u = Function(V, name="u")
v = TestFunction(V)
du = TrialFunction(V)
F = Identity(len(u)) + grad(u)
C = F.T * F
Ic = tr(C)
J = det(F)
dx = Measure("dx", domain=mesh)
mu, lmbda = default_scalar_type(1.0), default_scalar_type(2.0)
psi = 1 / 2 * (mu * (Ic - 3)) - mu * ln(J) + lmbda / 2 * ln(J) ** 2
stress = derivative(psi, u, v) * dx
jacobian = derivative(stress, u, du)
return jacobian
def assemble_hyper_dolfinx(jacobian_form):
A = petsc.create_matrix(jacobian_form)
A.zeroEntries()
petsc.assemble_matrix(A, jacobian_form)
A.assemble()
# return A
cache_dir = results_dir
print("# Assembly Runtimes")
print("")
print(
"| DOF (tet) | DOF (hex) | tet | hex |",
)
print(
"| --------- | ------- | -------------------------------- | ------------------------------- |",
)
points_per_axis = np.array([2, 4, 8, 16])
number = 3
runtimes = np.zeros((len(points_per_axis), 3))
for i, n in enumerate(points_per_axis):
mesh_hex = load_mesh(n, cell_type=CellType.hexahedron)
mesh_tet = load_mesh(n, cell_type=CellType.tetrahedron)
jacobian_hex = pre_hyperelastic_dolfinx(mesh_hex)
jacobian_tet = pre_hyperelastic_dolfinx(mesh_tet)
jacobian_form_tet = form(
jacobian_tet,
jit_options={
"cffi_extra_compile_args": ["-Ofast"],
"cache_dir": cache_dir,
"cffi_libraries": ["m"],
},
)
jacobian_form_hex = form(
jacobian_hex,
jit_options={
"cffi_extra_compile_args": ["-Ofast"],
"cache_dir": cache_dir,
"cffi_libraries": ["m"],
},
)
time_hyperelastic_dolfinx_tet = (
timeit(lambda: assemble_hyper_dolfinx(jacobian_form_tet), number=number)
/ number
)
time_hyperelastic_dolfinx_hex = (
timeit(lambda: assemble_hyper_dolfinx(jacobian_form_hex), number=number)
/ number
)
runtimes[i] = (
time_hyperelastic_dolfinx_tet,
time_hyperelastic_dolfinx_hex,
mesh_tet.geometry.x.shape[0],
)
print(
f"|{mesh_tet.geometry.x.shape[0] * 3:7d} | {mesh_hex.geometry.x.shape[0] * 3:7d} | {runtimes[i][0]:19.5f} | {runtimes[i][1]:19.5f} |",
)
fig, ax = plt.subplots()
ax.plot(runtimes[:, -1] * 3.0, runtimes[:, 1] / runtimes[:, 0])
ax.set_xlabel("DOF")
ax.set_ylabel("Speedup")
ax.set_title("Speedup Hex/Tet")
ax.grid()
fig.tight_layout()
fig.savefig(os.path.join(results_dir, "speedupHexTet.png"))
plt.close()
# list_timings(MPI.COMM_WORLD, [TimingType.wall])