Hello,
I am transitioning from legacy dolfin to dolfinx, but I cannot reach the same performance when solving a nonlinear, time-dependent problem.
Consider this example in legacy dolfin:
from fenics import *
import ufl_legacy as ufl
from dolfin import __version__ as fenics_version
import os
import time
mpi_rank = MPI.rank(MPI.comm_world)
set_log_active(False)
if mpi_rank == 0:
    print(f"FeniCS version: {fenics_version}")
domain = Mesh()
with XDMFFile("meshes/simple_system/mesh.xdmf") as infile:
infile.read(domain)
mvc = MeshValueCollection("size_t", domain, 3)
with XDMFFile("meshes/simple_system/mesh_subdomains.xdmf") as infile:
infile.read(mvc)
subdomains = cpp.mesh.MeshFunctionSizet(domain, mvc)
dx = Measure("dx", domain=domain, subdomain_data=subdomains)
t = 0
t_max = 2*3600
dt = 2
theta = 0.5
T0 = 20.0
Qc = 300 # W
alpha = 5 #W/m2K
T_amb = 20
initial_condition = Constant(20.0)
V = FunctionSpace(domain, "CG", 1)
T = Function(V)
T.interpolate(initial_condition)
T_n = Function(V)
T_n.interpolate(initial_condition)
T_v = TestFunction(V)
Vc = assemble(Constant(1)*dx)
q = Qc/Vc
class Steel04:
    def __init__(self):
        pass
    def k(self, T):
        return 48.11-0.006*T-25.5e-6*T**2
    def rho(self, T):
        return 7850
    def cp(self, T):
        return 450
steel = Steel04()
F = steel.rho(T)*steel.cp(T)*T*T_v*dx - steel.rho(T_n)*steel.cp(T_n)*T_n*T_v*dx
F += theta*dt*inner(steel.k(T)*grad(T), grad(T_v))*dx + (1-theta)*dt*inner(steel.k(T_n)*grad(T_n), grad(T_v))*dx
F += theta*dt*((alpha*T*T_v) - (alpha*T_amb*T_v))*ds + (1-theta)*dt*((alpha*T_n*T_v) - (alpha*T_amb*T_v))*ds
F += -theta*dt*q*T_v*dx(3) - (1-theta)*dt*q*T_v*dx(3)
J = derivative(F, T)
problem = NonlinearVariationalProblem(F, T, [], J)
solver = NonlinearVariationalSolver(problem)
prm = solver.parameters
prm['newton_solver']['absolute_tolerance'] = 1e-6
prm['newton_solver']['relative_tolerance'] = 1e-6
prm['newton_solver']['linear_solver'] = 'gmres'
prm['newton_solver']['preconditioner'] = 'hypre_amg'
prm['newton_solver']['krylov_solver']['monitor_convergence'] = False
num_steps = int(t_max/dt)
for i in range(num_steps):
    t_start = time.time()
    t += dt
    solver.solve()
    T_n.assign(T)
    if mpi_rank == 0:
        print(f"t={t}s, iter time: {time.time() - t_start}s")
This returns the following output:
FeniCS version: 2019.2.0.dev0
t=2s, iter time: 3.6996588706970215s
t=4s, iter time: 1.8177123069763184s
t=6s, iter time: 1.8490257263183594s
t=8s, iter time: 1.8499982357025146s
t=10s, iter time: 1.7931668758392334s
t=12s, iter time: 1.7902030944824219s
t=14s, iter time: 1.809546947479248s
t=16s, iter time: 1.8374125957489014s
I reimplemented the same problem using dolfinx:
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx import mesh, io, plot, fem, nls
import dolfinx
import numpy as np
import ufl
import time
if MPI.COMM_WORLD.rank == 0:
    print(f"Dolfinx version: {dolfinx.__version__}")
domain, cell_tags, facet_tags = io.gmshio.read_from_msh("meshes/simple_system/mesh.msh", MPI.COMM_WORLD, 0, gdim=3)
dx = ufl.Measure("dx", domain=domain, subdomain_data=cell_tags)
ds = ufl.Measure("ds", domain=domain, subdomain_data=facet_tags)
t = 0
t_max = 2*3600
dt = 2
theta = 0.5
T0 = 20.0
Qc = 300 #W
alpha = 5 #W/m2K
T_amb = 20
initial_condition = lambda x: np.full((x.shape[1],), 20.0)
V = fem.FunctionSpace(domain, ("CG", 1))
T_n = fem.Function(V)
T_n.name = "T_n"
T_n.interpolate(initial_condition)
T = fem.Function(V)
T.name = "T"
T.interpolate(initial_condition)
Vc = fem.assemble_scalar(fem.form(1*dx(3)))
Vc = domain.comm.allreduce(Vc, op=MPI.SUM)
q = Qc/Vc
T_v = ufl.TestFunction(V)
class Steel04:
    def __init__(self):
        pass
    def k(self, T):
        return 48.11-0.006*T-25.5e-6*T**2
    def rho(self, T):
        return 7850
    def cp(self, T):
        return 450
steel = Steel04()
F = steel.rho(T)*steel.cp(T)*T*T_v*dx - steel.rho(T_n)*steel.cp(T_n)*T_n*T_v*dx
F += theta*dt*ufl.dot(steel.k(T)*ufl.grad(T), ufl.grad(T_v))*dx + (1-theta)*dt*ufl.dot(steel.k(T_n)*ufl.grad(T_n), ufl.grad(T_v))*dx
F += theta*dt*((alpha*T*T_v) - (alpha*T_amb*T_v))*ds + (1-theta)*dt*((alpha*T_n*T_v) - (alpha*T_amb*T_v))*ds
F += -theta*dt*q*T_v*dx(3) - (1-theta)*dt*q*T_v*dx(3)
problem = fem.petsc.NonlinearProblem(F, T)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)
solver.convergence_criterion = "incremental"
solver.atol = 1e-6
solver.rtol = 1e-6
ksp = solver.krylov_solver
opts = PETSc.Options()
option_prefix = ksp.getOptionsPrefix()
opts[f"{option_prefix}ksp_type"] = "gmres"
opts[f"{option_prefix}pc_type"] = "gamg"
ksp.setFromOptions()
num_steps = int(t_max/dt)
for i in range(num_steps):
    t_start = time.time()
    t += dt
    r = solver.solve(T)
    T_n.x.array[:] = T.x.array
    if MPI.COMM_WORLD.rank == 0:
        print(f"t={t}s, iter time: {time.time() - t_start}s")
which gives the following output:
Dolfinx version: 0.6.0
Info : Reading 'meshes/simple_system/mesh.msh'...
Info : 486 entities
Info : 109049 nodes
Info : 632456 elements
Info : Done reading 'meshes/simple_system/mesh.msh'
t=2s, iter time: 5.594205379486084s
t=4s, iter time: 5.5990447998046875s
t=6s, iter time: 5.569369316101074s
t=8s, iter time: 5.54309606552124s
t=10s, iter time: 5.508774280548096s
t=12s, iter time: 5.593702793121338s
t=14s, iter time: 5.546661376953125s
t=16s, iter time: 5.587730407714844s
t=18s, iter time: 5.540001392364502s
So dolfinx is noticeably slower than the legacy version (roughly 5.5 s per time step versus 1.8 s).
I suspect that dolfinx keeps recompiling the forms (JIT) somewhere inside the time-stepping loop, but I have not found a way to confirm or prevent that.
Do I have to write a custom version of the whole time-stepping loop to avoid repeated JIT compilation, or is the slowdown caused by something else?
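For what it is worth, this is roughly how I was planning to check where the time goes: a plain Python timer around the solve itself, plus dolfinx's built-in timing report at the end. This is only a sketch under my assumptions, namely that common.list_timings breaks down assembly and the PETSc solve separately, and that JIT compilation would only show up in the very first step:

from dolfinx import common

# inside the time-stepping loop: time only the Newton solve
solve_start = time.time()
r = solver.solve(T)
if MPI.COMM_WORLD.rank == 0:
    print(f"  solve only: {time.time() - solve_start:.3f}s")

# after the loop: print dolfinx's internal wall-clock timings
# (assembly, dofmap construction, PETSc solves, ...) to see what dominates
common.list_timings(MPI.COMM_WORLD, [common.TimingType.wall])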
Mesh data are available here: issues - Google Drive
Thank you for any reply.