Hey guys, I want to carry out fluid flow simulations in FEniCS. Here are the version specs:
DOLFINx version: 0.10.0.post2
PETSc version: (3, 15, 5)
petsc4py version: 3.15.1
Basix version: 0.10.0
UFL version: 2025.2.0.post0
MPI Vendor: (‘Open MPI’, (4, 1, 2))
To begin with I am trying to setup simple benchmarks. I am a complete noob at FEniCS at the moment and I have very less idea of the inner workings. Presently, I am trying to translate my MATLAB or non-PETSc C++ FEM code into it. Below is a 3d driven cavity example using gamg. The krylov iterations and picard iterations are converging as expected. I am unsure if the code is reassembling the non-convective terms in the loop at the moment; or does it recognise the repeating work automatically? I tried explicitly assembling the linear parts separately and adding them but I am running into PETSc issues. It seems as if the structure of the matrix changes while assembling and the linear parts do not simply add correctly to the nonlinear newly assembled pieces. Are there any tutorials, examples, suggestions which can help me in running the code faster? Thanks
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np
import os
from pathlib import Path
# DOLFINx Imports
import dolfinx
from dolfinx.fem import (
Constant, Function, functionspace, dirichletbc,
locate_dofs_geometrical, form, extract_function_spaces
)
from dolfinx.fem.petsc import (
create_matrix, create_vector, assemble_matrix, assemble_vector,
apply_lifting, set_bc
)
from dolfinx.mesh import create_unit_cube, CellType
from dolfinx.io import VTKFile, XDMFFile
from basix.ufl import element, mixed_element
from ufl import (
TestFunctions, TrialFunctions, split, dot, div, grad, dx, inner,
sqrt, CellDiameter, lhs, rhs
)
# -------------------------------------------------------------------
# 1. Simulation Parameters
# -------------------------------------------------------------------
N = 16
Re = 1000.0
T_end = 10.0
# --- IO Controls ---
output_freq = 5
results_folder = Path("results_3d_cavity_final")
checkpoint_xdmf_path = results_folder / "checkpoint.xdmf"
checkpoint_meta_path = results_folder / "checkpoint_meta.npy"
# --- Adaptive Time Stepping ---
Target_CFL = 1
Max_dt = 0.1
Min_dt = 1e-5
WARMUP_STEPS = 3
WARMUP_DT_START = 1e-5
# Ramp the lid velocity over the first 0.1 seconds to prevent shock
RAMP_DURATION = 0.1
# --- Solver Tolerances ---
PROD_MAX_PICARD = 10
PROD_MAX_KSP = 100
PROD_KSP_TOL = 1e-8
# Convergence Criteria
STAGNATION_TOL = 1e-4
NONLIN_RES_TOL = 1e-4
picard_relaxation = 1
ENABLE_SUPG = 1.0
ENABLE_PSPG = 1.0
comm = MPI.COMM_WORLD
# -------------------------------------------------------------------
# 2. Mesh & Functions
# -------------------------------------------------------------------
if comm.rank == 0:
print(f"Generating 3D Mesh ({N}x{N}x{N})...", flush=True)
mesh = create_unit_cube(comm, N, N, N, cell_type=CellType.tetrahedron)
h_min = 1.0 / N
dt_init = 0.5 * h_min
# Spaces
P2_v = element("Lagrange", mesh.basix_cell(), 2, shape=(mesh.geometry.dim,))
P1_s = element("Lagrange", mesh.basix_cell(), 1)
Mixed_Elem = mixed_element([P2_v, P1_s])
W = functionspace(mesh, Mixed_Elem)
# Functions
w = Function(W)
w_k = Function(W)
w0 = Function(W)
w00 = Function(W)
(u_trial, p_trial) = TrialFunctions(W)
(v, q) = TestFunctions(W)
u_k, p_k = split(w_k)
u0, p0 = split(w0)
u00, p00 = split(w00)
# Physical Constants
rho = 1.0
mu = 1.0 / Re
nu = Constant(mesh, PETSc.ScalarType(mu / rho))
dt = Constant(mesh, PETSc.ScalarType(WARMUP_DT_START))
f_body = Constant(mesh, PETSc.ScalarType((0.0, 0.0, 0.0)))
# -------------------------------------------------------------------
# 3. Boundary Conditions
# -------------------------------------------------------------------
V_sub, _ = W.sub(0).collapse()
Q_sub, _ = W.sub(1).collapse()
# A. Wall BC (Value = 0.0)
u_walls = Function(V_sub)
u_walls.x.array[:] = 0.0
dofs_walls = locate_dofs_geometrical((W.sub(0), V_sub),
lambda x: np.logical_or(np.isclose(x[0],0)|np.isclose(x[0],1), np.isclose(x[2],0)|np.isclose(x[2],1) | np.isclose(x[1],0)))
bc_walls = dirichletbc(u_walls, dofs_walls, W.sub(0))
# B. Lid BC (Value = Ramped from 0.0 to 1.0)
u_lid = Function(V_sub)
u_lid.interpolate(lambda x: np.vstack((np.ones_like(x[0]), np.zeros_like(x[0]), np.zeros_like(x[0]))))
dofs_lid = locate_dofs_geometrical((W.sub(0), V_sub), lambda x: np.isclose(x[1], 1.0))
bc_lid = dirichletbc(u_lid, dofs_lid, W.sub(0))
# C. Pressure Pin (Value = 0.0)
dofs_p = locate_dofs_geometrical((W.sub(1), Q_sub), lambda x: (np.isclose(x[0],0)&np.isclose(x[1],0)&np.isclose(x[2],0)))
bc_p = dirichletbc(Function(Q_sub), dofs_p, W.sub(1))
# --- REAL BCs (Applied to System) ---
bcs = [bc_lid, bc_walls, bc_p]
# --- ZERO BCs (Applied ONLY to Residual Vector check) ---
# We need a BC that targets the Lid DOFs but forces them to 0.0
u_zero_func = Function(V_sub)
u_zero_func.x.array[:] = 0.0
bc_lid_zero = dirichletbc(u_zero_func, dofs_lid, W.sub(0))
# The walls and pin are already 0.0, so we can reuse them.
# This list is used solely to zero out the "Reaction Forces" in the residual check.
bcs_zero = [bc_lid_zero, bc_walls, bc_p]
# -------------------------------------------------------------------
# 4. Form Helper Functions
# -------------------------------------------------------------------
h = CellDiameter(mesh)
def get_tau(u_vec, dt_val):
u_mag = sqrt(dot(u_vec, u_vec) + 1e-10)
return 1.0 / sqrt( (2.0/dt_val)**2 + (2.0*u_mag/h)**2 + (4.0*nu/h**2)**2 )
# -------------------------------------------------------------------
# 5. Linearized System Forms
# -------------------------------------------------------------------
tau_lin = get_tau(u_k, dt)
# --- BDF1 Linear ---
dt_inv = 1.0/dt
mom_lhs_bdf1 = dt_inv * u_trial + dot(grad(u_trial),u_k) + grad(p_trial)
mom_rhs_bdf1 = f_body + dt_inv * u0
F_lin_bdf1 = inner(dt_inv * u_trial, v)*dx \
+ inner(dot(grad(u_trial),u_k), v)*dx \
+ nu * inner(grad(u_trial), grad(v))*dx \
- inner(p_trial, div(v))*dx + inner(div(u_trial), q)*dx \
- inner(f_body + dt_inv * u0, v)*dx
F_lin_bdf1 += ENABLE_SUPG * inner(mom_lhs_bdf1 - mom_rhs_bdf1, tau_lin * dot(grad(v),u_k)) * dx
F_lin_bdf1 += ENABLE_PSPG * inner(mom_lhs_bdf1 - mom_rhs_bdf1, tau_lin * grad(q)) * dx
# --- BDF2 Linear ---
bdf2_fac = 1.0 / (2.0*dt)
mom_lhs_bdf2 = 3.0*bdf2_fac * u_trial + dot(grad(u_trial),u_k) + grad(p_trial)
mom_rhs_bdf2 = f_body + bdf2_fac * (4.0*u0 - u00)
F_lin_bdf2 = inner(3.0*bdf2_fac * u_trial, v)*dx \
+ inner(dot(grad(u_trial),u_k), v)*dx \
+ nu * inner(grad(u_trial), grad(v))*dx \
- inner(p_trial, div(v))*dx + inner(div(u_trial), q)*dx \
- inner(f_body + bdf2_fac * (4.0*u0 - u00), v)*dx
F_lin_bdf2 += ENABLE_SUPG * inner(mom_lhs_bdf2 - mom_rhs_bdf2, tau_lin * dot(grad(v),u_k)) * dx
F_lin_bdf2 += ENABLE_PSPG * inner(mom_lhs_bdf2 - mom_rhs_bdf2, tau_lin * grad(q)) * dx
# Use standard 'form' and imported 'lhs/rhs'
a_bdf1, L_bdf1 = form(lhs(F_lin_bdf1)), form(rhs(F_lin_bdf1))
a_bdf2, L_bdf2 = form(lhs(F_lin_bdf2)), form(rhs(F_lin_bdf2))
# -------------------------------------------------------------------
# 6. NON-LINEAR Residual Forms
# -------------------------------------------------------------------
u_curr, p_curr = split(w)
tau_nonlin = get_tau(u_curr, dt)
# --- BDF1 ---
mom_strong_bdf1 = dt_inv*(u_curr - u0) + dot(grad(u_curr),u_curr) + grad(p_curr) - f_body
R_bdf1 = inner(dt_inv*(u_curr - u0), v)*dx + inner(dot(grad(u_curr),u_curr), v)*dx + nu * inner(grad(u_curr), grad(v))*dx \
- inner(p_curr, div(v))*dx + inner(div(u_curr), q)*dx - inner(f_body, v)*dx
R_bdf1 += ENABLE_SUPG * inner(mom_strong_bdf1, tau_nonlin * dot(grad(v),u_curr)) * dx
R_bdf1 += ENABLE_PSPG * inner(mom_strong_bdf1, tau_nonlin * grad(q)) * dx
# --- BDF2 ---
mom_strong_bdf2 = bdf2_fac*(3.0*u_curr - 4.0*u0 + u00) + dot(grad(u_curr),u_curr) + grad(p_curr) - f_body
R_bdf2 = inner(bdf2_fac*(3.0*u_curr - 4.0*u0 + u00), v)*dx + inner(dot(grad(u_curr),u_curr), v)*dx + nu * inner(grad(u_curr), grad(v))*dx \
- inner(p_curr, div(v))*dx + inner(div(u_curr), q)*dx - inner(f_body, v)*dx
R_bdf2 += ENABLE_SUPG * inner(mom_strong_bdf2, tau_nonlin * dot(grad(v),u_curr)) * dx
R_bdf2 += ENABLE_PSPG * inner(mom_strong_bdf2, tau_nonlin * grad(q)) * dx
Res_Form_BDF1 = form(R_bdf1)
Res_Form_BDF2 = form(R_bdf2)
# -------------------------------------------------------------------
# 7. Solver Setup
# -------------------------------------------------------------------
A = create_matrix(a_bdf1)
b = create_vector(extract_function_spaces(L_bdf1))
res_vector = create_vector(extract_function_spaces(L_bdf1))
ksp = PETSc.KSP().create(mesh.comm)
ksp.setOperators(A)
ksp.setType("gmres")
ksp.setTolerances(rtol=PROD_KSP_TOL, max_it=PROD_MAX_KSP)
pc = ksp.getPC()
pc.setType("gamg")
pc.setGAMGType("agg")
opts = PETSc.Options()
opts["pc_gamg_sym_graph"] = True # Fixes "un-symmetric graph" error
opts["pc_gamg_square_graph"] = 1 # Improves robustness for non-symmetric/convective systems
opts["pc_gamg_agg_nsmooths"] = 1 # (Optional) Smoothens aggregation
ksp.setFromOptions()
# IO
results_folder.mkdir(exist_ok=True, parents=True)
file_u = VTKFile(comm, results_folder / "velocity.pvd", "w")
file_s = VTKFile(comm, results_folder / "pressure.pvd", "w")
u_vis = Function(V_sub, name="Velocity")
p_vis = Function(Q_sub, name="Pressure")
# -------------------------------------------------------------------
# 8. Optimized Time Loop
# -------------------------------------------------------------------
t = 0.0
step = 0
# Helper to scale lid velocity
def update_lid_bc(current_time):
scale = min(current_time / RAMP_DURATION, 1.0)
# 1. Update the Function used by the Real BC
u_lid.x.array[:] = 0.0
u_lid.interpolate(lambda x: np.vstack((np.full_like(x[0], scale), np.zeros_like(x[0]), np.zeros_like(x[0]))))
u_lid.x.scatter_forward()
if comm.rank == 0:
print(f"{'Step':<5} | {'Time':<7} | {'dt':<9} | {'Mode':<6} | {'Picard':<6} | {'KSP':<4} | {'Diff(Rel)':<10} | {'NL-Res(Abs)':<11}", flush=True)
while t < T_end:
step += 1
# 1. Update Time Step & Mode
if step <= WARMUP_STEPS:
current_a, current_L = a_bdf1, L_bdf1
current_Res_Form = Res_Form_BDF1
ramp = step / WARMUP_STEPS
dt.value = WARMUP_DT_START + ramp * (dt_init - WARMUP_DT_START)
min_picard = 1
solver_mode = "Warm"
else:
current_a, current_L = a_bdf2, L_bdf2
current_Res_Form = Res_Form_BDF2
# CFL Update
u_vis.interpolate(w.sub(0))
u_flat = u_vis.x.array.reshape((-1, 3))
u_max = comm.allreduce(np.max(np.linalg.norm(u_flat, axis=1)), op=MPI.MAX)
dt.value = max(min(Target_CFL * h_min / max(u_max, 1e-6), Max_dt), Min_dt)
min_picard = 2
solver_mode = "BDF2"
t += dt.value
# 2. Ramp BCs
update_lid_bc(t)
# Predictor
w_k.x.array[:] = w0.x.array[:]
w_k.x.scatter_forward()
rel_diff = 1.0
nonlin_res_norm = 1.0
# --- Picard Loop ---
for i in range(PROD_MAX_PICARD):
# A. Linear Solve
A.zeroEntries()
assemble_matrix(A, current_a, bcs=bcs)
A.assemble()
with b.localForm() as loc_b: loc_b.set(0)
assemble_vector(b, current_L)
apply_lifting(b, [current_a], [bcs])
b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
set_bc(b, bcs)
ksp.solve(b, w.x.petsc_vec)
w.x.scatter_forward()
# B. Calculate Relative Update Difference
diff_sq = comm.allreduce(np.sum((w.x.array - w_k.x.array)**2), op=MPI.SUM)
norm_sq = comm.allreduce(np.sum(w.x.array**2), op=MPI.SUM)
rel_diff = np.sqrt(diff_sq) / (np.sqrt(norm_sq) + 1e-10)
# C. Calculate True Non-Linear Residual
# 1. Assemble the vector (Integral of the Strong Form)
with res_vector.localForm() as loc_r: loc_r.set(0)
assemble_vector(res_vector, current_Res_Form)
# 2. Ghost Update (Sum contributions from parallel partitions)
res_vector.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
# 3. Force Boundary Residuals to Zero
set_bc(res_vector, bcs_zero)
nonlin_res_norm = res_vector.norm()
# D. Update Guess
w_k.x.array[:] = picard_relaxation * w.x.array[:] + (1.0 - picard_relaxation) * w_k.x.array[:]
w_k.x.scatter_forward()
# E. Check Convergence
if i >= (min_picard - 1):
if (rel_diff < STAGNATION_TOL) and (nonlin_res_norm < NONLIN_RES_TOL):
break
# Stagnation Safety (If solution stops changing, we trust it)
if rel_diff < 1e-9:
break
# Update History
w00.x.array[:] = w0.x.array[:]
w0.x.array[:] = w.x.array[:]
if comm.rank == 0:
print(f"{step:<5} | {t:<7.4f} | {dt.value:<9.2e} | {solver_mode:<6} | {i+1:<6} | {ksp.getIterationNumber():<4} | {rel_diff:<10.2e} | {nonlin_res_norm:<11.2e}", flush=True)
# Output
if step % output_freq == 0 or step == WARMUP_STEPS:
p_vis.interpolate(w.sub(1))
u_vis.interpolate(w.sub(0))
file_u.write_function([u_vis], t)
file_s.write_function([p_vis], t)
with XDMFFile(comm, checkpoint_xdmf_path, "w") as xdmf:
xdmf.write_mesh(mesh)
u_out = Function(functionspace(mesh, ("Lagrange", 1, (mesh.geometry.dim,))))
u_out.interpolate(w.sub(0))
u_out.name = "velocity"
xdmf.write_function(u_out)
p_out = w.sub(1).collapse()
p_out.name = "pressure"
xdmf.write_function(p_out)
if comm.rank == 0:
np.save(checkpoint_meta_path, {'t': t, 'step': step, 'dt': dt.value})
if comm.rank == 0:
print("Done.", flush=True)