Why don't iterative solvers converge for 3D Stokes + Immersed Boundary Method in FEniCSx?

I’m solving a 3D fluid-structure interaction problem using FEniCSx 0.10.0 with Stokes equations and Immersed Boundary Method. Direct LU (MUMPS) works perfectly, but iterative solvers fail to converge.

Minimal Working Example

"""
3D Stokes + IBM iterative solver comparison
"""
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.fem import (form, functionspace, Constant, Function, dirichletbc,
                          locate_dofs_topological)
from dolfinx.fem.petsc import assemble_matrix, assemble_vector
from dolfinx.mesh import CellType, create_box, locate_entities_boundary
import ufl
# Domain: 15x15x30 cm, mesh 6x6x12 hexahedra
L_x, L_y, L_z = 0.15, 0.15, 0.30
nx, ny, nz = 6, 6, 12
msh = create_box(MPI.COMM_WORLD,
    [np.array([0.0, 0.0, 0.0]), np.array([L_x, L_y, L_z])],
    (nx, ny, nz), CellType.hexahedron)
gdim = msh.geometry.dim
# Taylor-Hood: P2 velocity (12675 DOF) + P1 pressure (637 DOF)
V_vel = functionspace(msh, ("Lagrange", 2, (gdim,)))
V_pres = functionspace(msh, ("Lagrange", 1))
V_scalar = functionspace(msh, ("Lagrange", 1))
u, v = ufl.TrialFunction(V_vel), ufl.TestFunction(V_vel)
p, q = ufl.TrialFunction(V_pres), ufl.TestFunction(V_pres)
# Stokes variational forms
a00 = 1e-3 * ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
a01 = -p * ufl.div(v) * ufl.dx
a10 = ufl.div(u) * q * ufl.dx
# Immersed Boundary: penalty term (beta * eta * u)
eta = Function(V_scalar)
beta = Constant(msh, PETSc.ScalarType(5000.0))
xd = V_scalar.tabulate_dof_coordinates()
eta.x.array[:] = (np.sqrt((xd[:,0]-L_x/2)**2 + (xd[:,1]-L_y/2)**2 + (xd[:,2]-L_z*0.7)**2) < 0.02).astype(float)
eta.x.scatter_forward()
a_ibm = beta * ufl.inner(u, v) * ufl.dx
# BCs: no-slip walls + pressure outlet on top
walls = lambda x: np.isclose(x[0],0)|np.isclose(x[0],L_x)|np.isclose(x[1],0)|np.isclose(x[1],L_y)|np.isclose(x[2],0)
noslip = Function(V_vel); noslip.x.array[:] = 0.0
bc_vel = [dirichletbc(noslip, locate_dofs_topological(V_vel, 2, locate_entities_boundary(msh, 2, walls)))]
top = lambda x: np.isclose(x[2], L_z)
p_out = Function(V_pres); p_out.x.array[:] = 0.0
bc_pres = [dirichletbc(p_out, locate_dofs_topological(V_pres, 2, locate_entities_boundary(msh, 2, top)))]
# Assemble blocked Nest matrix
A00 = assemble_matrix(form(a00 + a_ibm), bcs=bc_vel); A00.assemble()
A01 = assemble_matrix(form(a01), bcs=[]); A01.assemble()
A10 = assemble_matrix(form(a10), bcs=[]); A10.assemble()
A11 = assemble_matrix(form(ufl.inner(p,q)*ufl.dx), bcs=bc_pres); A11.assemble(); A11.scale(1e-6)
vel_sz = V_vel.dofmap.index_map.size_local * gdim
pr_sz = V_pres.dofmap.index_map.size_local
K = PETSc.Mat().createNest([[A00, A01], [A10, A11]]); K.assemble()
# RHS: IBM forcing from coin velocity
u_coin = Constant(msh, np.array([0., 0., -0.1]))
L_ibm = form(beta * ufl.inner(u_coin * eta, v) * ufl.dx)
b = assemble_vector(L_ibm)
b.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
b_arr = np.concatenate([b.array[:vel_sz], np.zeros(pr_sz)])
b = PETSc.Vec().createWithArray(b_arr, comm=MPI.COMM_WORLD)
# --- SOLVER TESTS ---
print(f"DOFs: vel={vel_sz}, pres={pr_sz}")
# 1) LU (MUMPS) - WORKS
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("preonly"); ksp.getPC().setType("lu")
if PETSc.Sys().hasExternalPackage("mumps"): ksp.getPC().setFactorSolverType("mumps")
ksp.setOperators(K); ksp.solve(b, K.createVecRight())
print(f"LU: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")
# 2) GMRES + bjacobi - FAILS to converge
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("gmres"); ksp.setTolerances(rtol=1e-6, atol=1e-10, max_it=200)
ksp.getPC().setType("bjacobi"); ksp.setOperators(K)
x = K.createVecRight(); ksp.solve(b, x)
print(f"GMRES+bjacobi: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")
# 3) fieldsplit + LU/jacobi - FAILS to converge
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("fgmres"); ksp.setTolerances(rtol=1e-6, atol=1e-10, max_it=200)
pc = ksp.getPC(); pc.setType("fieldsplit"); pc.setFieldSplitType(PETSc.PC.CompositeType.ADDITIVE)
is_v = PETSc.IS().createStride(vel_sz, first=0, step=1, comm=MPI.COMM_WORLD)
is_p = PETSc.IS().createStride(pr_sz, first=vel_sz, step=1, comm=MPI.COMM_WORLD)
pc.setFieldSplitIS(("vel", is_v)); pc.setFieldSplitIS(("pres", is_p))
ksp.setOperators(K); ksp.setFromOptions()
x = K.createVecRight(); ksp.solve(b, x)
print(f"fieldsplit+LU: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")

Output

DOFs: vel=12675, pres=637, total=13312
LU: iters=1, converged=4
GMRES+bjacobi: iters=200, converged=-3  (KSP_DIVERGED_BREAKDOWN)
fieldsplit+LU: iters=200, converged=-3  (KSP_DIVERGED_BREAKDOWN)

Questions

  1. Why does GMRES fail to converge for this Stokes+IBM system?
  2. Is there a proper fieldsplit configuration that would work?
  3. Should I be using a different preconditioner or solver approach?

Environment

  • FEniCSx: 0.10.0 (conda-forge)
  • PETSc: 3.x
  • petsc4py: via conda
  • Platform: Linux, single process

You blocked assembly is not correct.

This will not zero out the correct rows due to the dirichlet bc in velocity and pressure.
Furthermore, there are plenty of example on how to precondition stokes at:

here is an adapted, simplified and corrected version (eta was not part of a_ibm, and made a sign correction to ensure symmetry) + added a preconditioner matrix P:

"""
3D Stokes + IBM iterative solver comparison
"""
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.fem import (form, functionspace, Constant, Function, dirichletbc,
                          locate_dofs_topological)
from dolfinx.fem.petsc import assemble_matrix, assemble_vector, LinearProblem
from dolfinx.mesh import CellType, create_box, locate_entities_boundary
import ufl
# Domain: 15x15x30 cm, mesh 6x6x12 hexahedra
L_x, L_y, L_z = 0.15, 0.15, 0.30
nx, ny, nz = 6, 6, 12
msh = create_box(MPI.COMM_WORLD,
    [np.array([0.0, 0.0, 0.0]), np.array([L_x, L_y, L_z])],
    (nx, ny, nz), CellType.hexahedron)
gdim = msh.geometry.dim
# Taylor-Hood: P2 velocity (12675 DOF) + P1 pressure (637 DOF)
V_vel = functionspace(msh, ("Lagrange", 2, (gdim,)))
V_pres = functionspace(msh, ("Lagrange", 1))
V_scalar = functionspace(msh, ("Lagrange", 1))
W = ufl.MixedFunctionSpace(V_vel, V_pres)

u, p = ufl.TrialFunctions(W)
v, q = ufl.TestFunctions(W)


mu = Constant(msh, PETSc.ScalarType(1e-3))
# Stokes variational forms
a00 = mu * ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
a01 = -p * ufl.div(v) * ufl.dx
a10 = -ufl.div(u) * q * ufl.dx
# Immersed Boundary: penalty term (beta * eta * u)
eta = Function(V_scalar)
beta = Constant(msh, PETSc.ScalarType(5000.0))
xd = V_scalar.tabulate_dof_coordinates()
eta.x.array[:] = (np.sqrt((xd[:,0]-L_x/2)**2 + (xd[:,1]-L_y/2)**2 + (xd[:,2]-L_z*0.7)**2) < 0.02).astype(float)
eta.x.scatter_forward()
a_ibm = beta * eta* ufl.inner(u, v) * ufl.dx
# BCs: no-slip walls + pressure outlet on top
walls = lambda x: np.isclose(x[0],0)|np.isclose(x[0],L_x)|np.isclose(x[1],0)|np.isclose(x[1],L_y)|np.isclose(x[2],0)
noslip = Function(V_vel); noslip.x.array[:] = 0.0
bc_vel = [dirichletbc(noslip, locate_dofs_topological(V_vel, 2, locate_entities_boundary(msh, 2, walls)))]
top = lambda x: np.isclose(x[2], L_z)
p_out = Function(V_pres); p_out.x.array[:] = 0.0
bc_pres = [dirichletbc(p_out, locate_dofs_topological(V_pres, 2, locate_entities_boundary(msh, 2, top)))]

c = Constant(msh,0.0)
a11 = c * ufl.inner(p, q) * ufl.dx(99)

a = a00 + a01 + a10 + a11 + a_ibm
# RHS: IBM forcing from coin velocity
u_coin = Constant(msh, np.array([0., 0., -0.1]))
L_ibm = beta * ufl.inner(u_coin * eta, v) * ufl.dx
d = Constant(msh, 0.0)
L = L_ibm + d * q * ufl.dx(99)

a_p11 = 1/mu * ufl.inner(p, q) * ufl.dx
P = ufl.extract_blocks(a00 + a_ibm + a_p11)
petsc_options = {"ksp_type": "preonly", "pc_type": "lu", "pc_mat_factor_solver_type": "mumps",
    "ksp_error_if_not_converged": True,
    "ksp_monitor": None}
petsc_options={
    "ksp_type": "minres",
    "ksp_rtol": 1e-9,
    "pc_type": "fieldsplit",
    "pc_fieldsplit_type": "additive",
    "fieldsplit_0_ksp_type": "preonly",
    "fieldsplit_0_pc_type": "gamg",
    "fieldsplit_1_ksp_type": "preonly",
    "fieldsplit_1_pc_type": "jacobi",
    "ksp_error_if_not_converged": True,
    "ksp_monitor": None
}
problem = LinearProblem(ufl.extract_blocks(a), ufl.extract_blocks(L), bcs=bc_vel+bc_pres, petsc_options=petsc_options, kind="nest",
P =P,
petsc_options_prefix="stokes_ibm_")
uh, ph = problem.solve()