Why don't iterative solvers converge for 3D Stokes + Immersed Boundary Method in FEniCSx?

I’m solving a 3D fluid-structure interaction problem in FEniCSx 0.10.0 using the Stokes equations with an Immersed Boundary Method (IBM) penalty term. A direct LU solve (MUMPS) works fine, but every iterative solver I have tried stalls at the iteration limit.
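
For reference, the assembled system below is a stabilized saddle-point problem. In my notation (K_u the viscous stiffness matrix, M_eta the velocity mass matrix weighted by the coin indicator eta, B the discrete divergence, M_p the pressure mass matrix):

$$
\begin{pmatrix} \mu K_u + \beta M_\eta & -B^{\mathsf T} \\ B & \varepsilon M_p \end{pmatrix}
\begin{pmatrix} u \\ p \end{pmatrix}
=
\begin{pmatrix} f_{\mathrm{IBM}} \\ 0 \end{pmatrix},
\qquad \mu = 10^{-3},\; \beta = 5 \times 10^{3},\; \varepsilon = 10^{-6}
$$

My understanding is that this indefinite block structure is what generic preconditioners struggle with, but I would like to confirm.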

Minimal Working Example

"""
3D Stokes + IBM iterative solver comparison
"""
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.fem import (form, functionspace, Constant, Function, dirichletbc,
                          locate_dofs_topological)
from dolfinx.fem.petsc import assemble_matrix, assemble_vector
from dolfinx.mesh import CellType, create_box, locate_entities_boundary
import ufl
# Domain: 15x15x30 cm, mesh 6x6x12 hexahedra
L_x, L_y, L_z = 0.15, 0.15, 0.30
nx, ny, nz = 6, 6, 12
msh = create_box(MPI.COMM_WORLD,
    [np.array([0.0, 0.0, 0.0]), np.array([L_x, L_y, L_z])],
    (nx, ny, nz), CellType.hexahedron)
gdim = msh.geometry.dim
# Taylor-Hood: P2 velocity (12675 DOF) + P1 pressure (637 DOF)
V_vel = functionspace(msh, ("Lagrange", 2, (gdim,)))
V_pres = functionspace(msh, ("Lagrange", 1))
V_scalar = V_pres  # reuse the P1 space for the coin indicator field
u, v = ufl.TrialFunction(V_vel), ufl.TestFunction(V_vel)
p, q = ufl.TrialFunction(V_pres), ufl.TestFunction(V_pres)
# Stokes variational forms
a00 = 1e-3 * ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx
a01 = -p * ufl.div(v) * ufl.dx
a10 = ufl.div(u) * q * ufl.dx
# Immersed Boundary: penalty term (beta * eta * u)
eta = Function(V_scalar)
beta = Constant(msh, PETSc.ScalarType(5000.0))
xd = V_scalar.tabulate_dof_coordinates()
eta.x.array[:] = (np.sqrt((xd[:,0]-L_x/2)**2 + (xd[:,1]-L_y/2)**2 + (xd[:,2]-L_z*0.7)**2) < 0.02).astype(float)
eta.x.scatter_forward()
a_ibm = beta * eta * ufl.inner(u, v) * ufl.dx  # eta restricts the penalty to the coin region
# BCs: no-slip walls + pressure outlet on top
walls = lambda x: np.isclose(x[0],0)|np.isclose(x[0],L_x)|np.isclose(x[1],0)|np.isclose(x[1],L_y)|np.isclose(x[2],0)
noslip = Function(V_vel); noslip.x.array[:] = 0.0
bc_vel = [dirichletbc(noslip, locate_dofs_topological(V_vel, 2, locate_entities_boundary(msh, 2, walls)))]
top = lambda x: np.isclose(x[2], L_z)
p_out = Function(V_pres); p_out.x.array[:] = 0.0
bc_pres = [dirichletbc(p_out, locate_dofs_topological(V_pres, 2, locate_entities_boundary(msh, 2, top)))]
# Assemble blocked Nest matrix
A00 = assemble_matrix(form(a00 + a_ibm), bcs=bc_vel); A00.assemble()
A01 = assemble_matrix(form(a01), bcs=bc_vel); A01.assemble()  # zero rows of constrained velocity dofs
A10 = assemble_matrix(form(a10), bcs=bc_vel); A10.assemble()  # zero columns of constrained velocity dofs
# Small pressure-mass stabilization; scaling inside the form keeps unit diagonals on BC rows
A11 = assemble_matrix(form(1e-6 * ufl.inner(p, q) * ufl.dx), bcs=bc_pres); A11.assemble()
vel_sz = V_vel.dofmap.index_map.size_local * gdim
pr_sz = V_pres.dofmap.index_map.size_local
K = PETSc.Mat().createNest([[A00, A01], [A10, A11]]); K.assemble()
K = K.convert("aij")  # MUMPS cannot factor a MATNEST; monolithic AIJ also matches the contiguous RHS built below
# RHS: IBM forcing from coin velocity
u_coin = Constant(msh, np.array([0., 0., -0.1]))
L_ibm = form(beta * ufl.inner(u_coin * eta, v) * ufl.dx)
b = assemble_vector(L_ibm)
b.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
# Monolithic RHS laid out as [velocity | pressure] (serial layout; this MWE runs on one rank)
b_arr = np.concatenate([b.array[:vel_sz], np.zeros(pr_sz)])
b = PETSc.Vec().createWithArray(b_arr, comm=MPI.COMM_WORLD)
# --- SOLVER TESTS ---
print(f"DOFs: vel={vel_sz}, pres={pr_sz}")
# 1) LU (MUMPS) - WORKS
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("preonly"); ksp.getPC().setType("lu")
if PETSc.Sys().hasExternalPackage("mumps"): ksp.getPC().setFactorSolverType("mumps")
ksp.setOperators(K); ksp.solve(b, K.createVecRight())
print(f"LU: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")
# 2) GMRES + bjacobi - FAILS to converge
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("gmres"); ksp.setTolerances(rtol=1e-6, atol=1e-10, max_it=200)
ksp.getPC().setType("bjacobi"); ksp.setOperators(K)
x = K.createVecRight(); ksp.solve(b, x)
print(f"GMRES+bjacobi: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")
# 3) fieldsplit (additive, default sub-solvers) - FAILS to converge
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setType("fgmres"); ksp.setTolerances(rtol=1e-6, atol=1e-10, max_it=200)
pc = ksp.getPC(); pc.setType("fieldsplit"); pc.setFieldSplitType(PETSc.PC.CompositeType.ADDITIVE)
is_v = PETSc.IS().createStride(vel_sz, first=0, step=1, comm=MPI.COMM_WORLD)
is_p = PETSc.IS().createStride(pr_sz, first=vel_sz, step=1, comm=MPI.COMM_WORLD)
pc.setFieldSplitIS(("vel", is_v)); pc.setFieldSplitIS(("pres", is_p))
ksp.setOperators(K); ksp.setFromOptions()
x = K.createVecRight(); ksp.solve(b, x)
print(f"fieldsplit+LU: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")

Output

DOFs: vel=12675, pres=637, total=13312
LU: iters=1, converged=4  (KSP_CONVERGED_ITS)
GMRES+bjacobi: iters=200, converged=-3  (KSP_DIVERGED_ITS: hit max_it without converging)
fieldsplit+additive: iters=200, converged=-3  (KSP_DIVERGED_ITS: hit max_it without converging)
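
For diagnosis, per-iteration residuals can be printed through PETSc's options database (standard PETSc options; each KSP picks them up via a setFromOptions() call):

# Extra diagnostics (set before configuring the KSPs above)
opts = PETSc.Options()
opts["ksp_monitor_true_residual"] = ""  # preconditioned and true residual norm per iteration
opts["ksp_converged_reason"] = ""       # human-readable converged/diverged reason per solve
ksp.setFromOptions()

(Only test 3 calls setFromOptions() in the MWE above; the other two KSPs would each need the call added.)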

Questions

  1. Why does GMRES stall for this Stokes + IBM saddle-point system?
  2. Is there a fieldsplit configuration suited to this block structure, e.g. Schur-complement based? (See my attempted sketch below.)
  3. Should I be using a different preconditioner or solver approach altogether?
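
For question 2, the direction I have been reading about is a Schur-complement fieldsplit with a viscosity-scaled pressure mass matrix as the Schur preconditioner, which I understand is the textbook setup for Stokes. Below is my untested sketch (it reuses K, b, is_v, is_p, p, q, and bc_pres from the MWE; I am unsure whether the large penalty beta means the true Schur complement needs a Brinkman-style approximation instead):

# Sketch: Schur-complement fieldsplit with a pressure-mass Schur preconditioner
Qp = assemble_matrix(form((1.0 / 1e-3) * ufl.inner(p, q) * ufl.dx), bcs=bc_pres)
Qp.assemble()  # (1/mu)-scaled pressure mass matrix, spectrally equivalent to the Stokes Schur complement
ksp = PETSc.KSP().create(MPI.COMM_WORLD)
ksp.setOperators(K)
ksp.setType("fgmres"); ksp.setTolerances(rtol=1e-6, max_it=200)
pc = ksp.getPC(); pc.setType("fieldsplit")
pc.setFieldSplitIS(("vel", is_v), ("pres", is_p))
pc.setFieldSplitType(PETSc.PC.CompositeType.SCHUR)
pc.setFieldSplitSchurFactType(PETSc.PC.SchurFactType.FULL)
pc.setFieldSplitSchurPreType(PETSc.PC.SchurPreType.USER, Qp)
pc.setUp()
ksp_v, ksp_p = pc.getFieldSplitSubKSP()
ksp_v.setType("preonly"); ksp_v.getPC().setType("lu")      # exact velocity solve (small problem)
ksp_p.setType("preonly"); ksp_p.getPC().setType("jacobi")  # Jacobi suffices for a mass matrix
x = K.createVecRight(); ksp.solve(b, x)
print(f"fieldsplit+schur: iters={ksp.getIterationNumber()}, converged={ksp.getConvergedReason()}")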

Environment

  • FEniCSx: 0.10.0 (conda-forge)
  • PETSc: 3.x
  • petsc4py: via conda
  • Platform: Linux, single process