Using MatNest in SNES does not work in parallel

Hi everyone. I am implementing a P2-P0 incompressible formulation with a fieldsplit preconditioner built on a Nest matrix, both for efficiency and because there is no static condensation in FEniCS (2019). The problem solves in serial but fails to converge in parallel, so my guess is that the issue lies with ghost dofs or with the dof reordering. Here is the MWE (sorry for the length); hopefully someone can spot a problem, because I cannot. Does anyone have experience with this type of application? I have looked at the Nest interface in 2019, but I cannot see how to i) use it with SNES and ii) set a fieldsplit on the snes.ksp.pc object.

Thanks!

from petsc4py import PETSc
from dolfin import *
from mpi4py import MPI


parameters["form_compiler"]["cpp_optimize"] = True
parameters["form_compiler"]["optimize"] = True
parameters["form_compiler"]["quadrature_degree"] = 3
parameters["mesh_partitioner"] = "ParMETIS"
parameters["reorder_dofs_serial"] = False  # Already checked, this is not part of the problem.

denom = 6.0

# Set options for KSP
PETSc.Options().setValue("-ksp_type", "gmres")
PETSc.Options().setValue("-ksp_atol", 1e-12)
PETSc.Options().setValue("-ksp_rtol", 1e-6)
PETSc.Options().setValue("-pc_type", "fieldsplit")
PETSc.Options().setValue("-snes_monitor", None)
PETSc.Options().setValue("-ksp_monitor", None)
PETSc.Options().setValue("-snes_linesearch_type", "basic")

# Fieldsplit specs, using full everywhere for debug.
PETSc.Options().setValue("-pc_fieldsplit_type", "schur")
PETSc.Options().setValue("-pc_fieldsplit_schur_fact_type", "full")
PETSc.Options().setValue("-pc_fieldsplit_schur_precondition", "full")  # selfp | a11

# KSP 0
PETSc.Options().setValue("-fieldsplit_0_ksp_type", "preonly")  # preonly | gmres
PETSc.Options().setValue("-fieldsplit_0_pc_type", "lu")

# KSP 1
PETSc.Options().setValue("-fieldsplit_1_ksp_type", "preonly") 
PETSc.Options().setValue("-fieldsplit_1_pc_type", "lu")  # hypre!

# Model params
N = 2
u_deg = 2
nx = (N+1)*2
ny = (N+1)*1
lx = 8  # m
ly = 1  # m
theta = pi/denom
mesh = RectangleMesh(Point(0.0, 0.0), Point(lx, ly), nx, ny)  # lx x ly rectangle so the right boundary at x = lx exists
Vu = VectorFunctionSpace(mesh, 'CG', u_deg)
Vp = FunctionSpace(mesh, 'DG', 0)
if MPI.COMM_WORLD.rank == 0:
    dofs_tot = Vu.dim() + Vp.dim()
    print("Problem dofs = {} ({} + {}), dofs/core = {}".format(dofs_tot,
                                                               Vu.dim(), Vp.dim(), dofs_tot / MPI.COMM_WORLD.size), flush=True)

# Mark boundary subdomains
left = CompiledSubDomain("near(x[0], side) && on_boundary", side=0.0)
right = CompiledSubDomain("near(x[0], side) && on_boundary", side=lx)

# Define Dirichlet boundary (x = 0 or x = lx)
c = Constant(("0.0", "0.0"))
r = Expression(("scale*0.0",
                "scale*(y0 + (x[1] - y0)*cos(theta) - x[1])"),
               scale=0.5, y0=0.5, theta=theta, degree=1)
bcl = DirichletBC(Vu, c, left)
bcr = DirichletBC(Vu, r, right)
bcs = [bcl, bcr]


ap = 9000  # Pa
bp = 9000  # Pa

# Define functions
du, dp = TrialFunction(Vu), TrialFunction(Vp)
v, q = TestFunction(Vu), TestFunction(Vp)
u, p = Function(Vu), Function(Vp)
sol = PETSc.Vec().createNest([u.vector().vec().copy(), p.vector().vec().copy()])

# Kinematics
d = mesh.geometry().dim()
I = Identity(d)    # Identity tensor
F = I + grad(u)    # Deformation gradient
J = det(F)
H = J * inv(F).T

# Problem definition
psi = (Constant(ap) * (inner(F, F) - d)
       + Constant(bp) * inner(H, H)
       - Constant(4 * bp + 2 * ap) * ln(J))
Pi = psi*dx - p * (J - 1) * dx
Res_u = derivative(Pi, u, v)
Res_p = derivative(Pi, p, q)
Jac_uu = derivative(Res_u, u, du)
Jac_up = derivative(Res_u, p, dp)
Jac_pu = derivative(Res_p, u, du)
Jac_pp = derivative(Res_p, p, dp)


class SNESProblem():
    def __init__(self, FFs, JJs, uu, pp, bbcs):
        self.Ls = FFs
        self.Js = JJs
        self.bcs = bbcs
        self.bcs0 = [DirichletBC(_bc) for _bc in bbcs]
        for bc in self.bcs0:
            bc.homogenize()
        self.u = uu
        self.p = pp

    def assemble_F(self, FF):
        # if MPI.COMM_WORLD.rank == 0:
        Fu, Fp = FF.getNestSubVecs()
        assemble(self.Ls[0], tensor=PETScVector(Fu))
        assemble(self.Ls[1], tensor=PETScVector(Fp))
        for bc in self.bcs0:
            bc.apply(PETScVector(Fu))
        Fu.assemble()
        FF.assemble()  # Something is not getting updated otherwise

    def assemble_J(self, JJ):
        Juu = JJ.getNestSubMatrix(0, 0)
        Jup = JJ.getNestSubMatrix(0, 1)
        Jpu = JJ.getNestSubMatrix(1, 0)
        Jpp = JJ.getNestSubMatrix(1, 1)
        Juu.setBlockSize(d)
        Jpp.setBlockSize(1)
        assemble(self.Js[0], tensor=PETScMatrix(Juu))
        assemble(self.Js[1], tensor=PETScMatrix(Jup))
        assemble(self.Js[2], tensor=PETScMatrix(Jpu))
        assemble(self.Js[3], tensor=PETScMatrix(Jpp))
        MM = Juu.getDiagonal().sum() / Juu.getSize()[0]
        for bc in self.bcs0:
            indexes = [*bc.get_boundary_values().keys()]
            Juu.zeroRows(indexes, diag=MM)
            Jup.zeroRows(indexes, diag=0.0)
        Juu.assemble()
        Jup.assemble()
        JJ.assemble()

    def F(self, snes, xx, FF):
        # Copy the current SNES iterate into the dolfin Functions (including
        # ghost updates) before reassembling the residual blocks.
        xu, xp = xx.getNestSubVecs()
        xu.copy(self.u.vector().vec())
        xp.copy(self.p.vector().vec())
        self.u.vector().apply("")
        self.p.vector().apply("")
        self.assemble_F(FF)

    def J(self, snes, xx, JJ, PP):
        # Same state update as in F, then reassemble the four Jacobian blocks.
        xu, xp = xx.getNestSubVecs()
        xu.copy(self.u.vector().vec())
        xp.copy(self.p.vector().vec())
        self.u.vector().apply("")
        self.p.vector().apply("")
        self.assemble_J(JJ)


# Apply the non-homogeneous Dirichlet data to the initial guess
for bc in bcs:
    solu = sol.getNestSubVecs()[0]
    bc.apply(PETScVector(solu))

# Create indexes by hand... not working.
# dofs_s = Vu.dofmap().dofs()
# dofs_p = Vp.dofmap().dofs()
# LL = Vu.dim()
# dofs_p = [i + LL for i in dofs_p]
# is_0 = PETSc.IS().createGeneral(dofs_s)
# is_1 = PETSc.IS().createGeneral(dofs_p)
b = PETSc.Vec().createNest([u.vector().vec().copy(), p.vector().vec().copy()])
uu_temp = PETScMatrix()
up_temp = PETScMatrix()
pu_temp = PETScMatrix()
pp_temp = PETScMatrix()
assemble(Jac_uu, tensor=uu_temp)
assemble(Jac_up, tensor=up_temp)
assemble(Jac_pu, tensor=pu_temp)
assemble(Jac_pp, tensor=pp_temp)
ru, rp = b.getNestSubVecs()
assemble(Res_u, tensor=PETScVector(ru))
assemble(Res_p, tensor=PETScVector(rp))
uu_temp.mat().setBlockSize(d)
pp_temp.mat().setBlockSize(1)


J_mat = PETSc.Mat().createNest([[uu_temp.mat(), up_temp.mat()],
                                [pu_temp.mat(), pp_temp.mat()]])  # Nested matrix
is_0, is_1 = J_mat.getNestISs()[0]  # row index sets of the two nest blocks

b.assemble()
J_mat.assemble()
problem = SNESProblem([Res_u, Res_p], [Jac_uu, Jac_up, Jac_pu, Jac_pp], u, p, bcs)
MM = uu_temp.mat().getDiagonal().sum() / uu_temp.mat().getSize()[0]
for bc in bcs:
    BC = DirichletBC(bc)
    BC.homogenize()
    # BC.apply(uu_temp)
    BC.apply(PETScVector(ru))
    indexes = [*bc.get_boundary_values().keys()]
    uu_temp.mat().zeroRows(indexes, diag=MM)
    up_temp.mat().zeroRows(indexes, diag=0.0)

snes = PETSc.SNES().create(MPI.COMM_WORLD)
snes.setFunction(problem.F, b)
snes.setJacobian(problem.J, J_mat)

# Set up fieldsplit
ksp = snes.getKSP()
pc = ksp.getPC()
pc.setType("fieldsplit")
pc.setFieldSplitIS(("0", is_0), ("1", is_1))


# Setup and solve
snes.setFromOptions()
snes.ksp.setFromOptions()
snes.solve(None, sol)

Hi, just a debugging suggestion: can you set up the problem without Dirichlet boundary conditions and test whether that runs in parallel?
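For instance, something along these lines with your MWE (just a sketch, everything else left as is) would tell you whether the BC handling is the culprit:

# Sketch: run the same MWE without any Dirichlet data, everything else unchanged.
bcs = []  # no DirichletBC objects, so bcs0 in SNESProblem is empty and zeroRows is never called
problem = SNESProblem([Res_u, Res_p], [Jac_uu, Jac_up, Jac_pu, Jac_pp], u, p, bcs)
# Without the displacement BCs the system is singular (rigid-body modes), so do not
# expect Newton to converge; only compare the assembly and KSP setup on 1 vs N ranks.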

Hi MiroK, I have tested it, and I am sure that it runs in parallel; I checked both the PETSc output and separate Mat() views created by rank.

I suspect there is a problem with the dof ordering, as the exact same code works whenever I don’t use nests.
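For example, this is the kind of per-rank check I am running against the MWE above, comparing dolfin's dof ownership with what the Nest objects report (a sketch, nothing conclusive yet):

# Sketch: compare dolfin's dof ownership with the nest sub-vectors and index sets, per rank.
comm = MPI.COMM_WORLD
xu, xp = sol.getNestSubVecs()
print("rank", comm.rank,
      "Vu ownership", Vu.dofmap().ownership_range(),
      "Vp ownership", Vp.dofmap().ownership_range(),
      "nest u ownership", xu.getOwnershipRange(),
      "nest p ownership", xp.getOwnershipRange(),
      "|is_0| local", is_0.getLocalSize(),
      "|is_1| local", is_1.getLocalSize(),
      flush=True)

If those ranges disagree, the index sets returned by getNestISs would not correspond to the dofs dolfin thinks each rank owns, which would explain why the boundary rows end up in the wrong place.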

Thanks for looking into this!

For those who might run into this issue: I ended up using Firedrake, as its assembly of mixed spaces naturally yields a Nest matrix. As far as I know, the same holds for DolfinX, so both might be better options for this type of problem.
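For completeness, here is a rough sketch of what the equivalent setup looks like in Firedrake (illustrative only: I keep just a clamped left boundary, assume Firedrake's built-in boundary marker 1 for x = 0 on UnitSquareMesh, and reuse the solver options from the MWE rather than anything tuned):

from firedrake import *

mesh = UnitSquareMesh(8, 8)
V = VectorFunctionSpace(mesh, "CG", 2)   # displacement, P2
Q = FunctionSpace(mesh, "DG", 0)         # pressure, P0
Z = V * Q                                # mixed space

z = Function(Z)
u, p = split(z)
v, q = TestFunctions(Z)

d = mesh.geometric_dimension()
I = Identity(d)
F = I + grad(u)
J = det(F)
H = J * inv(F).T
ap, bp = Constant(9000.0), Constant(9000.0)
psi = ap * (inner(F, F) - d) + bp * inner(H, H) - (4 * bp + 2 * ap) * ln(J)
Pi = psi * dx - p * (J - 1) * dx
Res = derivative(Pi, z)  # Firedrake derives the Jacobian of the mixed system itself

bcs = [DirichletBC(Z.sub(0), Constant((0.0, 0.0)), 1)]  # clamp the left boundary (marker 1)

# mat_type "nest" gives a MATNEST Jacobian, and the fieldsplit is built from the mixed space.
solve(Res == 0, z, bcs=bcs, solver_parameters={
    "mat_type": "nest",
    "snes_monitor": None,
    "ksp_type": "gmres",
    "pc_type": "fieldsplit",
    "pc_fieldsplit_type": "schur",
    "pc_fieldsplit_schur_fact_type": "full",
    "pc_fieldsplit_schur_precondition": "full",
    "fieldsplit_0_ksp_type": "preonly",
    "fieldsplit_0_pc_type": "lu",   # in parallel this needs a parallel LU, e.g. MUMPS
    "fieldsplit_1_ksp_type": "preonly",
    "fieldsplit_1_pc_type": "lu",
})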