Mesh partitioning affecting problem solving (scatter_forward() not working properly?)

Dear all,
I’m working on a Navier-Stokes solver using P1-P1 elements with SUPG/PSPG stabilization. My idea is to use nested matrices to avoid assembling the lhs at each time step and only modify the convection and stabilization matrices (and also to use block preconditioning methods) inside a Picard iteration schme to handle the nonlinearity of the PDEs. Currently, my solver runs smoothly in serial but in parallel the ghost updates seems to not be working as the mesh partitioning is visible on the solutions and makes them diverge.

This issue is also reproducible with the stokes problem. Consider the following (not so) MWE based on this tutorial by dokken:

import gmsh
import numpy as np
from basix.ufl import element
from dolfinx.fem import (Constant, Function, bcs_by_block, dirichletbc,
                         extract_function_spaces, form, functionspace,
                         locate_dofs_topological)
from dolfinx.fem.petsc import (_assemble_vector_nest_vec, apply_lifting_nest,
                               assemble_matrix, assemble_vector_nest,
                               set_bc_nest)
from dolfinx.io import XDMFFile, gmshio
from dolfinx.la import create_petsc_vector_wrap
from mpi4py import MPI
from petsc4py import PETSc
from ufl import TestFunction, TrialFunction, div, dx, grad, inner

gmsh.initialize()

L = 2.2
H = 0.41
c_x = c_y = 0.2
r = 0.05
gdim = 2
mesh_comm = MPI.COMM_WORLD
model_rank = 0
if mesh_comm.rank == model_rank:
    rectangle = gmsh.model.occ.addRectangle(0, 0, 0, L, H, tag=1)
    obstacle = gmsh.model.occ.addDisk(c_x, c_y, 0, r, r)

if mesh_comm.rank == model_rank:
    fluid = gmsh.model.occ.cut([(gdim, rectangle)], [(gdim, obstacle)])
    gmsh.model.occ.synchronize()

fluid_marker = 1
if mesh_comm.rank == model_rank:
    volumes = gmsh.model.getEntities(dim=gdim)
    assert (len(volumes) == 1)
    gmsh.model.addPhysicalGroup(volumes[0][0], [volumes[0][1]], fluid_marker)
    gmsh.model.setPhysicalName(volumes[0][0], fluid_marker, "Fluid")

inlet_marker, outlet_marker, wall_marker, obstacle_marker = 2, 3, 4, 5
inflow, outflow, walls, obstacle = [], [], [], []
if mesh_comm.rank == model_rank:
    boundaries = gmsh.model.getBoundary(volumes, oriented=False)
    for boundary in boundaries:
        center_of_mass = gmsh.model.occ.getCenterOfMass(boundary[0], boundary[1])
        if np.allclose(center_of_mass, [0, H / 2, 0]):
            inflow.append(boundary[1])
        elif np.allclose(center_of_mass, [L, H / 2, 0]):
            outflow.append(boundary[1])
        elif np.allclose(center_of_mass, [L / 2, H, 0]) or np.allclose(center_of_mass, [L / 2, 0, 0]):
            walls.append(boundary[1])
        else:
            obstacle.append(boundary[1])
    gmsh.model.addPhysicalGroup(1, walls, wall_marker)
    gmsh.model.setPhysicalName(1, wall_marker, "Walls")
    gmsh.model.addPhysicalGroup(1, inflow, inlet_marker)
    gmsh.model.setPhysicalName(1, inlet_marker, "Inlet")
    gmsh.model.addPhysicalGroup(1, outflow, outlet_marker)
    gmsh.model.setPhysicalName(1, outlet_marker, "Outlet")
    gmsh.model.addPhysicalGroup(1, obstacle, obstacle_marker)
    gmsh.model.setPhysicalName(1, obstacle_marker, "Obstacle")

# Create distance field from obstacle.
# Add threshold of mesh sizes based on the distance field
# LcMax -                  /--------
#                      /
# LcMin -o---------/
#        |         |       |
#       Point    DistMin DistMax
res_min = r / 3
if mesh_comm.rank == model_rank:
    distance_field = gmsh.model.mesh.field.add("Distance")
    gmsh.model.mesh.field.setNumbers(distance_field, "EdgesList", obstacle)
    threshold_field = gmsh.model.mesh.field.add("Threshold")
    gmsh.model.mesh.field.setNumber(threshold_field, "IField", distance_field)
    gmsh.model.mesh.field.setNumber(threshold_field, "LcMin", res_min)
    gmsh.model.mesh.field.setNumber(threshold_field, "LcMax", 0.25 * H)
    gmsh.model.mesh.field.setNumber(threshold_field, "DistMin", r)
    gmsh.model.mesh.field.setNumber(threshold_field, "DistMax", 2 * H)
    min_field = gmsh.model.mesh.field.add("Min")
    gmsh.model.mesh.field.setNumbers(min_field, "FieldsList", [threshold_field])
    gmsh.model.mesh.field.setAsBackgroundMesh(min_field)

if mesh_comm.rank == model_rank:
    gmsh.option.setNumber("Mesh.Algorithm", 8)
    gmsh.option.setNumber("Mesh.RecombinationAlgorithm", 2)
    gmsh.option.setNumber("Mesh.RecombineAll", 1)
    gmsh.option.setNumber("Mesh.SubdivisionAlgorithm", 1)
    gmsh.model.mesh.generate(gdim)
    gmsh.model.mesh.setOrder(2)
    gmsh.model.mesh.optimize("Netgen")

# Loading mesh and boundary markers
mesh, _, ft = gmshio.model_to_mesh(gmsh.model, mesh_comm, model_rank, gdim=gdim)
ft.name = "Facet markers"

# Physical and discretization parameters
t = 0
T = 8                       # Final time
dt = 1 / 1600                 # Time step size
num_steps = int(T / dt)
k = Constant(mesh, PETSc.ScalarType(dt))
mu = Constant(mesh, PETSc.ScalarType(0.001))  # Dynamic viscosity
rho = Constant(mesh, PETSc.ScalarType(1))     # Density

# Function spaces
v_cg2 = element("Lagrange", mesh.topology.cell_name(), 2, shape=(mesh.geometry.dim, ))
s_cg1 = element("Lagrange", mesh.topology.cell_name(), 1)
V = functionspace(mesh, v_cg2)
Q = functionspace(mesh, s_cg1)
fdim = mesh.topology.dim - 1

# Function space for velocity visualization
v_cg1 = element("Lagrange", mesh.topology.cell_name(), 2, shape=(mesh.geometry.dim, ))
V1 = functionspace(mesh, v_cg1)

# Define boundary conditions
class InletVelocity():
    def __init__(self, t):
        self.t = t
    def __call__(self, x):
        values = np.zeros((gdim, x.shape[1]), dtype=PETSc.ScalarType)
        values[0] = 4 * 1.5 * np.sin(self.t * np.pi / 8) * x[1] * (0.41 - x[1]) / (0.41**2)
        return values

# Inlet
u_inlet = Function(V)
inlet_velocity = InletVelocity(t)
u_inlet.interpolate(inlet_velocity)
bcu_inflow = dirichletbc(u_inlet, locate_dofs_topological(V, fdim, ft.find(inlet_marker)))
# Walls
u_nonslip = np.array((0,) * mesh.geometry.dim, dtype=PETSc.ScalarType)
bcu_walls = dirichletbc(u_nonslip, locate_dofs_topological(V, fdim, ft.find(wall_marker)), V)
# Obstacle
bcu_obstacle = dirichletbc(u_nonslip, locate_dofs_topological(V, fdim, ft.find(obstacle_marker)), V)
bcs = [bcu_inflow, bcu_obstacle, bcu_walls]

# Trial and test functions
u, v = TrialFunction(V), TestFunction(V)
p, q = TrialFunction(Q), TestFunction(Q)

# Initial condition
u0 = Function(V)
u0.x.array[:] = 0.0

# Fluid strain rate tensor
def eps(u):
  return 0.5*(grad(u) + grad(u).T)

# Variational formulation
a00 = rho/dt*inner(u, v)*dx + inner(2*mu*eps(u), grad(v))*dx
a01 = -p*div(v)*dx
a10 = q*div(u)*dx
L0 = rho/dt*inner(u0, v)*dx
L1 = Constant(mesh, 0.0)*q*dx
a = form([[a00, a01], [a10, None]])
L = form([L0, L1])

# Assemble Stokes problem lhs
A00 = assemble_matrix(form(a00), bcs=bcs, diagonal=1.0)
A00.assemble()
A01 = assemble_matrix(form(a01), bcs=bcs)
A01.assemble()
A10 = assemble_matrix(form(a10), bcs=bcs)
A10.assemble()

# Extract (0,0) block from nested matrix and add convection
A = PETSc.Mat()
A.createNest([[A00, A01], [A10, None]])
A.setUp()
A.assemble()

# Assemble Navier-Stokes problem rhs
b = assemble_vector_nest(L)
b.assemble()

# Create and configure solver
ksp = PETSc.KSP().create(mesh.comm)  # type: ignore
ksp.setOperators(A)
ksp.setType("preonly")
ksp.getPC().setType("lu")
ksp.getPC().setFactorSolverType("mumps")
opts = PETSc.Options()  # type: ignore
opts["mat_mumps_icntl_14"] = 80  # Increase MUMPS working memory
opts["mat_mumps_icntl_24"] = 1  # Option to support solving a singular matrix (pressure nullspace)
opts["mat_mumps_icntl_25"] = 0  # Option to support solving a singular matrix (pressure nullspace)
opts["ksp_error_if_not_converged"] = 1
ksp.setFromOptions()

# Split the solution
u_h = Function(V)
u_h.name = "u"
p_h = Function(Q)
p_h.name = "p"
x = PETSc.Vec().createNest([create_petsc_vector_wrap(u_h.x),
                            create_petsc_vector_wrap(p_h.x)])

# Write initial condition to file
t = 0.0
u_file_xdmf = XDMFFile(mesh.comm, "output/u_test.xdmf", "w")
p_file_xdmf = XDMFFile(mesh.comm, "output/p_test.xdmf", "w")
u_file_xdmf.write_mesh(mesh)
p_file_xdmf.write_mesh(mesh)

# Update ghost values
u_h.x.scatter_forward()
p_h.x.scatter_forward()

# Write initial condition
u_h1 = Function(V1)
u_h1.interpolate(u_h)
u_file_xdmf.write_function(u_h1, t)
# p_file_xdmf.write_function(p_h, t)

# Time stepping loop
t = 0.0
num_steps = 10
for n in range(num_steps):

  # Update time
  t += dt
  if mesh.comm.rank == 0:
    print("Solving time step {:d} (time: {:.4f})".format(n, t), flush=True)

  # Inlet velocity update
  inlet_velocity = InletVelocity(t)
  u_inlet.interpolate(inlet_velocity)

  # Assemble rhs
  b.zeroEntries()
  _assemble_vector_nest_vec(b, L)
  b.assemble()

  # Modify ('lift') the RHS for Dirichlet boundary conditions
  apply_lifting_nest(b, a, bcs=bcs)

  # Sum contributions for vector entries that are share across
  # parallel processes
  for b_sub in b.getNestSubVecs():
    b_sub.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)

  # Set Dirichlet boundary condition values in the RHS vector
  bcs0 = bcs_by_block(extract_function_spaces(L), bcs)
  set_bc_nest(b, bcs0)

  # Compute solution
  ksp.solve(b, x)
  u_h.x.scatter_forward()
  p_h.x.scatter_forward()

  # Update previous time step
  u0.x.array[:] = u_h.x.array

  # Write to file
  u_h1.interpolate(u_h)
  u_file_xdmf.write_function(u_h1, t)
  # p_file_xdmf.write_function(p_h, t)

# Close files
u_file_xdmf.close()
p_file_xdmf.close()

Running in parallel I obtain these results:

I have followed all the tutorial and forums recomendations to correctly handle this without success. Could someone give me a hand with this?

Thanks in advance!

PS: forgot to mention that I’m using a docker image with a nightly build of fenicsx. Running

python3 -c "import dolfinx; print(dolfinx.__version__)"

Gives me 0.9.0.0

I fixed the issue and I will post here the solution just for the record.

If I replace the rhs assembling inside the temporal loop by:

    # Assemble rhs
    option = 4
    if option == 1:
      b.zeroEntries()
      assemble_vector_nest(b, L)
    elif option == 2:
      b.zeroEntries()
      b = assemble_vector_nest(b, L)
    elif option == 3:
      _assemble_vector_nest_vec(b, L)
    elif option == 4:
      b = assemble_vector_nest(L) # works!

the solver only works with option 4. This means that the rhs vector b must be redefined at each time step (I don’t know why but I was expecting to re-use the PETSc vector). If anyone knows why this happens, I would be grateful to hear it.

ZeroEntries doesn’t always do what it should do (don’t ask me why).
Usually using:

  with b.localForm() as b_loc:
        b_loc.set(0)

then assembling should work.

1 Like