Missing Cells When Reading in Mesh in Parallel

Hello,

I am currently having some issues reading in a mesh with about 400,000 elements in parallel in FEniCSx using XDMFFile. My code is as follows:

from petsc4py import PETSc
import numpy as np
from ufl import (TestFunction, TrialFunction, div, dot, dx, grad, nabla_grad, inner, outer, split, Identity, derivative, CellDiameter)
from basix.ufl import element, mixed_element
from dolfinx import la, default_scalar_type, log
from dolfinx.fem import (
    Constant,
    Expression,
    Function,
    dirichletbc,
    DirichletBC,
    functionspace,
    locate_dofs_topological,
)
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
from dolfinx.io import XDMFFile, VTXWriter
from mpi4py import MPI
from pathlib import Path
from petsc4py.PETSc import ScalarType
import adios4dolfinx


################# Read in Mesh
with XDMFFile(MPI.COMM_WORLD, "Aneur_400k_X.xdmf", "r") as xdmf:
    mesh = xdmf.read_mesh(name="Grid")
    cd = xdmf.read_meshtags(mesh, name="Grid")
mesh.topology.create_connectivity(mesh.topology.dim, mesh.topology.dim - 1)

with XDMFFile(MPI.COMM_WORLD, "Aneur_400k_X_facet.xdmf", "r") as xdmf:
    fd = xdmf.read_meshtags(mesh, name="Grid")
mesh.topology.create_connectivity(mesh.topology.dim - 1, mesh.topology.dim)

tdim = mesh.topology.dim
fdim = tdim - 1

################# Define Finite Elements

# Taylor-Hood Elements
K = 1
U_el = element("CG", mesh.basix_cell(), K + 1, shape = (mesh.geometry.dim,)) # Velocity Element
P_el = element("CG", mesh.basix_cell(), K) # Pressure Element


################# Define Function Space
W_el = mixed_element([U_el, P_el]) # Define Mixed element
W = functionspace(mesh, W_el) # Define Mixed Function Space


################# Define Trial and Test Functions
du = TrialFunction(W) # For Jacobian
up = Function(W)
u,p = split(up)

vq = TestFunction(W)
v,q = split(vq)


################# Define Function Spaces for Runge-Kutta Terms
V_sub, _ = W.sub(0).collapse()
u_1 = Function(V_sub)

################ Set up Initial Conditions using initial value for each RK Term
U_initial = 0.1 # Initial Speed
n_in = [-0.3419, 0.9251, 0.1651]
for i, vel in enumerate([-U_initial*n_in[0], -U_initial*n_in[1], -U_initial*n_in[2]]):
    u_1.sub(i).interpolate(lambda x, vel_in= vel: np.full(x.shape[1], vel_in, dtype=np.float64))

#### Check if Velocity was assigned properly
folder = Path("../scratch")
folder.mkdir(exist_ok=True, parents=True)
vtx_u = VTXWriter(mesh.comm, folder / "Vel_check.bp", [u_1], engine="BP4")
vtx_u.write(0.0)

My mesh was converted from a Gmsh file to XDMF using:

from dolfinx.io import gmshio
from mpi4py import MPI
import meshio
import numpy as np

mesh, cell_markers, facet_markers = gmshio.read_from_msh("Aneur_400k_X.msh", MPI.COMM_WORLD, gdim=3)

def create_mesh(mesh, cell_type, prune_z=False):
    cells = mesh.get_cells_type(cell_type)
    cell_data = mesh.get_cell_data("gmsh:physical", cell_type)
    points = mesh.points[:, :3] if prune_z else mesh.points
    out_mesh = meshio.Mesh(points=points, cells={cell_type: cells}, cell_data={"name_to_read": [cell_data.astype(np.int32)]})
    return out_mesh
    
    
#if proc == 0:
# Read in mesh
msh = meshio.read("Aneur_400k_X.msh")

# Create and save one file for the mesh, and one file for the facets
tetra_mesh = create_mesh(msh, "tetra", prune_z=True) # Mesh domain
triangle_mesh = create_mesh(msh, "triangle", prune_z=True) # Mesh 2D surface
meshio.write("Aneur_400k_X", tetra_mesh)
meshio.write("Aneur_400k_X_facet.xdmf", triangle_mesh)
MPI.COMM_WORLD.barrier()

When I read the mesh in serial, everything is fine and the mesh looks correct in the output:

However, when I read the mesh in parallel, there are missing cells, as can be seen in the photos below:


I have tried reading in smaller, simpler meshes in parallel using the above code, and they work fine. However, every mesh I have generated for this geometry shows missing cells when read in parallel.

I appreciate any help, thanks!

Please avoid using meshio, and simply use

mesh, cell_markers, facet_markers = gmshio.read_from_msh("Aneur_400k_X.msh", MPI.COMM_WORLD, gdim=3)

in your main file. At the very least, this would allow us to determine whether (1) there is something wrong in the conversion, or (2) there is an error/limitation in XDMFFile.

Hi Francesco,

Thanks for replying. I was actually just trying that:

from petsc4py import PETSc
import numpy as np
from ufl import (TestFunction, TrialFunction, div, dot, dx, grad, nabla_grad, inner, outer, split, Identity, derivative, CellDiameter)
from basix.ufl import element, mixed_element
from dolfinx import la, default_scalar_type, log
from dolfinx.fem import (
    Constant,
    Expression,
    Function,
    dirichletbc,
    DirichletBC,
    functionspace,
    locate_dofs_topological,
)
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
from dolfinx.io import XDMFFile, VTXWriter
from mpi4py import MPI
from pathlib import Path
from petsc4py.PETSc import ScalarType
import adios4dolfinx
from dolfinx.io import gmshio


################# Read in Mesh
mesh, cd, fd = gmshio.read_from_msh("Aneur_400k.msh", MPI.COMM_WORLD, gdim=3)

tdim = mesh.topology.dim
fdim = tdim - 1

################# Define Finite Elements

# Taylor-Hood Elements
K = 1
U_el = element("CG", mesh.basix_cell(), K + 1, shape = (mesh.geometry.dim,)) # Velocity Element
P_el = element("CG", mesh.basix_cell(), K) # Pressure Element


################# Define Function Space
W_el = mixed_element([U_el, P_el]) # Define Mixed element
W = functionspace(mesh, W_el) # Define Mixed Function Space


################# Define Trial and Test Functions
du = TrialFunction(W) # For Jacobian
up = Function(W)
u,p = split(up)

vq = TestFunction(W)
v,q = split(vq)


################# Define Function Spaces for Runge-Kutta Terms
V_sub, _ = W.sub(0).collapse()
u_1 = Function(V_sub)

################ Set up Initial Conditions using initial value for each RK Term
U_initial = 0.1 # Initial Speed
n_in = [-0.3419, 0.9251, 0.1651]
for i, vel in enumerate([-U_initial*n_in[0], -U_initial*n_in[1], -U_initial*n_in[2]]):
    u_1.sub(i).interpolate(lambda x, vel_in= vel: np.full(x.shape[1], vel_in, dtype=np.float64))

#### Check if Velocity was assigned properly
folder = Path("../scratch")
folder.mkdir(exist_ok=True, parents=True)
vtx_u = VTXWriter(mesh.comm, folder / "Vel_check.bp", [u_1], engine="BP4")
vtx_u.write(0.0)

Importing with gmshio seems to be fine in parallel; I did not notice any missing cells when I inspected the output.

Great. If you want to debug further, please export the mesh (and the corresponding mesh tags) you read with gmshio to an XDMFFile, and then try to re-load them. There is no need to create two separate XDMF files.

If that works, it should mean that the issue was introduced with the conversion by meshio. We don’t maintain meshio, so it’s not up to us to fix that.

If that doesn’t work, it should mean that something odd is happening when writing to and/or reading from the XDMF file.

I debugged further using:

from petsc4py import PETSc
import numpy as np
from ufl import (TestFunction, TrialFunction, div, dot, dx, grad, nabla_grad, inner, outer, split, Identity, derivative, CellDiameter)
from basix.ufl import element, mixed_element
from dolfinx import la, default_scalar_type, log
from dolfinx.fem import (
    Constant,
    Expression,
    Function,
    dirichletbc,
    DirichletBC,
    functionspace,
    locate_dofs_topological,
)
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
from dolfinx.io import XDMFFile, VTXWriter
from mpi4py import MPI
from pathlib import Path
from petsc4py.PETSc import ScalarType
import adios4dolfinx
from dolfinx.io import gmshio


################# Read in Mesh
mesh_in, cd_in, fd_in = gmshio.read_from_msh("Aneur_400k.msh", MPI.COMM_WORLD, gdim=3)
mesh_in.name = "Grid"
cd_in.name = "Cells"
fd_in.name = "Facets"

################# Write Mesh
with XDMFFile(mesh_in.comm, "mesh1.xdmf", "w") as xdmf:
    xdmf.write_mesh(mesh_in)
    xdmf.write_meshtags(fd_in, mesh_in.geometry)
    xdmf.write_meshtags(cd_in, mesh_in.geometry)
    
with XDMFFile(MPI.COMM_WORLD, "mesh1.xdmf", "r") as xdmf:
    mesh_out = xdmf.read_mesh(name="Grid")
    cd_out = xdmf.read_meshtags(mesh_out, name="Cells")
    mesh_out.topology.create_connectivity(mesh_out.topology.dim, mesh_out.topology.dim - 1)
    
    fd_out = xdmf.read_meshtags(mesh_out, name = "Facets")
    mesh_out.topology.create_connectivity(mesh_out.topology.dim - 1, mesh_out.topology.dim)
    
    
with XDMFFile(mesh_out.comm, "mesh2.xdmf", "w") as xdmf:
    xdmf.write_mesh(mesh_out)
    xdmf.write_meshtags(fd_out, mesh_out.geometry)
    xdmf.write_meshtags(cd_out, mesh_out.geometry)
    
# Taylor-Hood Elements
K = 1
U_el = element("CG", mesh_out.basix_cell(), K + 1, shape = (mesh_out.geometry.dim,)) # Velocity Element
P_el = element("CG", mesh_out.basix_cell(), K) # Pressure Element


################# Define Function Space
W_el = mixed_element([U_el, P_el]) # Define Mixed element
W = functionspace(mesh_out, W_el) # Define Mixed Function Space


################# Define Trial and Test Functions
du = TrialFunction(W) # For Jacobian
up = Function(W)
u,p = split(up)

vq = TestFunction(W)
v,q = split(vq)


################# Define Function Spaces for Runge-Kutta Terms
V_sub, _ = W.sub(0).collapse()
u_1 = Function(V_sub)

U_initial = 0.1 # Initial Speed
n_in = [-0.3419, 0.9251, 0.1651]
for i, vel in enumerate([-U_initial*n_in[0], -U_initial*n_in[1], -U_initial*n_in[2]]):
    u_1.sub(i).interpolate(lambda x, vel_in= vel: np.full(x.shape[1], vel_in, dtype=np.float64))

vtx_u = VTXWriter(mesh_out.comm, "Vel_check.bp", [u_1], engine="BP4")
vtx_u.write(0.0)

Interestingly enough, both of the XDMF mesh outputs look great when this code is run in parallel. However, the file output by VTX in this script is missing cells when run in parallel (but is fine in serial). Hence, considering that meshes read in with gmshio work with VTX in parallel, are meshes read in from the XDMF format simply not compatible with VTX when run in parallel? I would use XDMF for output, but that would not be ideal, since XDMF requires linear function spaces and I need higher-order fields for my research.
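A minimal sketch of that XDMF route, reusing mesh_out and u_1 from the script above and interpolating into an illustrative first-order vector space (V_out, u_out, xdmf_vis are hypothetical names introduced here, not part of the original script):

# Interpolate the higher-order velocity into a degree-1 Lagrange space,
# since, as noted above, XDMF output requires a linear function space.
V_out = functionspace(mesh_out, ("Lagrange", 1, (mesh_out.geometry.dim,)))
u_out = Function(V_out)
u_out.interpolate(u_1)

with XDMFFile(mesh_out.comm, "Vel_check.xdmf", "w") as xdmf_vis:
    xdmf_vis.write_mesh(mesh_out)
    xdmf_vis.write_function(u_out, 0.0)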

You’ll need to provide a mesh for anyone to be able to reproduce. At the same time, if only the 400k mesh shows the issue and it requires a cluster, not everyone (myself, for instance) will have a cluster at their disposal to test this.

It might be that you are seeing a visualization artifact of VTX in ParaView.
Could you check that the sum of the locally owned cell counts over all processes equals the number of global cells in your mesh, i.e.

cell_map = mesh.topology.index_map(mesh.topology.dim)
num_cells_local = cell_map.size_local
num_cells_global = cell_map.size_global
accum_cells = mesh.comm.allreduce(num_cells_local, op=MPI.SUM)
assert accum_cells == num_cells_global

and then check whether this is the number of cells reported in ParaView?
As you can see from the VTX source code, we store only owned cells, the same as XDMFFile (dolfinx/cpp/dolfinx/io/xdmf_mesh.cpp at 1dd03964fa6ca0e893d71466ec66ffd1186c1698 in the FEniCS/dolfinx repository on GitHub).

Hi Francesco,

The mesh can be found at:

I have been able to reproduce the error locally by running the debugging script I posted above on just a few processes (e.g. mpirun -n 2 python3 iotest.py).

Hi Dokken,

I tried this:

from petsc4py import PETSc
import numpy as np
from ufl import (TestFunction, TrialFunction, div, dot, dx, grad, nabla_grad, inner, outer, split, Identity, derivative, CellDiameter)
from basix.ufl import element, mixed_element
from dolfinx import la, default_scalar_type, log
from dolfinx.fem import (
    Constant,
    Expression,
    Function,
    dirichletbc,
    DirichletBC,
    functionspace,
    locate_dofs_topological,
)
from dolfinx.fem.petsc import NonlinearProblem
from dolfinx.nls.petsc import NewtonSolver
from dolfinx.io import XDMFFile, VTXWriter, gmshio
from mpi4py import MPI
from pathlib import Path
from petsc4py.PETSc import ScalarType
import adios4dolfinx


################# Read in Mesh
#mesh, cd, fd = gmshio.read_from_msh("Aneur_400k.msh", MPI.COMM_WORLD, gdim=3)
with XDMFFile(MPI.COMM_WORLD, "Aneur_400k.xdmf", "r") as xdmf:
    mesh = xdmf.read_mesh(name="Grid")
    cd = xdmf.read_meshtags(mesh, name="Cells")
    mesh.topology.create_connectivity(mesh.topology.dim, mesh.topology.dim - 1)
    
    fd = xdmf.read_meshtags(mesh, name = "Facets")
    mesh.topology.create_connectivity(mesh.topology.dim - 1, mesh.topology.dim)

tdim = mesh.topology.dim
fdim = tdim - 1

################# Define Finite Elements

# Taylor-Hood Elements
K = 1
U_el = element("CG", mesh.basix_cell(), K + 1, shape = (mesh.geometry.dim,)) # Velocity Element
P_el = element("CG", mesh.basix_cell(), K) # Pressure Element


################# Define Function Space
W_el = mixed_element([U_el, P_el]) # Define Mixed element
W = functionspace(mesh, W_el) # Define Mixed Function Space


################# Define Trial and Test Functions
du = TrialFunction(W) # For Jacobian
up = Function(W)
u,p = split(up)

vq = TestFunction(W)
v,q = split(vq)


################# Define Function Spaces for Runge-Kutta Terms
V_sub, _ = W.sub(0).collapse()
u_1 = Function(V_sub)

################ Set up Initial Conditions using initial value for each RK Term
U_initial = 0.1 # Initial Speed
n_in = [-0.3419, 0.9251, 0.1651]
for i, vel in enumerate([-U_initial*n_in[0], -U_initial*n_in[1], -U_initial*n_in[2]]):
    u_1.sub(i).interpolate(lambda x, vel_in= vel: np.full(x.shape[1], vel_in, dtype=np.float64))

#### Check if Velocity was assigned properly
vtx_u = VTXWriter(mesh.comm, "Vel_check.bp", [u_1], engine="BP4")
vtx_u.write(0.0)


cell_map = mesh.topology.index_map(mesh.topology.dim)
num_cells_local = cell_map.size_local
num_cells_global = cell_map.size_global
accum_cells = mesh.comm.allreduce(num_cells_local, op=MPI.SUM)
assert accum_cells == num_cells_global

print(num_cells_local)
print(accum_cells)
print(num_cells_global)

When I load the mesh in parallel using XDMF, accum_cells does equal num_cells_global. However, this number does not match the number of cells in ParaView. The value printed for accum_cells/num_cells_global is 401854, which is the correct number of cells as displayed in Gmsh, but ParaView reports 403766 cells. When I instead run in parallel using gmshio, or in serial with XDMF, the number displayed in ParaView matches the accumulated/global cell count.

When I run the XDMF import in parallel and do:

cell_map = mesh.topology.index_map(mesh.topology.dim)
num_cells_local = cell_map.size_local
num_ghost= cell_map.num_ghosts
local_sum = num_cells_local+num_ghost
global_sum = mesh.comm.allreduce(local_sum, op=MPI.SUM)

print(global_sum)

I obtain 403766, the number of cells reported in ParaView. Hence, it looks like cells that are not owned by the process are also being written.

Furthermore, when I read the mesh in parallel using gmshio, there are no ghost cells, which might be why VTX works with gmshio but not with XDMF.

Right, so you are writing a function to file, not a mesh. That explains the duplicate cells: VTXWriter writes a local chunk of the mesh (owned cells plus ghosts) to file, as otherwise it would get rather tricky to avoid floating nodes.
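A minimal sketch of how the XDMF read could mirror gmshio's ghost-free partitioning observed above, assuming the installed DOLFINx version exposes the ghost_mode argument of read_mesh (mesh_ng is an illustrative name):

from dolfinx.mesh import GhostMode

with XDMFFile(MPI.COMM_WORLD, "mesh1.xdmf", "r") as xdmf:
    # Request a partition without ghost cells, matching what was observed with gmshio
    mesh_ng = xdmf.read_mesh(name="Grid", ghost_mode=GhostMode.none)

cell_map = mesh_ng.topology.index_map(mesh_ng.topology.dim)
assert cell_map.num_ghosts == 0  # no ghost cells on this rank, so only owned cells would be written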