Hello @dokken,

Thanks for your answer.

I was able to find the bug in the dofmap.links() call above, hence now by running in parallel the code the code works and gives decent speedups.

Basically what I miss now to continue is a map between local cells in each processor and the global cells in the mesh (or perhaps a serial mesh which is only on the root processor). I have been giving a look at Gather solutions in parallel in FEniCSX - #2 by dokken but it is not clear to me how you construct this map.

The code is below.

```
import dolfinx
from mpi4py import MPI
import numpy as np
import cppyy
def get_num_entities(mesh, tdim):
# Create all connectivities manually (it used to exist a
# create_connectivity_all function)
for d0 in range(tdim):
for d1 in range(tdim):
mesh.topology.create_connectivity(d0, d1)
num_owned_entities = mesh.topology.index_map(tdim).size_local
num_ghost_entities = mesh.topology.index_map(tdim).num_ghosts
num_entities = num_owned_entities + num_ghost_entities
return num_entities
def get_num_cells(mesh):
tdim = mesh.topology.dim
return get_num_entities(mesh, tdim)
cppyy.cppdef(
"""
void* double_array(const std::vector<double>& v) {
double* pf = (double*)malloc(sizeof(double)*v.size());
for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
return pf;
}"""
)
cppyy.cppdef(
"""
void* int_array(const std::vector<int>& v) {
int* pf = (int*)malloc(sizeof(int)*v.size());
for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
return pf;
}"""
)
def create_double_array(vec):
array = cppyy.gbl.double_array(vec)
v = np.frombuffer(array, dtype=np.float64, count=len(vec))
return v
def create_int_array(vec):
array = cppyy.gbl.int_array(vec)
v = np.frombuffer(array, dtype=np.int32, count=len(vec))
return v
def create_list_of_arrays(v):
w = [None] * len(v)
for i in range(len(v)):
w[i] = create_double_array(v[i])
return w
# Main function
def generate_qr(mesh, NN, domain, degree):
cppyy.add_include_path("/usr/include/algoim/algoim")
if domain in ["square", "circle", "sphere"]:
hppfile = f"utils/{domain}.hpp"
else: # it is a gyroid, specific struct is specified later
hppfile = "utils/gyroids.hpp"
cppyy.include(hppfile)
do_map = True
do_scale = True
do_verbose = False
gdim = mesh.geometry.dim
num_cells = get_num_cells(mesh)
num_loc_vertices = 2**gdim
dofmap = mesh.geometry.dofmap
x = mesh.geometry.x
cell_coords = np.zeros((num_loc_vertices, gdim))
t = dolfinx.common.Timer()
for idx in range(num_cells):
dofs = dofmap.links(idx)
for j in range(num_loc_vertices):
cell_coords[j] = x[dofs[j], 0:gdim]
if gdim == 2:
xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1])])
xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1])])
else:
xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1]), min(cell_coords[:, 2])])
xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1]), max(cell_coords[:, 2])])
cppyy.gbl.run(
xmin, xmax, int(num_cells), domain, gdim, idx, int(idx), degree, do_verbose, do_map, do_scale
)
qr_pts_ = create_list_of_arrays(cppyy.gbl.get_qr_pts())
qr_w_ = create_list_of_arrays(cppyy.gbl.get_qr_w())
qr_pts_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_pts_bdry())
qr_w_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_w_bdry())
qr_n_ = create_list_of_arrays(cppyy.gbl.get_qr_n())
cut_cells_ = create_int_array(cppyy.gbl.get_cut_cells())
uncut_cells_ = create_int_array(cppyy.gbl.get_uncut_cells())
outside_cells_ = create_int_array(cppyy.gbl.get_outside_cells())
imap = mesh.topology.index_map(2)
print(imap.local_range)
# local_range = np.asarray(imap.local_range, dtype=np.int32) * dofmap.index_map_bs
# ranges = mesh.comm.gather(local_range, root=0)
# if mesh.comm.rank == 0:
# print(ranges)
qr_pts = mesh.comm.gather(qr_pts_, root=0)
qr_w = mesh.comm.gather(qr_w_, root=0)
qr_pts_bdry = mesh.comm.gather(qr_pts_bdry_, root=0)
qr_w_bdry = mesh.comm.gather(qr_w_bdry_, root=0)
qr_n = mesh.comm.gather(qr_n_, root=0)
cut_cells = mesh.comm.gather(cut_cells_, root=0)
uncut_cells = mesh.comm.gather(uncut_cells_, root=0)
outside_cells = mesh.comm.gather(outside_cells_, root=0)
if (mesh.comm.rank == 0):
qr_pts = [item for sublist in qr_pts for item in sublist]
qr_w = [item for sublist in qr_w for item in sublist]
qr_pts_bdry = [item for sublist in qr_pts_bdry for item in sublist]
qr_w_bdry = [item for sublist in qr_w_bdry for item in sublist]
qr_n = [item for sublist in qr_n for item in sublist]
cut_cells = [item for sublist in cut_cells for item in sublist]
uncut_cells = [item for sublist in uncut_cells for item in sublist]
outside_cells = [item for sublist in outside_cells for item in sublist]
print(f"Time for getting quadrature rule {t.elapsed()[0]}")
# Domain and mesh
xmin = np.array([0, 0])
xmax = np.array([1, 1])
N = 128
phi = "schoen-gyroid"
gdim = len(xmin)
if gdim == 2:
cell_type = dolfinx.mesh.CellType.quadrilateral
mesh_generator = dolfinx.mesh.create_rectangle
else:
cell_type = dolfinx.mesh.CellType.hexahedron
mesh_generator = dolfinx.mesh.create_box
NN = np.array([N] * gdim, dtype=np.int32)
mesh = mesh_generator(MPI.COMM_WORLD, np.array([xmin, xmax]), NN, cell_type, ghost_mode=dolfinx.cpp.mesh.GhostMode.none)
generate_qr(mesh, NN, phi, 1)
```