Hello @dokken,
Thanks for your answer.
I was able to find the bug in the dofmap.links() call above, hence now by running in parallel the code the code works and gives decent speedups.
Basically what I miss now to continue is a map between local cells in each processor and the global cells in the mesh (or perhaps a serial mesh which is only on the root processor). I have been giving a look at Gather solutions in parallel in FEniCSX - #2 by dokken but it is not clear to me how you construct this map.
The code is below.
import dolfinx
from mpi4py import MPI
import numpy as np
import cppyy
def get_num_entities(mesh, tdim):
# Create all connectivities manually (it used to exist a
# create_connectivity_all function)
for d0 in range(tdim):
for d1 in range(tdim):
mesh.topology.create_connectivity(d0, d1)
num_owned_entities = mesh.topology.index_map(tdim).size_local
num_ghost_entities = mesh.topology.index_map(tdim).num_ghosts
num_entities = num_owned_entities + num_ghost_entities
return num_entities
def get_num_cells(mesh):
tdim = mesh.topology.dim
return get_num_entities(mesh, tdim)
cppyy.cppdef(
"""
void* double_array(const std::vector<double>& v) {
double* pf = (double*)malloc(sizeof(double)*v.size());
for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
return pf;
}"""
)
cppyy.cppdef(
"""
void* int_array(const std::vector<int>& v) {
int* pf = (int*)malloc(sizeof(int)*v.size());
for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
return pf;
}"""
)
def create_double_array(vec):
array = cppyy.gbl.double_array(vec)
v = np.frombuffer(array, dtype=np.float64, count=len(vec))
return v
def create_int_array(vec):
array = cppyy.gbl.int_array(vec)
v = np.frombuffer(array, dtype=np.int32, count=len(vec))
return v
def create_list_of_arrays(v):
w = [None] * len(v)
for i in range(len(v)):
w[i] = create_double_array(v[i])
return w
# Main function
def generate_qr(mesh, NN, domain, degree):
cppyy.add_include_path("/usr/include/algoim/algoim")
if domain in ["square", "circle", "sphere"]:
hppfile = f"utils/{domain}.hpp"
else: # it is a gyroid, specific struct is specified later
hppfile = "utils/gyroids.hpp"
cppyy.include(hppfile)
do_map = True
do_scale = True
do_verbose = False
gdim = mesh.geometry.dim
num_cells = get_num_cells(mesh)
num_loc_vertices = 2**gdim
dofmap = mesh.geometry.dofmap
x = mesh.geometry.x
cell_coords = np.zeros((num_loc_vertices, gdim))
t = dolfinx.common.Timer()
for idx in range(num_cells):
dofs = dofmap.links(idx)
for j in range(num_loc_vertices):
cell_coords[j] = x[dofs[j], 0:gdim]
if gdim == 2:
xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1])])
xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1])])
else:
xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1]), min(cell_coords[:, 2])])
xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1]), max(cell_coords[:, 2])])
cppyy.gbl.run(
xmin, xmax, int(num_cells), domain, gdim, idx, int(idx), degree, do_verbose, do_map, do_scale
)
qr_pts_ = create_list_of_arrays(cppyy.gbl.get_qr_pts())
qr_w_ = create_list_of_arrays(cppyy.gbl.get_qr_w())
qr_pts_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_pts_bdry())
qr_w_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_w_bdry())
qr_n_ = create_list_of_arrays(cppyy.gbl.get_qr_n())
cut_cells_ = create_int_array(cppyy.gbl.get_cut_cells())
uncut_cells_ = create_int_array(cppyy.gbl.get_uncut_cells())
outside_cells_ = create_int_array(cppyy.gbl.get_outside_cells())
imap = mesh.topology.index_map(2)
print(imap.local_range)
# local_range = np.asarray(imap.local_range, dtype=np.int32) * dofmap.index_map_bs
# ranges = mesh.comm.gather(local_range, root=0)
# if mesh.comm.rank == 0:
# print(ranges)
qr_pts = mesh.comm.gather(qr_pts_, root=0)
qr_w = mesh.comm.gather(qr_w_, root=0)
qr_pts_bdry = mesh.comm.gather(qr_pts_bdry_, root=0)
qr_w_bdry = mesh.comm.gather(qr_w_bdry_, root=0)
qr_n = mesh.comm.gather(qr_n_, root=0)
cut_cells = mesh.comm.gather(cut_cells_, root=0)
uncut_cells = mesh.comm.gather(uncut_cells_, root=0)
outside_cells = mesh.comm.gather(outside_cells_, root=0)
if (mesh.comm.rank == 0):
qr_pts = [item for sublist in qr_pts for item in sublist]
qr_w = [item for sublist in qr_w for item in sublist]
qr_pts_bdry = [item for sublist in qr_pts_bdry for item in sublist]
qr_w_bdry = [item for sublist in qr_w_bdry for item in sublist]
qr_n = [item for sublist in qr_n for item in sublist]
cut_cells = [item for sublist in cut_cells for item in sublist]
uncut_cells = [item for sublist in uncut_cells for item in sublist]
outside_cells = [item for sublist in outside_cells for item in sublist]
print(f"Time for getting quadrature rule {t.elapsed()[0]}")
# Domain and mesh
xmin = np.array([0, 0])
xmax = np.array([1, 1])
N = 128
phi = "schoen-gyroid"
gdim = len(xmin)
if gdim == 2:
cell_type = dolfinx.mesh.CellType.quadrilateral
mesh_generator = dolfinx.mesh.create_rectangle
else:
cell_type = dolfinx.mesh.CellType.hexahedron
mesh_generator = dolfinx.mesh.create_box
NN = np.array([N] * gdim, dtype=np.int32)
mesh = mesh_generator(MPI.COMM_WORLD, np.array([xmin, xmax]), NN, cell_type, ghost_mode=dolfinx.cpp.mesh.GhostMode.none)
generate_qr(mesh, NN, phi, 1)