Parallel computation of quadrature rules

Hello @dokken,

Thanks for your answer.

I was able to find the bug in the dofmap.links() call above, hence now by running in parallel the code the code works and gives decent speedups.

Basically what I miss now to continue is a map between local cells in each processor and the global cells in the mesh (or perhaps a serial mesh which is only on the root processor). I have been giving a look at Gather solutions in parallel in FEniCSX - #2 by dokken but it is not clear to me how you construct this map.

The code is below.

import dolfinx
from mpi4py import MPI
import numpy as np
import cppyy

def get_num_entities(mesh, tdim):
    # Create all connectivities manually (it used to exist a
    # create_connectivity_all function)
    for d0 in range(tdim):
        for d1 in range(tdim):
            mesh.topology.create_connectivity(d0, d1)
    num_owned_entities = mesh.topology.index_map(tdim).size_local
    num_ghost_entities = mesh.topology.index_map(tdim).num_ghosts
    num_entities = num_owned_entities + num_ghost_entities
    return num_entities


def get_num_cells(mesh):
    tdim = mesh.topology.dim
    return get_num_entities(mesh, tdim)


cppyy.cppdef(
    """
    void* double_array(const std::vector<double>& v) {
      double* pf = (double*)malloc(sizeof(double)*v.size());
      for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
      return pf;
    }"""
)

cppyy.cppdef(
    """
    void* int_array(const std::vector<int>& v) {
      int* pf = (int*)malloc(sizeof(int)*v.size());
      for (std::size_t i = 0; i < v.size(); ++i) pf[i] = v[i];
      return pf;
    }"""
)


def create_double_array(vec):
    array = cppyy.gbl.double_array(vec)
    v = np.frombuffer(array, dtype=np.float64, count=len(vec))
    return v


def create_int_array(vec):
    array = cppyy.gbl.int_array(vec)
    v = np.frombuffer(array, dtype=np.int32, count=len(vec))
    return v


def create_list_of_arrays(v):
    w = [None] * len(v)
    for i in range(len(v)):
        w[i] = create_double_array(v[i])
    return w


# Main function
def generate_qr(mesh, NN, domain, degree):
    cppyy.add_include_path("/usr/include/algoim/algoim")
    if domain in ["square", "circle", "sphere"]:
        hppfile = f"utils/{domain}.hpp"
    else:  # it is a gyroid, specific struct is specified later
        hppfile = "utils/gyroids.hpp"

    cppyy.include(hppfile)
    do_map = True
    do_scale = True
    do_verbose = False

    gdim = mesh.geometry.dim
    num_cells = get_num_cells(mesh)

    num_loc_vertices = 2**gdim
    dofmap = mesh.geometry.dofmap
    x = mesh.geometry.x
    cell_coords = np.zeros((num_loc_vertices, gdim))

    t = dolfinx.common.Timer()
    
    for idx in range(num_cells):
        dofs = dofmap.links(idx)
        for j in range(num_loc_vertices):
            cell_coords[j] = x[dofs[j], 0:gdim]
        if gdim == 2:
            xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1])])
            xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1])])
        else:
            xmin = np.array([min(cell_coords[:, 0]), min(cell_coords[:, 1]), min(cell_coords[:, 2])])
            xmax = np.array([max(cell_coords[:, 0]), max(cell_coords[:, 1]), max(cell_coords[:, 2])])
    
        cppyy.gbl.run(
            xmin, xmax, int(num_cells), domain, gdim, idx, int(idx), degree, do_verbose, do_map, do_scale
        )
    
    qr_pts_ = create_list_of_arrays(cppyy.gbl.get_qr_pts())
    qr_w_ = create_list_of_arrays(cppyy.gbl.get_qr_w())
    qr_pts_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_pts_bdry())
    qr_w_bdry_ = create_list_of_arrays(cppyy.gbl.get_qr_w_bdry())
    qr_n_ = create_list_of_arrays(cppyy.gbl.get_qr_n())
    
    cut_cells_ = create_int_array(cppyy.gbl.get_cut_cells())
    uncut_cells_ = create_int_array(cppyy.gbl.get_uncut_cells())
    outside_cells_ = create_int_array(cppyy.gbl.get_outside_cells())

    imap = mesh.topology.index_map(2)
    print(imap.local_range)
    # local_range = np.asarray(imap.local_range, dtype=np.int32) * dofmap.index_map_bs
    # ranges = mesh.comm.gather(local_range, root=0)

    # if mesh.comm.rank == 0:
    #     print(ranges)

    qr_pts = mesh.comm.gather(qr_pts_, root=0)
    qr_w = mesh.comm.gather(qr_w_, root=0)
    qr_pts_bdry = mesh.comm.gather(qr_pts_bdry_, root=0)
    qr_w_bdry = mesh.comm.gather(qr_w_bdry_, root=0)
    qr_n = mesh.comm.gather(qr_n_, root=0)
    cut_cells = mesh.comm.gather(cut_cells_, root=0)
    uncut_cells = mesh.comm.gather(uncut_cells_, root=0)
    outside_cells = mesh.comm.gather(outside_cells_, root=0)
    if (mesh.comm.rank == 0):
        qr_pts = [item for sublist in qr_pts for item in sublist]
        qr_w = [item for sublist in qr_w for item in sublist]
        qr_pts_bdry = [item for sublist in qr_pts_bdry for item in sublist]
        qr_w_bdry = [item for sublist in qr_w_bdry for item in sublist]
        qr_n = [item for sublist in qr_n for item in sublist]
        cut_cells = [item for sublist in cut_cells for item in sublist]
        uncut_cells = [item for sublist in uncut_cells for item in sublist]
        outside_cells = [item for sublist in outside_cells for item in sublist]

        print(f"Time for getting quadrature rule {t.elapsed()[0]}")

# Domain and mesh
xmin = np.array([0, 0])
xmax = np.array([1, 1])
N = 128
phi = "schoen-gyroid"

gdim = len(xmin)

if gdim == 2:
    cell_type = dolfinx.mesh.CellType.quadrilateral
    mesh_generator = dolfinx.mesh.create_rectangle
else:
    cell_type = dolfinx.mesh.CellType.hexahedron
    mesh_generator = dolfinx.mesh.create_box

NN = np.array([N] * gdim, dtype=np.int32)
mesh = mesh_generator(MPI.COMM_WORLD, np.array([xmin, xmax]), NN, cell_type, ghost_mode=dolfinx.cpp.mesh.GhostMode.none)
generate_qr(mesh, NN, phi, 1)