Cell numbering: inconsistent gmsh <=> dolfinx numbering

As you can see here, this has nothing to do with DOLFINx.
This is extracting each cell (entity) in GMSH in the order, and getting the nodal definition of them.

Here is a slightly adapted version of `dolfinx.io.gmshio.model_to_mesh, that can read in scalar fields or vector fields, as long as they are defined with physical groups.

"""
Extract GMSH data (float values) from a mesh and assign to dolfinx mesh
Author: Jørgen S. Dokken
SPDX-License-Identifier: LGPL-3.0-or-later
Certain functions for topology extraction is adapted from DOLFINx (dolfinx.io.gmshio)
and is licensed under the LGPL-3.0-or-later license. The original code can be found at
https://github.com/FEniCS/dolfinx/blob/main/python/dolfinx/io/gmshio.py
"""

import dolfinx.io.gmshio
import dolfinx.io.gmshio
import numpy as np
from mpi4py import MPI
from dolfinx import fem
import gmsh
from mpi4py import MPI
import typing
import numpy.typing as npt


class ExtendedTopologyDict(dolfinx.io.gmshio.TopologyDict):
    element_tags: npt.NDArray[typing.Any]


def extract_topology_and_markers(
    model, name: typing.Optional[str] = None
) -> tuple[dict[int, ExtendedTopologyDict], dict[str, tuple[int, int]]]:
    """Extract all entities tagged with a physical marker in the gmsh model.

    Returns a nested dictionary where the first key is the gmsh MSH
    element type integer. Each element type present in the model
    contains the cell topology of the elements and corresponding
    markers.

    Args:
        model: Gmsh model.
        name: Name of the gmsh model. If not set the current
            model will be used.

    Returns:
        A tuple ``(topologies, physical_groups)``, where ``topologies`` is a
        nested dictionary where each key corresponds to a gmsh cell
        type. Each cell type found in the mesh has a 2D array containing
        the topology of the marked cell and a list with the
        corresponding markers. ``physical_groups`` is a dictionary where the key
        is the physical name and the value is a tuple with the dimension
        and tag.

    """
    if name is not None:
        model.setCurrent(name)

    # Get the physical groups from gmsh in the form [(dim1, tag1),
    # (dim1, tag2), (dim2, tag3),...]
    phys_grps = model.getPhysicalGroups()
    topologies: dict[int, ExtendedTopologyDict] = {}
    # Create a dictionary with the physical groups where the key is the
    # physical name and the value is a tuple with the dimension and tag
    physical_groups: dict[str, tuple[int, int]] = {}
    for dim, tag in phys_grps:
        # Get the entities of dimension `dim`, dim=0 -> Points, dim=1 -
        # >Lines, dim=2 -> Triangles/Quadrilaterals, etc.
        entities = model.getEntitiesForPhysicalGroup(dim, tag)
        for entity in entities:
            # Get cell type, list of cells with given tag and topology
            # of tagged cells NOTE: Assumes that each entity only have
            # one cell-type,
            # i.e. facets of prisms and pyramid meshes are not supported
            (entity_types, entity_tags, entity_topologies) = model.mesh.getElements(
                dim, tag=entity
            )
            assert len(entity_types) == 1

            # Determine number of local nodes per element to create the
            # topology of the elements
            properties = model.mesh.getElementProperties(entity_types[0])
            name, dim, _, num_nodes, _, _ = properties

            # Array of shape (num_elements,num_nodes_per_element)
            # containing the topology of the elements on this entity.
            # NOTE: Gmsh indexing starts with one, we therefore subtract
            # 1 from each node to use zero-based numbering
            topology = entity_topologies[0].reshape(-1, num_nodes) - 1

            # Create marker array of length of number of tagged cells
            marker = np.full_like(entity_tags[0], tag)

            # Group element topology and markers of the same entity type
            entity_type = entity_types[0]
            if entity_type in topologies.keys():
                topologies[entity_type]["topology"] = np.concatenate(
                    (topologies[entity_type]["topology"], topology), axis=0
                )
                topologies[entity_type]["cell_data"] = np.hstack(
                    [topologies[entity_type]["cell_data"], marker]
                )
                topologies[entity_type]["element_tags"] = np.hstack(
                    [topologies[entity_type]["element_tags"], entity_tags[0]]
                )
            else:
                topologies[entity_type] = {
                    "topology": topology,
                    "cell_data": marker,
                    "element_tags": entity_tags[0],
                }

        physical_groups[model.getPhysicalName(dim, tag)] = (dim, tag)

    return topologies, physical_groups


def create_msh_file():
    # Create geometric description of mesh with physical groups
    gmsh.initialize()
    rectangle = gmsh.model.occ.addRectangle(0, 0, 0, 1, 1)
    gmsh.model.occ.synchronize()
    gmsh.model.addPhysicalGroup(2, [rectangle], 1)
    gmsh.model.mesh.generate(2)
    # Add data to physical group
    phys_grps = gmsh.model.getPhysicalGroups()
    all_2D_tags = []
    all_2D_topologies = []
    for dim, tag in phys_grps:
        if dim == 2:
            entities = gmsh.model.getEntitiesForPhysicalGroup(dim, tag)
            for entity in entities:
                # All marked 2D tags
                (entity_types, entity_tags, entity_topologies) = (
                    gmsh.model.mesh.getElements(dim, tag=entity)
                )
                # Reshape topology to (num_cells, num_nodes_per_cell)
                properties = gmsh.model.mesh.getElementProperties(entity_types[0])
                name, dim, _, num_nodes, _, _ = properties
                all_2D_tags.append(entity_tags)
                all_2D_topologies.append(entity_topologies[0].reshape(-1, num_nodes))

    tags_for_computation = np.hstack(all_2D_tags)
    topologies_for_computation = np.vstack(all_2D_topologies)
    node_tags, xyz, _ = gmsh.model.mesh.getNodes()

    tag2idx = {_tag: _idx for (_idx, _tag) in enumerate(node_tags)}
    vertices = xyz.reshape(-1, 3)
    cells = np.vectorize(tag2idx.get)(topologies_for_computation)
    barycenters = np.einsum("tvx -> tx", vertices[cells]) / vertices[cells].shape[2]
    g_arr = barycenters[:, :2]
    view_tag = gmsh.view.add("g(x, y)")
    gmsh.view.addModelData(
        view_tag, 0, "", "ElementData", tags_for_computation[0], g_arr
    )

    # NOTE: Needs this special write to write view data
    gmsh.view.write(view_tag, "mesh.msh")
    # gmsh.fltk.run()

    gmsh.finalize()


# Create mesh

comm = MPI.COMM_WORLD
rank = 0
gdim = 2
dtype = np.float64
if comm.rank == 0:
    gmsh.initialize()
    create_msh_file()
comm.Barrier()
if comm.rank == rank:
    gmsh.initialize()
    gmsh.merge("mesh.msh")
    tags = gmsh.view.getTags()
    assert len(tags) == 1

    # Extract geometry
    x = dolfinx.io.gmshio.extract_geometry(gmsh.model)

    # Extract topology
    topologies, physical_groups = extract_topology_and_markers(gmsh.model)

    # Extract Gmsh cell id, dimension of cell and number of nodes to
    # cell for each
    num_cell_types = len(topologies.keys())
    cell_information = dict()
    cell_dimensions = np.zeros(num_cell_types, dtype=np.int32)
    for i, element in enumerate(topologies.keys()):
        _, dim, _, num_nodes, _, _ = gmsh.model.mesh.getElementProperties(element)
        cell_information[i] = {"id": element, "dim": dim, "num_nodes": num_nodes}
        cell_dimensions[i] = dim

    # Sort elements by ascending dimension
    perm_sort = np.argsort(cell_dimensions)
    # Broadcast cell type data and geometric dimension
    cell_id = cell_information[perm_sort[-1]]["id"]
    tdim = cell_information[perm_sort[-1]]["dim"]

    num_nodes = cell_information[perm_sort[-1]]["num_nodes"]
    cell_id, num_nodes = comm.bcast([cell_id, num_nodes], root=rank)
    cells = np.asarray(topologies[cell_id]["topology"], dtype=np.int64)
    cell_values = np.asarray(topologies[cell_id]["cell_data"], dtype=np.int32)
    physical_groups = comm.bcast(physical_groups, root=rank)
else:
    cell_id, num_nodes = comm.bcast([None, None], root=rank)
    cells, x = (
        np.empty([0, num_nodes], dtype=np.int32),
        np.empty([0, gdim], dtype=dtype),
    )
    cell_values = np.empty((0,), dtype=np.int32)
    physical_groups = comm.bcast(None, root=rank)

# Create distributed mesh
ufl_domain = dolfinx.io.gmshio.ufl_mesh(cell_id, gdim, dtype=dtype)
gmsh_cell_perm = dolfinx.io.gmshio.cell_perm_array(
    dolfinx.cpp.mesh.to_type(str(ufl_domain.ufl_cell())), num_nodes
)
cells = cells[:, gmsh_cell_perm].copy()
partitioner = dolfinx.cpp.mesh.create_cell_partitioner(
    dolfinx.mesh.GhostMode.shared_facet
)
mesh = dolfinx.mesh.create_mesh(
    comm, cells, x[:, :gdim].astype(dtype, copy=False), ufl_domain, partitioner
)

# Create MeshTags for cells
local_entities, local_values = dolfinx.io.gmshio.distribute_entity_data(
    mesh, mesh.topology.dim, cells, cell_values
)
mesh.topology.create_connectivity(mesh.topology.dim, 0)
adj = dolfinx.graph.adjacencylist(local_entities)
ct = dolfinx.mesh.meshtags_from_entities(
    mesh, mesh.topology.dim, adj, local_values.astype(np.int32, copy=False)
)
ct.name = "Cell tags"


def find_position(data, values):
    """
    Find the position in values of each entry in data
    Example:
        .. highlight:: python
        .. code-block:: python
            values = np.array([4, 5, 1, 3, 2], dtype=np.int32)
            data = np.array([1, 2, 3, 4, 5, 2, 1], dtype=np.int32)
            b = find_position(data, values) # [2,4,3,0,1,4 2]
    """
    if len(data) == 0:
        return np.zeros(0, dtype=np.int32)
    return (values == data[:, None]).argmax(1)


if comm.rank == 0:
    # Extract the new data in order
    cell_data = topologies[cell_id]["element_tags"]
    tags = gmsh.view.getTags()
    model_data = gmsh.view.get_model_data(tags[0], 0)
    _, tags, data, time, num_components = model_data
    position = find_position(cell_data, tags)
    sorted_data = np.asarray(data).reshape(-1, num_components)[position]
else:
    cell_data = np.zeros(0, dtype=np.int64)
    num_components = 0

num_components = comm.bcast(num_components, root=0)
if comm.rank != 0:
    sorted_data = np.zeros((0, num_components), dtype=dtype)

# Gather the data on all processes
# NOTE: This could be done in a more efficient way
sorted_data = comm.bcast(sorted_data, root=0)
cell_data = sorted_data[mesh.topology.original_cell_index, :]

Q = fem.functionspace(mesh, ("DG", 0, (num_components,)))
q = fem.Function(Q)
q.x.array[:] = cell_data.flatten()

with dolfinx.io.XDMFFile(mesh.comm, "data.xdmf", "w") as xdmf:
    xdmf.write_mesh(mesh)
    xdmf.write_function(q)