Different result when summing forms and assembling vs assembling summed forms

When using a custom quadrature routine on each cell in a mesh, I get a different result for the \ell^1 norm when assembling two forms and adding the resulting matrices versus assembling the sum of the forms. Consider the following MWE:

#Import packages
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx import mesh as dmesh, fem
import dolfinx.fem.petsc
import ufl
import numpy as np
import basix

def verts_to_bbox(verts):
    assert verts.shape == (3, 4)
    xL, xU = verts[0].min(), verts[0].max()
    yL, yU = verts[1].min(), verts[1].max()
    bbox = np.round([xL, yL, xU, yU], decimals=9)
    return bbox

#Problem parameters
nx, ny = 2, 2 #Number of quadrilaterals in x and y directions

#Define quadrature rule for each quadrilateral
x_vals = np.linspace(0., 1., nx+1)
y_vals = np.linspace(0., 1., ny+1)
quad = {}
for i in range(nx):
    for j in range(ny):
        bbox = np.round([x_vals[i], y_vals[j], x_vals[i+1], y_vals[j+1]], decimals=9)
        qp, qw = basix.make_quadrature(basix.CellType.quadrilateral, (i+j)%4+1)
        quad[tuple(bbox)] = [qp, qw]

#Create mesh
mesh = dmesh.create_unit_square(MPI.COMM_WORLD, nx, ny, dmesh.CellType.quadrilateral)
comm = mesh.comm
tdim = mesh.topology.dim
imap = mesh.topology.index_map(tdim)
all_cells = np.arange(imap.size_local+imap.num_ghosts, dtype=np.int32)
cell_tags = dmesh.meshtags(mesh, tdim, entities=all_cells, values=all_cells)
dx_all = ufl.Measure('dx', domain=mesh, subdomain_id=tuple(all_cells), subdomain_data=cell_tags)

#Define function space
V = fem.functionspace(mesh, ('CG', 1))
u, v = ufl.TrialFunction(V), ufl.TestFunction(V)

#Define form0
form0 = ufl.inner(ufl.grad(u), ufl.grad(v))*dx_all

#Define form1
c2v = dmesh.entities_to_geometry(mesh, tdim, all_cells, False)
verts = mesh.geometry.x
form1 = PETSc.ScalarType(0.)
for cut_cell in all_cells:

    #Get bounding box
    cell_verts = verts[c2v[cut_cell]].T
    bbox = verts_to_bbox(cell_verts)

    #Create boundary term
    qp, qw = quad[tuple(bbox)]
    metadata={'quadrature_rule': 'custom', 'quadrature_points': qp, 'quadrature_weights':qw}
    dx_cell = ufl.Measure('dx', domain=mesh, subdomain_id=cut_cell, subdomain_data=cell_tags, metadata=metadata)
    form1 += u*v*dx_cell

#Assemble form0
mat0 = fem.petsc.assemble_matrix(fem.form(form0))
mat0.assemble()
_, _, data0 = mat0.getValuesCSR()
mat0_l1 = comm.allreduce(np.sum(np.abs(data0)), op=MPI.SUM)

#Assemble form1
mat1 = fem.petsc.assemble_matrix(fem.form(form1))
mat1.assemble()
_, _, data1 = mat1.getValuesCSR()
mat1_l1 = comm.allreduce(np.sum(np.abs(data1)), op=MPI.SUM)

#Sum assembled matrices
mat0.axpy(1., mat1)
_, _, sum_data01 = mat0.getValuesCSR()
sum_mat01_l1 = comm.allreduce(np.sum(np.abs(sum_data01)), op=MPI.SUM)

#Assemble form0+form1
form01 = form0+form1
mat01 = fem.petsc.assemble_matrix(fem.form(form01))
mat01.assemble()
_, _, data01 = mat01.getValuesCSR()
mat01_l1 = comm.allreduce(np.sum(np.abs(data01)), op=MPI.SUM)

print(comm.rank, mat0_l1, mat1_l1, sum_mat01_l1, mat01_l1)

Summing the assembled matrices and then taking the norm gives a consistent result when using a different number of ranks. On the other hand, assembling the sum of the forms gives inconsistent results. When running in serial the result is:
0 21.333333333333314 1.0000000000000002 21.124999999999986 32.24999999999998

while on two ranks the result is

0 21.333333333333318 1.0 21.12499999999999 21.8611111111111
1 21.333333333333318 1.0 21.12499999999999 21.8611111111111

Is this a known bug or am I doing something completely wrong?

I think what is happening here is that the quadrature rule gets enforce in both integrals when summing them.
Here is an MWE:

from mpi4py import MPI
import dolfinx
import numpy as np
import ufl

nx = 2
ny = 2
# Create mesh
mesh = dolfinx.mesh.create_unit_square(
    MPI.COMM_WORLD, nx, ny, dolfinx.mesh.CellType.quadrilateral
)
comm = mesh.comm
tdim = mesh.topology.dim
imap = mesh.topology.index_map(tdim)
all_cells = np.arange(imap.size_local + imap.num_ghosts, dtype=np.int32)
cell_tags = dolfinx.mesh.meshtags(mesh, tdim, entities=all_cells, values=all_cells)
dx_all = ufl.Measure(
    "dx", domain=mesh, subdomain_id=tuple(all_cells), subdomain_data=cell_tags
)

# Define function space
V = dolfinx.fem.functionspace(mesh, ("CG", 1))
u, v = ufl.TrialFunction(V), ufl.TestFunction(V)

# Define form0
form0 = ufl.inner(ufl.grad(u), ufl.grad(v)) * dx_all


qp = np.array([[0.3, 0], [0.7, 0.1]], dtype=mesh.geometry.x.dtype)
qw = np.array([0.5, 0.5], dtype=mesh.geometry.x.dtype)
metadata = {
    "quadrature_rule": "custom",
    "quadrature_points": qp,
    "quadrature_weights": qw,
}
dx_cell = ufl.Measure(
    "dx",
    domain=mesh,
    subdomain_id=0,
    subdomain_data=cell_tags,
    metadata=metadata,
)
form1 = u * v * dx_cell

form_sum = dolfinx.fem.form(form0 + form1)

resulting in the generated kernel

// Code for integral integral_c29cdaf17aa57ba9bffd6392f67f79eb5c34da5c

void tabulate_tensor_integral_c29cdaf17aa57ba9bffd6392f67f79eb5c34da5c(double* restrict A,
                                    const double* restrict w,
                                    const double* restrict c,
                                    const double* restrict coordinate_dofs,
                                    const int* restrict entity_local_index,
                                    const uint8_t* restrict quadrature_permutation)
{
// Quadrature rules
static const double weights_c46[2] = {0.5, 0.5};
// Precomputed values of basis functions and precomputations
// FE* dimensions: [permutation][entities][points][dofs]
static const double FE0_C0_Qc46[1][1][2][4] = {{{{0.7, 0.2999999999999999, 0.0, 0.0},
  {0.2700000000000001, 0.63, 0.02999999999999997, 0.06999999999999992}}}};
static const double FE1_C0_D10_Qc46[1][1][2][4] = {{{{-1.0, 1.0, 0.0, 0.0},
  {-0.9, 0.9, -0.09999999999999998, 0.09999999999999981}}}};
static const double FE1_C1_D01_Qc46[1][1][2][4] = {{{{-0.7, -0.3, 0.7, 0.2999999999999999},
  {-0.3, -0.7, 0.3, 0.7}}}};
for (int iq = 0; iq < 2; ++iq)
{
  // ------------------------ 
  // Section: Jacobian
  // Inputs: coordinate_dofs, FE1_C1_D01_Qc46, FE1_C0_D10_Qc46
  // Outputs: J_c0, J_c3, J_c1, J_c2
  double J_c0 = 0.0;
  double J_c3 = 0.0;
  double J_c1 = 0.0;
  double J_c2 = 0.0;
  {
    for (int ic = 0; ic < 4; ++ic)
    {
      J_c0 += coordinate_dofs[(ic) * 3] * FE1_C0_D10_Qc46[0][0][iq][ic];
      J_c3 += coordinate_dofs[(ic) * 3 + 1] * FE1_C1_D01_Qc46[0][0][iq][ic];
      J_c1 += coordinate_dofs[(ic) * 3] * FE1_C1_D01_Qc46[0][0][iq][ic];
      J_c2 += coordinate_dofs[(ic) * 3 + 1] * FE1_C0_D10_Qc46[0][0][iq][ic];
    }
  }
  // ------------------------ 
  // ------------------------ 
  // Section: Intermediates
  // Inputs: J_c0, J_c3, J_c1, J_c2
  // Outputs: fw0
  double fw0 = 0;
  {
    double sv_c46_0 = J_c0 * J_c3;
    double sv_c46_1 = J_c1 * J_c2;
    double sv_c46_2 = -sv_c46_1;
    double sv_c46_3 = sv_c46_0 + sv_c46_2;
    double sv_c46_4 = fabs(sv_c46_3);
    fw0 = sv_c46_4 * weights_c46[iq];
  }
  // ------------------------ 
  // ------------------------ 
  // Section: Tensor Computation
  // Inputs: FE0_C0_Qc46, fw0
  // Outputs: A
  {
    double temp_0[4] = {0};
    for (int j = 0; j < 4; ++j)
    {
      temp_0[j] = fw0 * FE0_C0_Qc46[0][0][iq][j];
    }
    for (int j = 0; j < 4; ++j)
    {
      for (int i = 0; i < 4; ++i)
      {
        A[4 * (i) + (j)] += FE0_C0_Qc46[0][0][iq][i] * temp_0[j];
      }
    }
  }
  // ------------------------ 
}

}

If you want them to use different quadrature rules, you have to modify the metadata of your first measure, i.e.

from mpi4py import MPI
import dolfinx
import numpy as np
import ufl

nx = 2
ny = 2
# Create mesh
mesh = dolfinx.mesh.create_unit_square(
    MPI.COMM_WORLD, nx, ny, dolfinx.mesh.CellType.quadrilateral
)
comm = mesh.comm
tdim = mesh.topology.dim
imap = mesh.topology.index_map(tdim)
all_cells = np.arange(imap.size_local + imap.num_ghosts, dtype=np.int32)
cell_tags = dolfinx.mesh.meshtags(mesh, tdim, entities=all_cells, values=all_cells)
metadata_all = {"quadrature_rule": "default"}

dx_all = ufl.Measure(
    "dx",
    domain=mesh,
    subdomain_id=tuple(all_cells),
    subdomain_data=cell_tags,
    metadata=metadata_all,
)

# Define function space
V = dolfinx.fem.functionspace(mesh, ("CG", 1))
u, v = ufl.TrialFunction(V), ufl.TestFunction(V)

# Define form0
form0 = ufl.inner(ufl.grad(u), ufl.grad(v)) * dx_all


qp = np.array([[0.3, 0], [0.7, 0.1]], dtype=mesh.geometry.x.dtype)
qw = np.array([0.5, 0.5], dtype=mesh.geometry.x.dtype)
metadata = {
    "quadrature_rule": "custom",
    "quadrature_points": qp,
    "quadrature_weights": qw,
}
dx_cell = ufl.Measure(
    "dx",
    domain=mesh,
    subdomain_id=7,
    subdomain_data=cell_tags,
    metadata=metadata,
)
form1 = u * v * dx_cell

form_sum = dolfinx.fem.form(form0 + form1)

Thanks, I’ve been trying to figure this out for too long. I never thought to look at the generated kernel. I assumed the “default” behavior when no metadata is provided is to use the default quadrature rule, not to use the quadrature rule of another measure.

Interestingly, your suggestion did not work for me. I’ve modified your MWE to show what I mean:

from mpi4py import MPI
import dolfinx
import numpy as np
import ufl

nx = 2
ny = 1
# Create mesh
mesh = dolfinx.mesh.create_unit_square(
    MPI.COMM_WORLD, nx, ny, dolfinx.mesh.CellType.quadrilateral
)
comm = mesh.comm
tdim = mesh.topology.dim
imap = mesh.topology.index_map(tdim)
all_cells = np.arange(imap.size_local + imap.num_ghosts, dtype=np.int32)
cell_tags = dolfinx.mesh.meshtags(mesh, tdim, entities=all_cells, values=all_cells)
metadata_all = {"quadrature_rule": "default"}
dx_all = ufl.Measure(
    "dx",
    domain=mesh,
    subdomain_id=tuple(all_cells),
    subdomain_data=cell_tags,
    metadata=metadata_all,
)

# Define function space
V = dolfinx.fem.functionspace(mesh, ("CG", 1))
u, v = ufl.TrialFunction(V), ufl.TestFunction(V)

# Define form0
form0 = u * v * dx_all

# Define form1
qp1 = np.array([[0.3, 0], [0.7, 0.1]], dtype=mesh.geometry.x.dtype)
qw1 = np.array([0.5, 0.5], dtype=mesh.geometry.x.dtype)
metadata = {
    "quadrature_rule": "custom",
    "quadrature_points": qp1,
    "quadrature_weights": qw1,
}
dx1 = ufl.Measure(
    "dx",
    domain=mesh,
    subdomain_id=1,
    subdomain_data=cell_tags,
    metadata=metadata,
)
form1 = u * v * dx1

#Assemble
mat0_np = dolfinx.fem.assemble_matrix(dolfinx.fem.form(form0)).to_dense()
mat1_np = dolfinx.fem.assemble_matrix(dolfinx.fem.form(form1)).to_dense()
mat01_np = dolfinx.fem.assemble_matrix(dolfinx.fem.form(form0+form1)).to_dense()

#Print
print(np.allclose(mat0_np + mat1_np, mat01_np))
print('Assemble individually and then add:\n', np.round(mat0_np+mat1_np, decimals=3))
print('Assemble sum of forms:\n', np.round(mat01_np, decimals=3))

Running the above shows that even when adding the metadata as you suggest, the results are still quite different. The output from running the above using dolfinx 0.9.0 from Conda is:

False
Assemble individually and then add:
[[0.056 0.028 0.028 0.014 0. 0. ]
[0.028 0.056 0.014 0.028 0. 0. ]
[0.028 0.014 0.252 0.151 0.03 0.019]
[0.014 0.028 0.151 0.233 0.019 0.039]
[0. 0. 0.03 0.019 0.056 0.028]
[0. 0. 0.019 0.039 0.028 0.057]]
Assemble sum of forms:
[[0.056 0.028 0.028 0.014 0. 0. ]
[0.028 0.056 0.014 0.028 0. 0. ]
[0.028 0.014 0.167 0.083 0.056 0.028]
[0.014 0.028 0.083 0.167 0.028 0.056]
[0. 0. 0.056 0.028 0.111 0.056]
[0. 0. 0.028 0.056 0.056 0.111]]