# Parallel Sparse PETSc Matrix Generation with dofs as rows and columns

Hi there,

I’m trying to generate parallel sparse PETSC matrix. I am able to calculate nonzero rows and columns and their values. Here is the MWE:

``````from petsc4py import PETSc
import dolfin as dolf
import numpy as np

dolf.parameters["mesh_partitioner"] = "ParMETIS"

def mshr(el):

mesh = dolf.UnitIntervalMesh(el)

def l_boundary_func(x, on_boundary):
x = x[0]
return on_boundary and dolf.near(x, 0.)

def r_boundary_func(x, on_boundary):
x = x[0]
return on_boundary and dolf.near(x, 1.)

boundaries = dolf.MeshFunction('size_t', mesh, mesh.topology().dim() - 1)

l_boundary = dolf.AutoSubDomain(l_boundary_func)
r_boundary = dolf.AutoSubDomain(r_boundary_func)

l_boundary.mark(boundaries, 1)
r_boundary.mark(boundaries, 2)

# ________________________________________________________________________________

def fl_subdomain_func(x):
x = x[0]
x_f = 0.25
a_f = 0.025
return x_f - a_f - dolf.DOLFIN_EPS <= x <= x_f + a_f + dolf.DOLFIN_EPS

subdomains = dolf.MeshFunction('size_t', mesh, mesh.topology().dim())

subdomains.set_all(1)

fl_subdomain = dolf.AutoSubDomain(fl_subdomain_func)
fl_subdomain.mark(subdomains, 0)

return mesh, boundaries, subdomains

degree = 1

mesh, boundaries, subdomains = mshr(30)

CG = dolf.FiniteElement('CG', mesh.ufl_cell(), degree)
W = dolf.FunctionSpace(mesh, dolf.MixedElement([CG, CG]))

def assemble_left_vector(fl):

(v_1, v_2) = dolf.TestFunction(W)
dx = dolf.Measure('dx', subdomain_data=subdomains)

V = dolf.FunctionSpace(mesh, 'CG', 1)
const = dolf.interpolate(dolf.Constant(1), V)
V_fl = dolf.assemble(const * dx(fl))

a_1 =  dolf.assemble(v_1 / V_fl * dx(fl))
a_2 =  dolf.assemble(v_2 / V_fl * dx(fl))

dofmap = W.dofmap()

def helper_func(v, dofmap):
indices = np.flatnonzero(v) # Returns the indices which are nonzero
values = v[indices]
my_list = []
for index, value in zip(indices, values):
my_list.append([dofmap.dofs()[index], value])
v = np.array(my_list)
return v

a_1 = helper_func(a_1, dofmap)
a_2 = helper_func(a_2, dofmap)

a = (a_1, a_2)

return a

def assemble_right_vector(x):

v = np.array([[0, 0, 1]])  # row
dimension = mesh.topology().dim()
if dimension == 1:
v = np.array([[1]])
elif dimension == 2:
v = np.array([[1, 0]])
v = v.transpose()  # column
b = [np.array([]), np.array([])]
cell_index = mesh.bounding_box_tree().compute_first_entity_collision(dolf.Point(x))
if cell_index <= mesh.num_entities(dimension):
cell = dolf.Cell(mesh, cell_index)

b = []

for j in range(2):

dofmap = W.sub(j).dofmap()
cell_dofs = dofmap.cell_dofs(cell_index)
element = W.sub(j).element()
d_dx = element.evaluate_basis_derivatives_all(1, x, cell.get_vertex_coordinates(), cell.orientation())
d_dx = d_dx.reshape((len(cell_dofs), -1))
d_dv = np.dot(d_dx, v)
d_dv = d_dv[:, 0]

my_list = []
for i, dof in enumerate(cell_dofs):
my_list.append([dofmap.tabulate_local_to_global_dofs()[dof], d_dv[i]])
my_vec = np.array(my_list)
b.append(my_vec)
else:
# print("The cell for the subdomain is not in the mesh")
pass
return (*b, )

a = assemble_left_vector(0)
b = assemble_right_vector(0.2)

a = np.concatenate(a)
b = np.concatenate(b)

nnz = len(a)*len(b)

row = np.zeros(nnz)
col = np.zeros(nnz)
val = np.zeros(nnz)

for i, c in enumerate(a):
for j, d in enumerate(b):
row[i * len(b) + j] = c[0]
col[i * len(b) + j] = d[0]
val[i * len(b) + j] = c[1] * d[1]

row = row.astype(dtype='int32')
col = col.astype(dtype='int32')

global_size = W.dim()
istart, iend = W.dofmap().ownership_range()
local_size = iend - istart  # local size
print(iend,istart)
mat = PETSc.Mat().create(PETSc.COMM_WORLD) # MPI.COMM_SELF
mat.setSizes([(local_size, global_size), (local_size, global_size)])
mat.setType('aij')
mat.setUp()
for i in range(len(row)):
mat.setValue(row[i],col[i],val[i])
mat.assemblyBegin()
mat.assemblyEnd()
``````

What I am doing is;

1. I am getting nonzero rows and their values as vector a
2. Getting nonzero columns and their values as vector b
3. Outer product of a and b should be my PETSc matrix by using corresponding rows and columns while others are zero.

For example when I run this code as serial, I get the a and b vectors like;

``````A:  [[44.   0.5]
[46.   0.5]
[45.   0.5]
[47.   0.5]]
B:  [[ 50. -30.]
[ 48.  30.]
[ 51. -30.]
[ 49.  30.]]
``````

Then, when I take the outer product of a and b, the nonzero elements of PETSc matrix should be;

``````            48   49   50   51     # Nonzero columns
44    [[15.  15. -15. -15.]
45    [ 15.  15. -15. -15.]
46    [ 15.  15. -15. -15.]
47    [ 15.  15. -15. -15.]]
# Nonzero
# rows
``````

while the other entries are zero.

It works perfectly fine for 1 or 2 cores. However when I use 4 cores, one of the vectors(a or b) becomes zero due to mesh partition and it yields zero entries instead of nonzero entries. So my matrix becomes wrong in parallel.

How can I overcome this problem?

Any help would be much appreciated.

The code you have posted here does not run, and produces the error message:

``````Traceback (most recent call last):
File "petsc_matrix.py", line 120, in <module>
a = assemble_left_vector(0)
File "petsc_matrix.py", line 77, in assemble_left_vector
a_1 = helper_func(a_1, dofmap)
File "petsc_matrix.py", line 69, in helper_func
indices = np.flatnonzero(v) # Returns the indices which are nonzero
File "/usr/lib/python3/dist-packages/numpy/core/numeric.py", line 896, in flatnonzero
return a.ravel().nonzero()[0]
AttributeError: 'dolfin.cpp.la.Vector' object has no attribute 'ravel'
``````

Please make sure that your code runs when you post it. I would also strongly suggest that you either:
a) Document the helper functions `assemble_left_vector`, `assemble_right_vector`, and explain the purpose of these functions.
b) Remove all helper functions and make a program that runs line by line (as the helper functions are only called once) and document the different stages of the code.

I would also prefer if the output you are showing

is reproducable by the code.

The code I have posted is working in my system interestingly.

``````from petsc4py import PETSc
import dolfin as dolf
import numpy as np

dolf.parameters["mesh_partitioner"] = "ParMETIS"
dolf.parameters["ghost_mode"] = "shared_facet"
dolf.parameters["ghost_mode"] = "shared_vertex"

def mshr(el):
"""
This function handles creation of mesh, subdomains and boundaries
"""

mesh = dolf.UnitIntervalMesh(el)

def l_boundary_func(x, on_boundary):
x = x[0]
return on_boundary and dolf.near(x, 0.)

def r_boundary_func(x, on_boundary):
x = x[0]
return on_boundary and dolf.near(x, 1.)

boundaries = dolf.MeshFunction('size_t', mesh, mesh.topology().dim() - 1)

l_boundary = dolf.AutoSubDomain(l_boundary_func)
r_boundary = dolf.AutoSubDomain(r_boundary_func)

l_boundary.mark(boundaries, 1)
r_boundary.mark(boundaries, 2)

# ________________________________________________________________________________

def fl_subdomain_func(x):
x = x[0]
x_f = 0.25
a_f = 0.025
return x_f - a_f - dolf.DOLFIN_EPS <= x <= x_f + a_f + dolf.DOLFIN_EPS

subdomains = dolf.MeshFunction('size_t', mesh, mesh.topology().dim())

subdomains.set_all(1)

fl_subdomain = dolf.AutoSubDomain(fl_subdomain_func)
fl_subdomain.mark(subdomains, 0)

return mesh, boundaries, subdomains

degree = 1

mesh, boundaries, subdomains = mshr(30)

CG = dolf.FiniteElement('CG', mesh.ufl_cell(), degree)
W = dolf.FunctionSpace(mesh, dolf.MixedElement([CG, CG]))

def assemble_a(fl):
"""
This function returns vector a's nonzero dofs and their values as a list
"""

# Assemble vector a for subdomain that has
# nonzero entries between 0.225<x<0.275
(v_1, v_2) = dolf.TestFunction(W)
dx = dolf.Measure('dx', subdomain_data=subdomains)
V = dolf.FunctionSpace(mesh, 'CG', 1)
const = dolf.interpolate(dolf.Constant(1), V)
V_fl = dolf.assemble(const * dx(fl))
a_1 =  dolf.assemble(v_1 / V_fl * dx(fl))
a_2 =  dolf.assemble(v_2 / V_fl * dx(fl))

aa1 = a_1.get_local() # convert to numpy array
aa2 = a_2.get_local() # convert to numpy array
indices1 = np.flatnonzero(aa1) #find nonzero indices
indices2 = np.flatnonzero(aa2) #find nonzero indices
aa1 = list(zip(indices1,aa1[indices1])) #zip nonzero indices with their values
aa2 = list(zip(indices2,aa2[indices2])) #zip nonzero indices with their values
a = (aa1, aa2)
a = np.concatenate(a)
return a

def assemble_b(x):
"""
This function finds the basis function's derivative at point x
and returns the relevant dof and derivative as a list
"""

v = np.array([[1]])
cell_index = mesh.bounding_box_tree().compute_first_entity_collision(dolf.Point(x))
B = [np.array([]), np.array([])]
if cell_index <= mesh.num_entities(1):
cell = dolf.Cell(mesh, cell_index)
B = []
for j in range(2):
# Evaluate derivatives for each function spaces
dofmap = W.sub(j).dofmap()
cell_dofs = dofmap.cell_dofs(cell_index)
element = W.sub(j).element()
d_dx = element.evaluate_basis_derivatives_all(1, x, cell.get_vertex_coordinates(), cell.orientation())
d_dx = d_dx.reshape((len(cell_dofs), -1))
d_dv = np.dot(d_dx, v)
d_dv = d_dv[:, 0]

my_list = []
for i, dof in enumerate(cell_dofs):
my_list.append([dofmap.tabulate_local_to_global_dofs()[dof], d_dv[i]])
B.append(my_list)

B = np.concatenate(B)
return (*B, )

a = assemble_a(0)
b = assemble_b(0.2)

print("a: ",a)
print("b: ",b)
# COMPLEX MATRIX GENERATION

nnz = len(a)*len(b)

row = np.zeros(nnz)
col = np.zeros(nnz)
val = np.zeros(nnz)

for i, c in enumerate(a):
for j, d in enumerate(b):
row[i * len(b) + j] = c[0]
col[i * len(b) + j] = d[0]
val[i * len(b) + j] = c[1] * d[1]

row = row.astype(dtype='int32')
col = col.astype(dtype='int32')

global_size = W.dim()
istart, iend = W.dofmap().ownership_range()
local_size = iend - istart  # local size
mat = PETSc.Mat().create(PETSc.COMM_WORLD)
mat.setSizes([(local_size, global_size), (local_size, global_size)])
mat.setType('aij')
mat.setUp()
for i in range(len(row)):
mat.setValue(row[i],col[i],val[i])
mat.assemblyBegin()
mat.assemblyEnd()

# CHECK CREATED MATRIX THAT HAS 16 ENTRIES
read_row = [item[0] for item in a]
read_col = [item[0] for item in b]
``````

When you run this script with 4 cores, it will be more obvious to see the problem;

``````a:  []
b:  ()
NONZERO MATRIX ENTRIES:(SHOULD BE 16 ENTRY)
[]
a:  [[14.   0.5]
[15.   0.5]]
b:  ()
NONZERO MATRIX ENTRIES:(SHOULD BE 16 ENTRY)
[]
a:  []
b:  ()
NONZERO MATRIX ENTRIES:(SHOULD BE 16 ENTRY)
[]
a:  [[0.  0.5]
[1.  0.5]]
b:  (array([  4., -30.]), array([ 2., 30.]), array([  5., -30.]), array([ 3., 30.]))
[[-15.  15. -15.  15.]
[-15.  15. -15.  15.]]
``````

In the second core, one of the vectors becomes zero, hence the outer product results in zero. Because my subdomain (0.225<x<0.275) is not fall into same mesh with x=0.2(I evaluate gradient of basis function at that point) during mesh partition of parallel execution. So only 8 value could be obtained instead of 16 values.

The problem is about mesh partition. It looks llike my subdomain(0.225<x<0.275) and the point(x=0.2) I am evaluating gradient should be in the same mesh while parallelization. I have asked another question in here that may be more clear.

The key here is that you are only access local (owned) indices of the vector (when you call `get_local()`, not the ghosted values. This means that you are ignoring several contributions to your matrix.

I do not know of an easy way of getting those ghost values in dolfin, however, here is a code that does the same thing for dolfinx.

``````from petsc4py import PETSc
from mpi4py import MPI
import dolfinx
import dolfinx.geometry
import basix
import numpy as np
import ufl
el = 30

mesh = dolfinx.UnitIntervalMesh(MPI.COMM_WORLD, el, dolfinx.cpp.mesh.GhostMode.shared_facet)
# mesh = dolfinx.UnitSquareMesh(MPI.COMM_WORLD, el, el)
#mesh = dolfinx.UnitCubeMesh(MPI.COMM_WORLD, el, el, el)

def fl_subdomain_func(x, eps=1e-16):
x = x[0]
x_f = 0.25
a_f = 0.025
return np.logical_and(x_f - a_f - eps <= x, x <= x_f + a_f + eps)

tdim = mesh.topology.dim
marked_cells = dolfinx.mesh.locate_entities(mesh, tdim, fl_subdomain_func)
fl = 1
subdomain = dolfinx.MeshTags(mesh, tdim, marked_cells, np.full(len(marked_cells), fl, dtype=np.int32))

degree = 1
CG = ufl.FiniteElement("CG", mesh.ufl_cell(), degree)
W = dolfinx.FunctionSpace(mesh, ufl.MixedElement([CG, CG]))

#This function returns a's nonzero dofs and their values as a list
v_1, v_2 = ufl.TestFunctions(W)
dx = ufl.Measure("dx", subdomain_data=subdomain)

V_fl = MPI.COMM_WORLD.allreduce(dolfinx.fem.assemble_scalar(dolfinx.Constant(mesh, 1)*dx(fl)), op=MPI.SUM)
b_1 = dolfinx.Function(W)
b_1.x.array[:] = 0
a_1 = dolfinx.fem.assemble_vector(b_1.vector, v_1/V_fl*dx(fl))
b_2 = dolfinx.Function(W)
b_2.x.array[:] = 0
a_2 = dolfinx.fem.assemble_vector(b_2.vector, v_2/V_fl*dx(fl))

# NOTE: This includes ghosted dofs
aa1 = b_1.x.array
aa2 = b_2.x.array
indices1 = np.flatnonzero(aa1)
indices2 = np.flatnonzero(aa2)

aa1 = list(zip(indices1, aa1[indices1]))
aa2 = list(zip(indices2, aa2[indices2]))
a =  np.concatenate((aa1, aa2))

print(MPI.COMM_WORLD.rank, a, W.dofmap.index_map.size_local)

point = np.array([[0.2, 0, 0]])
v = np.array([[0, 0, 1]]).T
if tdim == 1:
v = np.array([[1]])
elif tdim == 2:
v = np.array([[1, 0]]).T

# Finds the basis function's derivative at point x
# and returns the relevant dof and derivative as a list
bb_tree = dolfinx.geometry.BoundingBoxTree(mesh, tdim)
cell_candidates = dolfinx.geometry.compute_collisions_point(bb_tree, point[0])
# Choose one of the cells that contains the point
cell = dolfinx.geometry.select_colliding_cells(mesh, cell_candidates, point[0], 1)

# Data required for pull back of coordinate
gdim = mesh.geometry.dim
num_local_cells = mesh.topology.index_map(tdim).size_local
num_dofs_x = mesh.geometry.dofmap.links(0).size  # NOTE: Assumes same cell geometry in whole mesh
t_imap = mesh.topology.index_map(tdim)
num_cells = t_imap.size_local + t_imap.num_ghosts
x = mesh.geometry.x
x_dofs = mesh.geometry.dofmap.array.reshape(num_cells, num_dofs_x)
cell_geometry = np.zeros((num_dofs_x, gdim), dtype=np.float64)
points_ref = np.zeros((1, tdim))

# Data required for evaluation of derivative
ct = dolfinx.cpp.mesh.to_string(mesh.topology.cell_type)
element = basix.create_element(basix.finite_element.string_to_family(
"Lagrange", ct), basix.cell.string_to_type(ct), degree, basix.LagrangeVariant.equispaced)
dofmaps = [W.sub(i).dofmap for i  in range(2)]
coordinate_element = basix.create_element(basix.finite_element.string_to_family(
"Lagrange", ct), basix.cell.string_to_type(ct), 1, basix.LagrangeVariant.equispaced)

point_ref = None
B = []
if len(cell) > 0:
cell = cell[0]
# Only add contribution if cell is owned
if cell < num_local_cells:
# Map point in cell back to reference element
cell_geometry[:] = x[x_dofs[cell], :gdim]
point_ref = mesh.geometry.cmap.pull_back(point[:,:gdim], cell_geometry)
dphi = coordinate_element.tabulate(1, point_ref)[1:,0,:]
J = np.dot(cell_geometry.T, dphi.T)
Jinv = np.linalg.inv(J)
for j in range(2):
cell_dofs = dofmaps[j].cell_dofs(cell)
global_dofs = dofmaps[j].index_map.local_to_global(cell_dofs)
# Compute gradient on physical element by multiplying by J^(-T)
d_dx = (Jinv.T @ element.tabulate(1, point_ref)[1:, 0, :]).T
d_dv = np.dot(d_dx, v)[:, 0]
for i in range(len(cell_dofs)):
B.append([global_dofs[i], d_dv[i]])
else:
print(MPI.COMM_WORLD.rank, "Ghost", cell)

print(np.asarray(B))
``````

Regarding you question about mesh partitioning, there is functionality to control this in dolfinx, and @IgorBaratta can probably provide you with an example of how to control it.

2 Likes

Dear @dokken,

Thanks for your interest. It looks like I need to migrate from dolfin to dolfinx.

Now, I am trying to install dolfinx to our Ubuntu 18.04 server. Since I do not have root access, the installation is very daunting. What would you suggest at this point.

What is the most convenient way for me to make the dolfinx run on our Ubuntu 18.04 server?

Dear @IgorBaratta, could you please provide some examples about controlling parallel mesh partitioning for dolfinx?