Modify matrix diagonal -- dolfinx version for A.ident_zeros()

Hi all,
I have an assembled matrix with zero rows, where I’d like to insert ones on the diagonal. In older versions there was the A.ident_zeros() function which did exactly that, and fast. Such a function seems not to be implemented in dolfinx, so I tried to find a workaround with PETSc, using:

  • A.setValueLocal()
  • A.setValuesLocalCSR/RCV/IJV()
  • A.setDiagonal()

While they all seem to work, they are extremely (and equally) slow. I assume because the diagonal NNZ have to be allocated and added after the assembly of the matrix. I tried to set A.setOption(A.Option.FORCE_DIAGONAL_ENTRIES, 1) hoping that this would allocate the full diagonal, but without a noticeable effect.
Is there an efficient way to achieve this that works on large 3D problems and in parallel?

Here is a MWE illustrating what I tried. It works fine for N = 8..16, but I need a solution that also works for, for instance, N = 128.

import time

import dolfinx
import numpy as np
import ufl
from dolfinx import cpp, fem
from mpi4py import MPI
from petsc4py import PETSc

N = 8
degree = 1
mesh = dolfinx.mesh.create_unit_cube(MPI.COMM_WORLD, N, N, N)
V = dolfinx.fem.FunctionSpace(mesh, ("CG", degree))
u = ufl.TrialFunction(V)
v = ufl.TestFunction(V)

# form is defined on the boundary => matrix wil have many zero diagonal entries
a_bnd = ufl.inner(u, v) * ufl.ds
form = fem.form(a_bnd)

sp = fem.create_sparsity_pattern(form)
sp.assemble()

A = cpp.la.petsc.create_matrix(mesh.comm, sp)
# NOTE: matrix is creaed with MatOptions KEEP_NONZERO_PATTERN and
# NEW_NONZERO_ALLOCATION_ERR
A.setOption(A.Option.FORCE_DIAGONAL_ENTRIES, 1)
# A.setOption(A.Option.NEW_NONZERO_LOCATIONS, 1)
# if this option is not set to False, will give error. Apparently FORCE_DIAGONAL_ENTRIES does not work
A.setOption(A.Option.NEW_NONZERO_ALLOCATION_ERR, 0)
fem.petsc.assemble_matrix(A, form)
A.assemble()

print(f"min(diag) = {A.getDiagonal().array.min()}")
print(f"number of diagonal zeros = {sum(A.getDiagonal().array == 0)}")

# breakpoint()
start = time.perf_counter()
IS_zeros = A.findZeroRows()
end = time.perf_counter()
print(f"Find zero rows: {end-start}s")
idx_zeros = IS_zeros.getIndices()
local_range = A.getOwnershipRange()
local_idx = idx_zeros - local_range[0]


def setDiagonal(A):
    start = time.perf_counter()
    diag = A.getDiagonal()
    diag.setValuesLocal(local_idx, np.ones_like(local_idx, dtype=PETSc.ScalarType))
    A.setDiagonal(diag)
    end = time.perf_counter()
    print(f"Set diagonal: {end-start}s")
    A.assemble()


def setValue(A):
    start = time.perf_counter()
    for i in local_idx:
        # print(i)
        A.setValueLocal(i, i, 1.0)
    end = time.perf_counter()
    print(f"Time setValueLocal: {end - start}s")
    A.assemble()


def setValuesLocalCSR(A):
    indptr = np.zeros(A.local_size[0] + 1, PETSc.IntType)
    indptr[-1] = len(local_idx)

    for k in range(len(local_idx) - 1):
        indptr[local_idx[k] + 1: local_idx[k + 1] + 1] = k + 1
    indptr[local_idx[-1] + 1: -1] = k + 2

    # print(indptr)

    start = time.perf_counter()
    A.setValuesLocalCSR(
        indptr,
        local_idx,
        np.ones((len(local_idx), 1), dtype=PETSc.ScalarType),
        addv=PETSc.InsertMode.ADD_VALUES,
    )
    end = time.perf_counter()
    print(f"Time setValuesLocalCSR: {end - start}s")
    A.assemble()


def setValuesLocalRCV(A):
    start = time.perf_counter()
    A.setValuesLocalRCV(
        local_idx.reshape(-1, 1),
        local_idx.reshape(-1, 1),
        np.ones((len(local_idx), 1), dtype=PETSc.ScalarType),
        addv=PETSc.InsertMode.INSERT_VALUES,
    )
    end = time.perf_counter()
    print(f"Time setValuesLocalRCV: {end - start}s")
    A.assemble()


# NOTE: only call ONE of the functions per run of the script!

setDiagonal(A)
# setValue(A)
# setValuesLocalCSR(A)
# setValuesLocalRCV(A)

#
print(f"min(diag) = {A.getDiagonal().array.min()}")
print(f"number of diagonal zeros = {sum(A.getDiagonal().array == 0)}")
print(f"norm(A) = {A.norm()}")

PS: I also tried to insert the values into A before the call of A.assemble(), obtaining the zero rows/diagonal from an assembled auxiliary matrix, but the result appears to be the same.

Thank you!

You can find a version of ident_zeros() for DOLFINx in this post.

I would suggest the following:

import time

import dolfinx
import numpy as np
import ufl
from dolfinx import cpp, fem
from mpi4py import MPI
from petsc4py import PETSc

N = 75
degree = 1
mesh = dolfinx.mesh.create_unit_cube(MPI.COMM_WORLD, N, N, N)
V = dolfinx.fem.FunctionSpace(mesh, ("CG", degree))
u = ufl.TrialFunction(V)
v = ufl.TestFunction(V)
print(
    f"N={N}, Num dofs global {V.dofmap.index_map.size_global*V.dofmap.index_map_bs}")
# form is defined on the boundary => matrix wil have many zero diagonal entries
a_bnd = ufl.inner(u, v) * ufl.ds
form = fem.form(a_bnd)

# Find all dofs associated to the facets we are integrating over
exterior_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)
boundary_blocks = dolfinx.fem.locate_dofs_topological(
    V, V.mesh.topology.dim - 1, exterior_facets)

# Set diagonal for everything not associated with a facet in sparsity pattern
imap = V.dofmap.index_map
all_blocks = np.arange(imap.size_local, dtype=np.int32)
zero_blocks = np.delete(all_blocks, boundary_blocks)
start = time.perf_counter()
sp = fem.create_sparsity_pattern(form)
sp.insert_diagonal(zero_blocks)
sp.assemble()
end = time.perf_counter()
print(f"Create sparsity pattern {end-start}")

start = time.perf_counter()
A = cpp.la.petsc.create_matrix(mesh.comm, sp)
A.setOption(A.Option.NEW_NONZERO_ALLOCATION_ERR, 1)
fem.petsc.assemble_matrix(A, form)
A.assemble(A.AssemblyType.FLUSH)
end = time.perf_counter()
print(f"Assemble boundary {end-start}")


def bc_diag(A):
    bc = dolfinx.fem.dirichletbc(
        dolfinx.fem.Constant(mesh, PETSc.ScalarType(1.)), zero_blocks, V)
    start = time.perf_counter()
    dolfinx.cpp.fem.petsc.insert_diagonal(A, V._cpp_object, [bc], 1.)
    end = time.perf_counter()
    print(f"BC insert diag {end-start}s")


bc_diag(A)
start = time.perf_counter()
A.assemble()
end = time.perf_counter()
print(f"Matrix communication {end-start}")
#
print(f"min(diag) = {A.getDiagonal().array.min()}")
print(f"number of diagonal zeros = {sum(A.getDiagonal().array == 0)}")
print(f"norm(A) = {A.norm()}")

This is leveraging that you actually know what degrees of freedom that will give contributions to your assembly.
With output:

N=75, Num dofs global 438976
Create sparsity pattern 0.28873315098462626
Assemble boundary 0.22457787103485316
BC insert diag 0.08238964097108692s
Matrix communication 0.030512886005453765
min(diag) = 5.925925925925912e-05
number of diagonal zeros = 0
norm(A) = 636.5720700068672

3 Likes

Great, thank you! Very neat approach. Just one thing, running your example in parallel gives an error in np.delete(all_blocks, boundary_blocks), since the indices in boundary_blocks exceed the size of all_blocks.
@CastriMik this looks identical to what I did in the setDiagonal() variant, doesn’t it?

Yes, sorry for that (I completely missed that function).

Just change this to all_blocks = np.arange(imap.size_local+imap.num_ghosts, dtype=np.int32)

3 Likes