Hi all,
I have an assembled matrix with zero rows, where I’d like to insert ones on the diagonal. In older versions there was the A.ident_zeros()
function which did exactly that, and fast. Such a function seems not to be implemented in dolfinx, so I tried to find a workaround with PETSc, using:
A.setValueLocal()
A.setValuesLocalCSR/RCV/IJV()
A.setDiagonal()
While they all seem to work, they are extremely (and equally) slow. I assume because the diagonal NNZ have to be allocated and added after the assembly of the matrix. I tried to set A.setOption(A.Option.FORCE_DIAGONAL_ENTRIES, 1)
hoping that this would allocate the full diagonal, but without a noticeable effect.
Is there an efficient way to achieve this that works on large 3D problems and in parallel?
Here is a MWE illustrating what I tried. It works fine for N = 8..16
, but I need a solution that also works for, for instance, N = 128
.
import time
import dolfinx
import numpy as np
import ufl
from dolfinx import cpp, fem
from mpi4py import MPI
from petsc4py import PETSc
N = 8
degree = 1
mesh = dolfinx.mesh.create_unit_cube(MPI.COMM_WORLD, N, N, N)
V = dolfinx.fem.FunctionSpace(mesh, ("CG", degree))
u = ufl.TrialFunction(V)
v = ufl.TestFunction(V)
# form is defined on the boundary => matrix wil have many zero diagonal entries
a_bnd = ufl.inner(u, v) * ufl.ds
form = fem.form(a_bnd)
sp = fem.create_sparsity_pattern(form)
sp.assemble()
A = cpp.la.petsc.create_matrix(mesh.comm, sp)
# NOTE: matrix is creaed with MatOptions KEEP_NONZERO_PATTERN and
# NEW_NONZERO_ALLOCATION_ERR
A.setOption(A.Option.FORCE_DIAGONAL_ENTRIES, 1)
# A.setOption(A.Option.NEW_NONZERO_LOCATIONS, 1)
# if this option is not set to False, will give error. Apparently FORCE_DIAGONAL_ENTRIES does not work
A.setOption(A.Option.NEW_NONZERO_ALLOCATION_ERR, 0)
fem.petsc.assemble_matrix(A, form)
A.assemble()
print(f"min(diag) = {A.getDiagonal().array.min()}")
print(f"number of diagonal zeros = {sum(A.getDiagonal().array == 0)}")
# breakpoint()
start = time.perf_counter()
IS_zeros = A.findZeroRows()
end = time.perf_counter()
print(f"Find zero rows: {end-start}s")
idx_zeros = IS_zeros.getIndices()
local_range = A.getOwnershipRange()
local_idx = idx_zeros - local_range[0]
def setDiagonal(A):
start = time.perf_counter()
diag = A.getDiagonal()
diag.setValuesLocal(local_idx, np.ones_like(local_idx, dtype=PETSc.ScalarType))
A.setDiagonal(diag)
end = time.perf_counter()
print(f"Set diagonal: {end-start}s")
A.assemble()
def setValue(A):
start = time.perf_counter()
for i in local_idx:
# print(i)
A.setValueLocal(i, i, 1.0)
end = time.perf_counter()
print(f"Time setValueLocal: {end - start}s")
A.assemble()
def setValuesLocalCSR(A):
indptr = np.zeros(A.local_size[0] + 1, PETSc.IntType)
indptr[-1] = len(local_idx)
for k in range(len(local_idx) - 1):
indptr[local_idx[k] + 1: local_idx[k + 1] + 1] = k + 1
indptr[local_idx[-1] + 1: -1] = k + 2
# print(indptr)
start = time.perf_counter()
A.setValuesLocalCSR(
indptr,
local_idx,
np.ones((len(local_idx), 1), dtype=PETSc.ScalarType),
addv=PETSc.InsertMode.ADD_VALUES,
)
end = time.perf_counter()
print(f"Time setValuesLocalCSR: {end - start}s")
A.assemble()
def setValuesLocalRCV(A):
start = time.perf_counter()
A.setValuesLocalRCV(
local_idx.reshape(-1, 1),
local_idx.reshape(-1, 1),
np.ones((len(local_idx), 1), dtype=PETSc.ScalarType),
addv=PETSc.InsertMode.INSERT_VALUES,
)
end = time.perf_counter()
print(f"Time setValuesLocalRCV: {end - start}s")
A.assemble()
# NOTE: only call ONE of the functions per run of the script!
setDiagonal(A)
# setValue(A)
# setValuesLocalCSR(A)
# setValuesLocalRCV(A)
#
print(f"min(diag) = {A.getDiagonal().array.min()}")
print(f"number of diagonal zeros = {sum(A.getDiagonal().array == 0)}")
print(f"norm(A) = {A.norm()}")
PS: I also tried to insert the values into A
before the call of A.assemble()
, obtaining the zero rows/diagonal from an assembled auxiliary matrix, but the result appears to be the same.
Thank you!