I would like to speed up the code that assembles a jacobian matrix
that consists of bilinear and linear forms.
The bottleneck in the code below is the iniital assembly of the
jacobian, because the sparsity pattern of the full matrix A is not
known in advance, but it is known for all its contained bilinear and
linear forms.
Is it possible to somehow merge the sparsity patterns of the block
matrices and the block vectors and use them for creating the matrix A
using e.g. dolfinx.fem.create_matrix
?
Since my jacobian consists of both bilinear and linear forms, I can’t use
dolfinx.fem.create_matrix_block
, right?
import time
import numpy as np
import ufl
from dolfinx import fem, mesh
from dolfinx.fem.petsc import create_matrix, create_vector
from mpi4py import MPI
from petsc4py import PETSc
from ufl import ds, dx, inner, nabla_grad
Print = PETSc.Sys.Print
def ass_linear_form_into_vec(vec, form):
fem.petsc.assemble_vector(vec, fem.form(form))
vec.assemble()
def ass_bilinear_form(mat, form, bcs, diagonal):
# TODO ask if there is a public interface for assembling matrices into an
# existing PETSc matrix.
fem.petsc._assemble_matrix_mat(
mat,
fem.form(form),
bcs=bcs,
diagonal=diagonal,
)
mat.assemble()
def main():
msh = mesh.create_unit_interval(MPI.COMM_WORLD, nx=3000)
V = fem.FunctionSpace(msh, ("Lagrange", 3))
u = ufl.TrialFunction(V)
v = ufl.TestFunction(V)
bcs = []
L = fem.petsc.assemble_matrix(
fem.form(-inner(nabla_grad(u), nabla_grad(v)) * dx),
bcs=bcs,
diagonal=PETSc.ScalarType(0),
)
n = L.getSize()[0]
et = PETSc.Vec().createSeq(n)
et.setValue(500, 1.0)
df_dv = et.array.real
dg_dw = et.array.real
A = PETSc.Mat().create(MPI.COMM_WORLD)
# A.setOption(PETSc.Mat.Option.HERMITIAN, False)
N = 2 * n + 2
A.setSizes([N, N])
A.setUp()
b = fem.Function(V)
b.x.array[:] = 1.234
# create bilinear and linear forms
bilin_form = -inner(nabla_grad(u), nabla_grad(v)) * dx
mat_dFRe_dv = create_matrix(fem.form(bilin_form))
mat_dFRe_dw = create_matrix(fem.form(bilin_form))
mat_dFIm_dv = create_matrix(fem.form(bilin_form))
mat_dFIm_dw = create_matrix(fem.form(bilin_form))
ass_bilinear_form(mat_dFRe_dv, bilin_form, bcs=bcs, diagonal=1.0)
ass_bilinear_form(mat_dFRe_dw, bilin_form, bcs=bcs, diagonal=0.0)
ass_bilinear_form(mat_dFIm_dv, bilin_form, bcs=bcs, diagonal=0.0)
ass_bilinear_form(mat_dFIm_dw, bilin_form, bcs=bcs, diagonal=1.0)
lin_form = inner(b, v) * dx
vec_dFRe_dk = create_vector(fem.form(lin_form))
vec_dFRe_ds = create_vector(fem.form(lin_form))
vec_dFIm_dk = create_vector(fem.form(lin_form))
vec_dFIm_ds = create_vector(fem.form(lin_form))
ass_linear_form_into_vec(vec_dFRe_dk, lin_form)
fem.set_bc(vec_dFRe_dk, bcs)
ass_linear_form_into_vec(vec_dFRe_ds, lin_form)
fem.set_bc(vec_dFRe_ds, bcs)
ass_linear_form_into_vec(vec_dFIm_dk, lin_form)
fem.set_bc(vec_dFIm_dk, bcs)
ass_linear_form_into_vec(vec_dFIm_ds, lin_form)
fem.set_bc(vec_dFIm_ds, bcs)
Print("Fill real jacobian with values")
t0 = time.monotonic()
# TODO is it possible to determine the sparisty pattern of the whole jacobian A?
# the jacobian consists of four block matrices (bilinar forms) and 6 linear
# forms (2 row vectors and 4 column vectors)
block_mats = [
# matrix, row offset, col offset
(mat_dFRe_dv, 0, 0),
(mat_dFRe_dw, 0, n),
(mat_dFIm_dv, n, 0),
(mat_dFIm_dw, n, n),
]
def insert_values(target_matrix, block_matrix, row_offset, col_offset):
rows_ind, cols, values = block_matrix.getValuesCSR()
cols_to = cols + col_offset
rows_ind_to = np.empty(
N + 1, dtype=np.int32
) # +1 because rows_ind_to[0] is always 0
_idx = 0
rows_ind_to[_idx : _idx + row_offset] = 0
_idx = _idx + row_offset
rows_ind_to[_idx : _idx + len(rows_ind)] = rows_ind
_idx = _idx + len(rows_ind)
rows_ind_to[_idx:] = len(values)
t_setCSR_0 = time.monotonic()
target_matrix.setValuesCSR(
rows_ind_to, cols_to, values, addv=PETSc.InsertMode.ADD
)
Print(f"single setValuesCSR call took {time.monotonic()-t_setCSR_0:.2f}s")
t_insert_0 = time.monotonic()
for block_mat, row_offset, col_offset in block_mats:
insert_values(A, block_mat, row_offset, col_offset)
Print(f"insert_values() loop took {time.monotonic()-t_insert_0:.2e}s")
# column vectors
A.setValues(
range(n),
2 * n,
vec_dFRe_dk,
addv=PETSc.InsertMode.ADD,
)
A.setValues(
range(n),
2 * n + 1,
vec_dFRe_ds,
addv=PETSc.InsertMode.ADD,
)
A.setValues(
range(n, 2 * n),
2 * n,
vec_dFIm_dk,
addv=PETSc.InsertMode.ADD,
)
A.setValues(
range(n, 2 * n),
2 * n + 1,
vec_dFIm_ds,
addv=PETSc.InsertMode.ADD,
)
# row vectors
A.setValues(
2 * n,
range(n),
df_dv,
addv=PETSc.InsertMode.ADD,
)
A.setValues(
2 * n + 1,
range(n, 2 * n),
dg_dw,
addv=PETSc.InsertMode.ADD,
)
print(
f"insert_values loop + A.setValues for linear forms took "
f"{time.monotonic()-t_insert_0:.2e}s"
)
# scalars
# this is needed s.t. PETSc doesn't
# complain that the diagonl entry is missing
# (see https://lists.mcs.anl.gov/pipermail/petsc-users/2016-October/030704.html)
A.setValue(2 * n, 2 * n, 0j, addv=PETSc.InsertMode.ADD)
A.setValue(2 * n + 1, 2 * n + 1, 0j, addv=PETSc.InsertMode.ADD)
t1 = time.monotonic()
A.assemble()
print(
f"fill+assembly of J took {time.monotonic()-t0:.2e}s "
f"(assemble {time.monotonic()-t1:.2e}s)"
)
if __name__ == "__main__":
main()
The current output of this code block is
Fill real jacobian with values
single setValuesCSR call took 2.79s
single setValuesCSR call took 9.57s
single setValuesCSR call took 6.47s
single setValuesCSR call took 16.46s
insert_values() loop took 3.53e+01s
insert_values loop + A.setValues for linear forms took 3.91e+01s
fill+assembly of J took 3.91e+01s (assemble 9.00e-04s)