Dot product without assembling vectors every time

Hello everyone,

I have a question about how to perform an operation more efficiently in dolfinx.

I am solving an unsteady PDE, and at each iteration I have to compute a dot product of two vectors. The way I am doing it works, but it gets a bit expensive when I increase the resolution of the mesh. I guess this is because I am assembling these two vectors at every time step. Please see below a minimal example of how I compute the Solution variable at every iteration:

from dolfinx import fem, mesh, io, plot, cpp, graph
import ufl
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
from petsc4py.PETSc import ScalarType
rank = MPI.COMM_WORLD.rank

# Define mesh
domain = mesh.create_rectangle(MPI.COMM_WORLD, [[0, 0],[1, 1]], [100, 100])
n = ufl.FacetNormal(domain)

# Define function space
el1 = ufl.VectorElement("Lagrange", domain.ufl_cell(), 2)
el2 = ufl.FiniteElement("Lagrange", domain.ufl_cell(), 1)
mel = ufl.MixedElement([el1, el2])
Y = fem.FunctionSpace(domain, mel)

# Define trial and test functions
u, p = ufl.TrialFunctions(Y)
v, q = ufl.TestFunctions(Y)

# Define solution vector
q_n = fem.Function(Y)
u_n = q_n.sub(0)
p_n = q_n.sub(1)

def sigma(u, p, Re):
    return (ufl.grad(u) + ufl.transpose(ufl.grad(u)) - fem.Constant(domain,2.0/3.0)*ufl.Identity(u.geometric_dimension())*ufl.div(u))/Re - p*ufl.Identity(u.geometric_dimension())

# Define vectors
V1 = fem.petsc.assemble_vector( fem.form(ufl.dot(ufl.dot(sigma(u_n,p_n,1000),n),v)*ufl.ds) )
V2 = fem.petsc.assemble_vector( fem.form(ufl.dot(n ,v)*ufl.ds) )

# Compute solution    
Solution = V1.dot(V2)

As you see, I am assembling V1 and V2. I have checked that this is what is making the code slow. Is there any way of obtaining the Solution more efficiently?

Thank you very much in advance!

It would be good to get the actual code you are running at every time step, with timings (as I don’t see why this dot product in itself should be expensive).

Thanks @dokken for the quick reply.

Below is the code (I tried to simplify as much as I could, so it’s a non-physical problem). The last three lines are the important ones. You can see that assembling the vectors V1 and V2 at every iteration is what is taking most of the computation time.

import numpy as np
from mpi4py import MPI
import dolfinx
from dolfinx.fem import (Constant, Function, FunctionSpace, apply_lifting, assemble_matrix, 
                         assemble_scalar, assemble_vector, locate_dofs_topological, set_bc, 
                         dirichletbc, form)
from dolfinx import fem
from dolfinx import io
from ufl import (inner, div, dx, nabla_grad, sym, Identity, as_tensor, Dn, grad, ds, rhs, lhs, Measure, transpose, as_tensor, 
                dot, FacetNormal, indices)
from petsc4py import PETSc
import ufl
rank = MPI.COMM_WORLD.rank

# Parameters
L = 1
num_steps = 2000 # number of steps
dt = 1e-3 # time step
T = num_steps*dt # simulation time
Re = 10000
gdim = 2

# Initialise meshing process
import gmsh
gmsh.initialize()

ms = 0.1
# Geometry
if rank == 0:
    # Points
    gmsh.model.occ.addPoint(0, 0, 0, ms, 101)
    gmsh.model.occ.addPoint(L, 0, 0, ms, 102)
    gmsh.model.occ.addPoint(L, L, 0, ms, 103)
    gmsh.model.occ.addPoint(0, L, 0, ms, 104)
    gmsh.model.occ.addLine(101, 102, 1001) 
    gmsh.model.occ.addLine(102, 103, 1002) 
    gmsh.model.occ.addLine(103, 104, 1003)
    gmsh.model.occ.addLine(104, 101, 1004)
    gmsh.model.occ.addCurveLoop([1001, 1002, 1003, 1004], 50001)
    gmsh.model.occ.addPlaneSurface([50001], tag=500001)
    gmsh.model.occ.synchronize()

# Domain markers
fluid_marker = 1
if rank == 0:
    volumes = gmsh.model.getEntities(dim=gdim)
    gmsh.model.addPhysicalGroup(volumes[0][0], [volumes[0][1]], fluid_marker)
    gmsh.model.setPhysicalName(volumes[0][0], fluid_marker, "Fluid")

# Generate mesh
if rank == 0:
    gmsh.model.mesh.generate(gdim)
    gmsh.model.mesh.optimize("Netgen")

# Create Dolfinx mesh and facet tags
from dolfinx.io import (VTXWriter, distribute_entity_data, gmshio)
mesh, cell_markers, ft = io.gmshio.model_to_mesh(gmsh.model, MPI.COMM_WORLD, 0, gdim=gdim)

# Define normal vector
n = FacetNormal(mesh)

''' Define function spaces '''
from ufl import VectorElement, FiniteElement, MixedElement, TrialFunctions, TestFunctions
# Velocity element space
v_cg2 = VectorElement('Lagrange', mesh.ufl_cell(), 2) # Order 2
V = FunctionSpace(mesh, v_cg2)
# Pressure element space
p_cg1 = FiniteElement('Lagrange', mesh.ufl_cell(), 1) # Order 1
Q = FunctionSpace(mesh, p_cg1)
# Mixed element space
mel = MixedElement([v_cg2, p_cg1])
Y = FunctionSpace(mesh,mel)
# Trial and test functions
u, p = TrialFunctions(Y)
v, q = TestFunctions(Y)

# Create initial condition
q_n = Function(Y)
q_n.name = "q_n"
p_n = q_n.sub(1)
p_n.name = "p_n"
u_n = q_n.sub(0)
u_n.name = "u_n"

# Create facet to cell connectivity required to determine boundary facets
tdim = mesh.topology.dim
fdim = tdim - 1
mesh.topology.create_connectivity(fdim, tdim)
boundary_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)

# Boundary conditions
u_D_W = fem.Function(Y).sub(0)
bc = [fem.dirichletbc(u_D_W, fem.locate_dofs_topological(Y.sub(0), fdim, boundary_facets))]

# Stress tensor
def tao(u, Re):
    return ( (ufl.grad(u) + ufl.transpose(ufl.grad(u))  - fem.Constant(mesh,2.0/3.0)*ufl.Identity(u.geometric_dimension())*ufl.div(u))/Re )

def sigma(u, p, Re):
    return (ufl.grad(u) + ufl.transpose(ufl.grad(u)) - fem.Constant(mesh,2.0/3.0)*ufl.Identity(u.geometric_dimension())*ufl.div(u))/Re - p*ufl.Identity(u.geometric_dimension())

# Numerical parameters
k = Constant(mesh, dt)

# Variational Form 
T_comp = ufl.inner(u,v)*ufl.dx
T_comp += ufl.inner(p,q)*ufl.dx
S_comp = ufl.inner(p,ufl.div(v))*ufl.dx
S_comp -= ufl.inner(tao(u,Re),ufl.grad(v))*ufl.dx
S_comp -= ufl.inner(ufl.div(u),q)*ufl.dx
B_comp = ufl.inner(ufl.dot(sigma(u,p,Re),n),v)*ufl.ds

T_comp_old = ufl.inner(u_n,v)*ufl.dx
T_comp_old += ufl.inner(p_n,q)*ufl.dx
S_comp_old = ufl.inner(p_n,ufl.div(v))*ufl.dx
S_comp_old -= ufl.inner(tao(u_n,Re),ufl.grad(v))*ufl.dx
S_comp_old -= ufl.inner(ufl.div(u_n),q)*ufl.dx
B_comp_old = ufl.inner(ufl.dot(sigma(u_n,p_n,Re),n),v)*ufl.ds

Form = (1/k)*(T_comp - T_comp_old) - (1/2)*(S_comp + S_comp_old) - (1/2)*(B_comp + B_comp_old)

# Bilinear and linear form
a = fem.form(ufl.lhs(Form))
L = fem.form(ufl.rhs(Form))

# Assemble left-hand side matrix A and right-hand side vector b
A = fem.petsc.assemble_matrix(a, bcs=bc)
A.assemble()
b = fem.petsc.create_vector(L)

# Linear system solver
solver = PETSc.KSP().create(mesh.comm)
solver.setOperators(A)
solver.setType(PETSc.KSP.Type.GMRES)

import tqdm.notebook
progress = tqdm.tqdm(desc="Solving PDE", total=num_steps)
t = 0
for iterations in range(num_steps):
    progress.update(1)
    t += dt

    # Update the right hand side reusing the initial vector
    with b.localForm() as loc_b:
        loc_b.set(0)
    fem.petsc.assemble_vector(b, L)
    
    # Apply Dirichlet boundary condition to the vector
    apply_lifting(b, [a], [bc])
    b.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
    set_bc(b, bc)

    # Solve linear problem
    solver.solve(b, q_n.vector)
    q_n.x.scatter_forward()
    u_n.x.scatter_forward()
    p_n.x.scatter_forward()

    # Define vectors
    V1 = fem.petsc.assemble_vector( fem.form(ufl.dot(ufl.dot(sigma(u_n,p_n,Re),n),v)*ufl.ds) )
    V2 = fem.petsc.assemble_vector( fem.form(ufl.dot(n ,v)*ufl.ds) )

    # Compute solution    
    Solution = V1.dot(V2)

There are many optimizations you can do here

  1. Move the form(....) definition outside the for loop
  2. Create the vector you assemble into outside the loop.

Your current code yields:

Assemble: 3.22e-03
Dot: 4.31e-06

Optimized code that should work in parallel:

import time
from ufl import VectorElement, FiniteElement, MixedElement, TrialFunctions, TestFunctions
import gmsh
import numpy as np
from mpi4py import MPI
import dolfinx
from dolfinx.fem import (Constant, Function, FunctionSpace, apply_lifting,
                         set_bc)
from dolfinx import fem
from dolfinx import io
from ufl import (FacetNormal, indices)
from petsc4py import PETSc
import ufl
rank = MPI.COMM_WORLD.rank

# Parameters
L = 1
num_steps = 2000  # number of steps
dt = 1e-3  # time step
T = num_steps*dt  # simulation time
Re = 10000
gdim = 2

# Initialise meshing process
gmsh.initialize()

ms = 0.1
# Geometry
if rank == 0:
    # Points
    gmsh.model.occ.addPoint(0, 0, 0, ms, 101)
    gmsh.model.occ.addPoint(L, 0, 0, ms, 102)
    gmsh.model.occ.addPoint(L, L, 0, ms, 103)
    gmsh.model.occ.addPoint(0, L, 0, ms, 104)
    gmsh.model.occ.addLine(101, 102, 1001)
    gmsh.model.occ.addLine(102, 103, 1002)
    gmsh.model.occ.addLine(103, 104, 1003)
    gmsh.model.occ.addLine(104, 101, 1004)
    gmsh.model.occ.addCurveLoop([1001, 1002, 1003, 1004], 50001)
    gmsh.model.occ.addPlaneSurface([50001], tag=500001)
    gmsh.model.occ.synchronize()

# Domain markers
fluid_marker = 1
if rank == 0:
    volumes = gmsh.model.getEntities(dim=gdim)
    gmsh.model.addPhysicalGroup(volumes[0][0], [volumes[0][1]], fluid_marker)
    gmsh.model.setPhysicalName(volumes[0][0], fluid_marker, "Fluid")

# Generate mesh
if rank == 0:
    gmsh.model.mesh.generate(gdim)
    gmsh.model.mesh.optimize("Netgen")

# Create Dolfinx mesh and facet tags
mesh, cell_markers, ft = io.gmshio.model_to_mesh(
    gmsh.model, MPI.COMM_WORLD, 0, gdim=gdim)

# Define normal vector
n = FacetNormal(mesh)

''' Define function spaces '''
# Velocity element space
v_cg2 = VectorElement('Lagrange', mesh.ufl_cell(), 2)  # Order 2
V = FunctionSpace(mesh, v_cg2)
# Pressure element space
p_cg1 = FiniteElement('Lagrange', mesh.ufl_cell(), 1)  # Order 1
Q = FunctionSpace(mesh, p_cg1)
# Mixed element space
mel = MixedElement([v_cg2, p_cg1])
Y = FunctionSpace(mesh, mel)
# Trial and test functions
u, p = TrialFunctions(Y)
v, q = TestFunctions(Y)

# Create initial condition
q_n = Function(Y)
q_n.name = "q_n"
p_n = q_n.sub(1)
p_n.name = "p_n"
u_n = q_n.sub(0)
u_n.name = "u_n"

# Create facet to cell connectivity required to determine boundary facets
tdim = mesh.topology.dim
fdim = tdim - 1
mesh.topology.create_connectivity(fdim, tdim)
boundary_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)

# Boundary conditions
u_D_W = fem.Function(Y).sub(0)
bc = [fem.dirichletbc(u_D_W, fem.locate_dofs_topological(
    Y.sub(0), fdim, boundary_facets))]

# Stress tensor


def tao(u, Re):
    return ((ufl.grad(u) + ufl.transpose(ufl.grad(u)) - fem.Constant(mesh, 2.0/3.0)*ufl.Identity(len(u))*ufl.div(u))/Re)


def sigma(u, p, Re):
    return (ufl.grad(u) + ufl.transpose(ufl.grad(u)) - fem.Constant(mesh, 2.0/3.0)*ufl.Identity(len(u))*ufl.div(u))/Re - p*ufl.Identity(len(u))


# Numerical parameters
k = Constant(mesh, dt)

# Variational Form
T_comp = ufl.inner(u, v)*ufl.dx
T_comp += ufl.inner(p, q)*ufl.dx
S_comp = ufl.inner(p, ufl.div(v))*ufl.dx
S_comp -= ufl.inner(tao(u, Re), ufl.grad(v))*ufl.dx
S_comp -= ufl.inner(ufl.div(u), q)*ufl.dx
B_comp = ufl.inner(ufl.dot(sigma(u, p, Re), n), v)*ufl.ds

T_comp_old = ufl.inner(u_n, v)*ufl.dx
T_comp_old += ufl.inner(p_n, q)*ufl.dx
S_comp_old = ufl.inner(p_n, ufl.div(v))*ufl.dx
S_comp_old -= ufl.inner(tao(u_n, Re), ufl.grad(v))*ufl.dx
S_comp_old -= ufl.inner(ufl.div(u_n), q)*ufl.dx
B_comp_old = ufl.inner(ufl.dot(sigma(u_n, p_n, Re), n), v)*ufl.ds

Form = (1/k)*(T_comp - T_comp_old) - (1/2) * \
    (S_comp + S_comp_old) - (1/2)*(B_comp + B_comp_old)

# Bilinear and linear form
a = fem.form(ufl.lhs(Form))
L = fem.form(ufl.rhs(Form))

# Assemble left-hand side matrix A and right-hand side vector b
A = fem.petsc.assemble_matrix(a, bcs=bc)
A.assemble()
b = fem.petsc.create_vector(L)

# Linear system solver
solver = PETSc.KSP().create(mesh.comm)
solver.setOperators(A)
solver.setType(PETSc.KSP.Type.GMRES)
t = 0
V1_form = fem.form(
    fem.form(ufl.dot(ufl.dot(sigma(u_n, p_n, Re), n), v)*ufl.ds))
V1 = fem.Function(Y)
V2_form = fem.form(ufl.dot(n, v)*ufl.ds)
V2 = fem.Function(Y)
for iterations in range(num_steps):
    t += dt

    # Update the right hand side reusing the initial vector
    with b.localForm() as loc_b:
        loc_b.set(0)
    fem.petsc.assemble_vector(b, L)

    # Apply Dirichlet boundary condition to the vector
    apply_lifting(b, [a], [bc])
    b.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES,
                  mode=PETSc.ScatterMode.REVERSE)
    set_bc(b, bc)

    # Solve linear problem
    solver.solve(b, q_n.vector)
    q_n.x.scatter_forward()
    u_n.x.scatter_forward()
    p_n.x.scatter_forward()
    start = time.perf_counter()

    # Define vectors
    with V1.vector.localForm() as loc_v1, V2.vector.localForm() as loc_v2:
        loc_v1.set(0)
        loc_v2.set(0)
    fem.petsc.assemble_vector(V1.vector, V1_form)
    fem.petsc.assemble_vector(V2.vector, V2_form)
    V1.vector.ghostUpdate(addv=PETSc.InsertMode.ADD,
                          mode=PETSc.ScatterMode.REVERSE)
    V2.vector.ghostUpdate(addv=PETSc.InsertMode.ADD,
                          mode=PETSc.ScatterMode.REVERSE)
    end = time.perf_counter()
    print(f"Assemble: {end-start:.2e}")

    # Compute solution
    start = time.perf_counter()
    Solution = V1.vector.dot(V2.vector)
    end = time.perf_counter()
    print(f"Dot: {end-start:.2e}")
Assemble: 6.61e-05
Dot: 2.71e-06
Assemble: 6.55e-05
Dot: 2.47e-06

Thank you very much.
What version of petsc4py.PETSc.Vec are you using?

I get the error:

AttributeError: ‘petsc4py.PETSc.Vec’ object has no attribute ‘vector’

when I do V1.vector for example.

Note that I defined V1 as

not a petsc vector,
and then not define V1 through assembly:

You are right, I did a mistake.

Thank you very much for this, it is working very well now!