Efficient way to sum up fem.function's

Hello,
I was wondering if

from dolfinx import fem, mesh

msh = mesh.create_unit_square(MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)
V = fem.FunctionSpace(msh, ("Lagrange", 1))

a = fem.Function(V)
b = fem.Function(V)
c = fem.Function(V)
d = fem.Function(V)

a.x.array[:] = b.x.array[:] - 5*c.x.array[:] + 2*d.x.array[:] 

is the most effective way to sum up several fem.functions.

I have something similar in a time loop and it seems it really slows down the computation.

This should be rather fast (as it is just accessing numpy arrays).

Consider for instance this following minimal example:

from dolfinx import fem, mesh
from mpi4py import MPI
import time
N = 100
msh = mesh.create_unit_cube(
    MPI.COMM_WORLD, N, N, N)
V = fem.FunctionSpace(msh, ("Lagrange", 1))

a = fem.Function(V)
b = fem.Function(V)
c = fem.Function(V)
d = fem.Function(V)


start = time.perf_counter()
a.x.array[:] = b.x.array - 5*c.x.array + 2*d.x.array
end = time.perf_counter()
print(len(a.x.array), end-start)

Running this on one process yields

1030301 0.03194394599995576

With four processes

265155 0.005039124996983446
269989 0.005096022003272083
268685 0.005063231998065021
270001 0.005096314998809248

If you have an example where you can time that this operation is slow, please add it to the post.

Ok, so I am solving a homogenous linear wave equation

image

image

image

and for the time derivatives I am considering the central scheme

image

or Newmark’s scheme solving for image

image

Code:

from mpi4py import MPI
from dolfinx import fem, mesh

import dolfinx.fem.petsc
import ufl, time

import numpy as np

msh = mesh.create_unit_square(MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)

t = fem.Constant(msh, 0.)
dt = fem.Constant(msh, 5e-3)
T = 1 
c = 1

V = fem.FunctionSpace(msh, ("Lagrange", 1))
f = fem.Function(V)

uh = fem.Function(V)
v = ufl.TestFunction(V)
u = ufl.TrialFunction(V)

def u_init(x, sigma=0.05, mu=0.5):
    return np.exp(-0.5*((x[0]-mu)/sigma)**2)*np.exp(-0.5*((x[1]-mu)/sigma)**2) 
    
TIME_SCHEME = "central"

if TIME_SCHEME == "central":
    u_n = fem.Function(V)
    u_nn = fem.Function(V)
        
    dudtdt = (u - 2 * u_n + u_nn) / dt**2
    du = (u + u_nn) / 2

    u_n.interpolate(u_init)
    u_nn.interpolate(u_init)

elif TIME_SCHEME == "Newmark":
    u_n = fem.Function(V)
    dudt_n = fem.Function(V)
    dudtdt_n = fem.Function(V)

    u_temp = fem.Function(V)
    dudt_temp = fem.Function(V)

    beta = 0.25
    gamma = 0.5

    dudtdt = u
    du = u_n + dudt_n*dt + ((1-beta)*dudtdt_n + 2*beta*dudtdt) * dt**2/2

    u_n.interpolate(u_init)
    dudt_n.interpolate(u_init)
    dudtdt_n.interpolate(u_init)
    
F = 1 / c**2 * ufl.inner(dudtdt, v) * ufl.dx \
    + ufl.inner(ufl.grad(du), ufl.grad(v)) * ufl.dx \
    - ufl.dot(f, v) * ufl.dx 

a, L = ufl.system(F)
petsc_options = {"ksp_type": "preonly",
                "pc_type": "lu", "pc_factor_mat_solver_type": "mumps"}
problem = dolfinx.fem.petsc.LinearProblem(a, L, u=uh, bcs=[], petsc_options=petsc_options)

start = time.perf_counter()
while t.value < T:
    t.value += dt.value
    problem.solve()
                            
    if TIME_SCHEME == "central":
        u_nn.x.array[:] = u_n.x.array
        u_n.x.array[:] = uh.x.array  
        
    elif TIME_SCHEME == "Newmark":
        u_temp.x.array[:] = u_n.x.array + dudt_n.x.array*dt + ((1-beta)*dudtdt_n.x.array + 2*beta*uh.x.array) * dt**2/2
        dudt_temp.x.array[:] = dudt_n.x.array + ((1-gamma)*dudtdt_n.x.array + gamma*uh.x.array) * dt

        u_n.x.array[:] = u_temp.x.array
        dudt_n.x.array[:] = dudt_temp.x.array
        dudtdt_n.x.array[:] = uh.x.array

end = time.perf_counter()
print(end-start)

Running this code with

TIME_SCHEME = "central"

takes

0.33813453299990215

but with

TIME_SCHEME = "Newmark"

it takes

40.131102617999204

This is because you are not wrapping dt as a float when multiplying it with a numpy array, creating large ufl expressions.

Consider the slightly modified MWE:

from mpi4py import MPI
from dolfinx import fem, mesh

import dolfinx.fem.petsc
import ufl
import time

import numpy as np

msh = mesh.create_unit_square(
    MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)

t = fem.Constant(msh, 0.)
dt = fem.Constant(msh, 5e-3)
T = 1
c = 1

V = fem.FunctionSpace(msh, ("Lagrange", 1))
f = fem.Function(V)

uh = fem.Function(V)
v = ufl.TestFunction(V)
u = ufl.TrialFunction(V)


def u_init(x, sigma=0.05, mu=0.5):
    return np.exp(-0.5*((x[0]-mu)/sigma)**2)*np.exp(-0.5*((x[1]-mu)/sigma)**2)


TIME_SCHEME = "Newmark"

if TIME_SCHEME == "central":
    u_n = fem.Function(V)
    u_nn = fem.Function(V)

    dudtdt = (u - 2 * u_n + u_nn) / dt**2
    du = (u + u_nn) / 2

    u_n.interpolate(u_init)
    u_nn.interpolate(u_init)

elif TIME_SCHEME == "Newmark":
    u_n = fem.Function(V)
    dudt_n = fem.Function(V)
    dudtdt_n = fem.Function(V)

    u_temp = fem.Function(V)
    dudt_temp = fem.Function(V)

    beta = 0.25
    gamma = 0.5

    dudtdt = u
    du = u_n + dudt_n*dt + ((1-beta)*dudtdt_n + 2*beta*dudtdt) * dt**2/2

    u_n.interpolate(u_init)
    dudt_n.interpolate(u_init)
    dudtdt_n.interpolate(u_init)

F = 1 / c**2 * ufl.inner(dudtdt, v) * ufl.dx \
    + ufl.inner(ufl.grad(du), ufl.grad(v)) * ufl.dx \
    - ufl.dot(f, v) * ufl.dx

a, L = ufl.system(F)
petsc_options = {"ksp_type": "preonly",
                 "pc_type": "lu", "pc_factor_mat_solver_type": "mumps"}
problem = dolfinx.fem.petsc.LinearProblem(
    a, L, u=uh, bcs=[], petsc_options=petsc_options)

start_loop = time.perf_counter()
times = []
while t.value < T:
    t.value += dt.value
    problem.solve()

    start = time.perf_counter()
    if TIME_SCHEME == "central":
        u_nn.x.array[:] = u_n.x.array
        u_n.x.array[:] = uh.x.array

    elif TIME_SCHEME == "Newmark":
        dt_c = float(dt)
        u_temp.x.array[:] = u_n.x.array + dudt_n.x.array*dt_c + \
            ((1-beta)*dudtdt_n.x.array + 2*beta*uh.x.array) * dt_c**2/2
        dudt_temp.x.array[:] = dudt_n.x.array + \
            ((1-gamma)*dudtdt_n.x.array + gamma*uh.x.array) * dt_c

        u_n.x.array[:] = u_temp.x.array
        dudt_n.x.array[:] = dudt_temp.x.array
        dudtdt_n.x.array[:] = uh.x.array
    end = time.perf_counter()
    times.append(end-start)
end_loop = time.perf_counter()
print(f"{TIME_SCHEME = }, Total loop: {end_loop-start_loop: .3e}, Assign: {sum(times): .3e}")

yielding

TIME_SCHEME = 'Newmark', Total loop:  5.241e-01, Assign:  1.689e-02
TIME_SCHEME = 'central', Total loop:  5.722e-01, Assign:  3.877e-03

By the way, so when should I use

dt = fem.Constant(msh, 5e-3) 

instead of just

dt = 5e-3

Dolfinx.fem.Constant should be used when you work with variational forms (ufl), which in turn will be assembled. If you work with raw data (the underlying numpy arrays, you should convert the constant to a float.

1 Like