Hello,
I was wondering if
from dolfinx import fem, mesh
msh = mesh.create_unit_square(MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)
V = fem.FunctionSpace(msh, ("Lagrange", 1))
a = fem.Function(V)
b = fem.Function(V)
c = fem.Function(V)
d = fem.Function(V)
a.x.array[:] = b.x.array[:] - 5*c.x.array[:] + 2*d.x.array[:]
is the most effective way to sum up several fem.functions.
I have something similar in a time loop and it seems it really slows down the computation.
dokken
April 20, 2024, 6:29pm
2
This should be rather fast (as it is just accessing numpy arrays).
Consider for instance this following minimal example:
from dolfinx import fem, mesh
from mpi4py import MPI
import time
N = 100
msh = mesh.create_unit_cube(
MPI.COMM_WORLD, N, N, N)
V = fem.FunctionSpace(msh, ("Lagrange", 1))
a = fem.Function(V)
b = fem.Function(V)
c = fem.Function(V)
d = fem.Function(V)
start = time.perf_counter()
a.x.array[:] = b.x.array - 5*c.x.array + 2*d.x.array
end = time.perf_counter()
print(len(a.x.array), end-start)
Running this on one process yields
1030301 0.03194394599995576
With four processes
265155 0.005039124996983446
269989 0.005096022003272083
268685 0.005063231998065021
270001 0.005096314998809248
If you have an example where you can time that this operation is slow, please add it to the post.
Ok, so I am solving a homogenous linear wave equation
and for the time derivatives I am considering the central scheme
or Newmark’s scheme solving for
Code:
from mpi4py import MPI
from dolfinx import fem, mesh
import dolfinx.fem.petsc
import ufl, time
import numpy as np
msh = mesh.create_unit_square(MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)
t = fem.Constant(msh, 0.)
dt = fem.Constant(msh, 5e-3)
T = 1
c = 1
V = fem.FunctionSpace(msh, ("Lagrange", 1))
f = fem.Function(V)
uh = fem.Function(V)
v = ufl.TestFunction(V)
u = ufl.TrialFunction(V)
def u_init(x, sigma=0.05, mu=0.5):
return np.exp(-0.5*((x[0]-mu)/sigma)**2)*np.exp(-0.5*((x[1]-mu)/sigma)**2)
TIME_SCHEME = "central"
if TIME_SCHEME == "central":
u_n = fem.Function(V)
u_nn = fem.Function(V)
dudtdt = (u - 2 * u_n + u_nn) / dt**2
du = (u + u_nn) / 2
u_n.interpolate(u_init)
u_nn.interpolate(u_init)
elif TIME_SCHEME == "Newmark":
u_n = fem.Function(V)
dudt_n = fem.Function(V)
dudtdt_n = fem.Function(V)
u_temp = fem.Function(V)
dudt_temp = fem.Function(V)
beta = 0.25
gamma = 0.5
dudtdt = u
du = u_n + dudt_n*dt + ((1-beta)*dudtdt_n + 2*beta*dudtdt) * dt**2/2
u_n.interpolate(u_init)
dudt_n.interpolate(u_init)
dudtdt_n.interpolate(u_init)
F = 1 / c**2 * ufl.inner(dudtdt, v) * ufl.dx \
+ ufl.inner(ufl.grad(du), ufl.grad(v)) * ufl.dx \
- ufl.dot(f, v) * ufl.dx
a, L = ufl.system(F)
petsc_options = {"ksp_type": "preonly",
"pc_type": "lu", "pc_factor_mat_solver_type": "mumps"}
problem = dolfinx.fem.petsc.LinearProblem(a, L, u=uh, bcs=[], petsc_options=petsc_options)
start = time.perf_counter()
while t.value < T:
t.value += dt.value
problem.solve()
if TIME_SCHEME == "central":
u_nn.x.array[:] = u_n.x.array
u_n.x.array[:] = uh.x.array
elif TIME_SCHEME == "Newmark":
u_temp.x.array[:] = u_n.x.array + dudt_n.x.array*dt + ((1-beta)*dudtdt_n.x.array + 2*beta*uh.x.array) * dt**2/2
dudt_temp.x.array[:] = dudt_n.x.array + ((1-gamma)*dudtdt_n.x.array + gamma*uh.x.array) * dt
u_n.x.array[:] = u_temp.x.array
dudt_n.x.array[:] = dudt_temp.x.array
dudtdt_n.x.array[:] = uh.x.array
end = time.perf_counter()
print(end-start)
Running this code with
TIME_SCHEME = "central"
takes
0.33813453299990215
but with
TIME_SCHEME = "Newmark"
it takes
40.131102617999204
dokken
April 20, 2024, 7:38pm
4
This is because you are not wrapping dt
as a float when multiplying it with a numpy array, creating large ufl expressions.
Consider the slightly modified MWE:
from mpi4py import MPI
from dolfinx import fem, mesh
import dolfinx.fem.petsc
import ufl
import time
import numpy as np
msh = mesh.create_unit_square(
MPI.COMM_WORLD, 30, 30, cell_type=mesh.CellType.quadrilateral)
t = fem.Constant(msh, 0.)
dt = fem.Constant(msh, 5e-3)
T = 1
c = 1
V = fem.FunctionSpace(msh, ("Lagrange", 1))
f = fem.Function(V)
uh = fem.Function(V)
v = ufl.TestFunction(V)
u = ufl.TrialFunction(V)
def u_init(x, sigma=0.05, mu=0.5):
return np.exp(-0.5*((x[0]-mu)/sigma)**2)*np.exp(-0.5*((x[1]-mu)/sigma)**2)
TIME_SCHEME = "Newmark"
if TIME_SCHEME == "central":
u_n = fem.Function(V)
u_nn = fem.Function(V)
dudtdt = (u - 2 * u_n + u_nn) / dt**2
du = (u + u_nn) / 2
u_n.interpolate(u_init)
u_nn.interpolate(u_init)
elif TIME_SCHEME == "Newmark":
u_n = fem.Function(V)
dudt_n = fem.Function(V)
dudtdt_n = fem.Function(V)
u_temp = fem.Function(V)
dudt_temp = fem.Function(V)
beta = 0.25
gamma = 0.5
dudtdt = u
du = u_n + dudt_n*dt + ((1-beta)*dudtdt_n + 2*beta*dudtdt) * dt**2/2
u_n.interpolate(u_init)
dudt_n.interpolate(u_init)
dudtdt_n.interpolate(u_init)
F = 1 / c**2 * ufl.inner(dudtdt, v) * ufl.dx \
+ ufl.inner(ufl.grad(du), ufl.grad(v)) * ufl.dx \
- ufl.dot(f, v) * ufl.dx
a, L = ufl.system(F)
petsc_options = {"ksp_type": "preonly",
"pc_type": "lu", "pc_factor_mat_solver_type": "mumps"}
problem = dolfinx.fem.petsc.LinearProblem(
a, L, u=uh, bcs=[], petsc_options=petsc_options)
start_loop = time.perf_counter()
times = []
while t.value < T:
t.value += dt.value
problem.solve()
start = time.perf_counter()
if TIME_SCHEME == "central":
u_nn.x.array[:] = u_n.x.array
u_n.x.array[:] = uh.x.array
elif TIME_SCHEME == "Newmark":
dt_c = float(dt)
u_temp.x.array[:] = u_n.x.array + dudt_n.x.array*dt_c + \
((1-beta)*dudtdt_n.x.array + 2*beta*uh.x.array) * dt_c**2/2
dudt_temp.x.array[:] = dudt_n.x.array + \
((1-gamma)*dudtdt_n.x.array + gamma*uh.x.array) * dt_c
u_n.x.array[:] = u_temp.x.array
dudt_n.x.array[:] = dudt_temp.x.array
dudtdt_n.x.array[:] = uh.x.array
end = time.perf_counter()
times.append(end-start)
end_loop = time.perf_counter()
print(f"{TIME_SCHEME = }, Total loop: {end_loop-start_loop: .3e}, Assign: {sum(times): .3e}")
yielding
TIME_SCHEME = 'Newmark', Total loop: 5.241e-01, Assign: 1.689e-02
TIME_SCHEME = 'central', Total loop: 5.722e-01, Assign: 3.877e-03
By the way, so when should I use
dt = fem.Constant(msh, 5e-3)
instead of just
dt = 5e-3
dokken
April 22, 2024, 4:38am
6
Dolfinx.fem.Constant should be used when you work with variational forms (ufl), which in turn will be assembled. If you work with raw data (the underlying numpy arrays, you should convert the constant to a float.
1 Like