Hi,
I figured out that some sections of my python code are quite slow. And I’d like to fix this. For example:
for i in range(M):
for j in range(i, M):
Mass_matrix[i, j] = assemble(kappa*Psi_array[i]*Psi_array[j]*dx)
Where Psi_array
contains fenics solutions obtained by applying different boundary conditions to the elliptic PDE.
# MINIMAL WORKING EXAMPLE
import time
import numpy as np
from ufl import *
from dolfin import *
N = 32
M = 4*N
mesh = UnitSquareMesh(N, N)
V = FunctionSpace(mesh, 'P', 1)
Psi_array = []
kappa = Function(V)
kappa.vector()[:] = 1.
# Create simple functions for easier debugging
# In this case, Mass_matrix_ij = i*j
# Stiffness_matrix_ij = 0
for i in range(M):
psi = Function(V)
psi.vector()[:] = i
Psi_array.append(psi)
Mass_matrix = np.empty((M, M))
elapsed_time =- time.time()
#----
for i in range(M):
for j in range(i, M):
Mass_matrix[i, j] = assemble(kappa*Psi_array[i]*Psi_array[j]*dx)
#----
elapsed_time += time.time()
print(f'Calculating mass matrix has taken {elapsed_time:.2f} s')
# Don't take into account transpose operation
ij_lower = np.tril_indices(M, -1)
Mass_matrix[ij_lower] = Mass_matrix.T[ij_lower]
Stiffness_matrix = np.empty((M, M))
elapsed_time =- time.time()
#----
for i in range(M):
for j in range(i, M):
Stiffness_matrix[i, j] = assemble(
kappa*dot(grad(Psi_array[i]), grad(Psi_array[j]))*dx)
#----
elapsed_time += time.time()
print(f'Calculating stiffness matrix has taken {elapsed_time:.2f} s')
# Don't take into account transpose operation
ij_lower = np.tril_indices(M, -1)
Stiffness_matrix[ij_lower] = Stiffness_matrix.T[ij_lower]
Moreover, these slow parts recur several times in the program. I wonder if it is a good idea to change nested python for-loop with its c++ equivalent?
In my problem, I could try the following workaround:
- Write cpp module
//MINIMAL NON-WORKING EXAMPLE
#include <dolfin.h>
#include "Neighborhood.h"
#define M 256
#define N 32
using namespace dolfin;
using array_2d = std::vector<std::vector<double>>;
double
build_mass_matrix(array_2d F, std::vector<double>& K) {
auto mesh = std::make_shared<UnitSquareMesh>(N, N);
auto V = std::make_shared<Neighborhood::FunctionSpace>(mesh);
auto form_defined_in_UFL_FILE = Neighborhood::<Don't know what to write here> // need you help
auto u = std::make_shared<Function>(V);
auto v = std::make_shared<Function>(V);
auto k = std::make_shared<Function>(V);
k->vector()->set_local(K);
array_2d Mass_matrix(m, std::vector<double>(m));
for (int i=0; i<M; ++i) {
u->vector()->set_local(F[i]);
for (int j=i; j<M; ++j) {
v->vector()->set_local(F[j]);
<connect u,v,k somehow with form_defined_in_UFL_FILE> // need your help
Mass_matrix[i][j] = assemble(form_defined_in_UFL_FILE);
}
}
return Mass_matrix
}
- Write UFL file
element = FiniteElement("Lagrange", triangle, 1)
u = Coefficient(element) # Maybe, I should use sth instead of Coefficient
v = Coefficient(element) # Maybe, I should use sth instead of Coefficient
k = Coefficient(element) # Maybe, I should use sth instead of Coefficient
form_define_in_UFL_FILE = k*u*v*dx # I am not sure if wrote it right
- Wrap cpp module and header with pybind11
PYBIND11_MODULE(cutils, m) {
m.doc() = "A faster construction of mass and stiffness matrices with C++";
m.def(
"build_mass_matrix",
&build_mass_matrix,
"<K\\grad(u_i), \\grad(u_j)>");
m.def(
"build_stiffness_matrix",
&build_stiffness_matrix,
"<K u_i, u_j>");
}
- Build and link libraries (I don’t know how to do this and am not sure if dolfin’s
compile_cpp_code
can help) - Exploit the written library in my python program
# Obtain Psi_array and define Kappa
Psi_values = np.empty((M, (N+1)*(N+1)), 'f8')
for i in range(M):
Psi_values[i] = Psi_array[i].vector().get_local()
Mass_matrix = build_mass_matrix(Psi_values, Kappa)
It would be great if someone could tell me what to fix in cpp implementation or show me another way how to assemble the matrices faster (maybe, just using Python - this is why I put MWE in the beginning - to present the gist of what I want to do).