Hello everyone!
I think my fenicsx code runs slowly (the CPU is only 14% used, but the memory is full). Is there any way to speed it up? (like GPU or something)
FEniCS relies on MPI to accelerate code. Have you run the code with `mpirun -n number_of_processes python3 name_of_your_script.py`?
Secondly, how many processes are you using, how much memory do you have, and how large are the problems you are solving?
- Thanks for your advice! I’m sorry, I don’t know how to describe the processes and the size of the problem, but I know that I have 15 GB of memory.
Could you print the numbers:
# Global number of cells: global size of the index map for entities of the mesh's topological dimension.
print(f"Number of cells {mesh.topology.index_map(mesh.topology.dim).size_global}")
# Global number of degrees of freedom: global size of the dofmap's index map times its block size.
print(f"Number of degrees of freedom {V.dofmap.index_map.size_global * V.dofmap.index_map_bs}")
where `mesh` is the mesh in DOLFINx, and `V` is the function space of your unknown(s).
Additionally, it would be useful to see what solver you are using (a minimal example would help a long way).
Thanks! I will try it now! I will also try to give a minimal example, but it may have to contain GitHub - bayswiss/autogen_LC_PML: auto-generating locally-conformal Perfectly Matched Layer for FEniCSx, since I need to apply a PML. My code still has the problem that the integral of T becomes infinite; I believe a minimal example will also help with this.
Dear dokken,
The output is:
Number of cells 7596
Number of degrees of freedom 396145
This is my minimal example:
import numpy as np
import ufl
from dolfinx import geometry,mesh, fem
from dolfinx.fem.petsc import LinearProblem
from ufl import dx, grad, inner, Measure
from autogen_LC_PML import PML_Functions
from mpi4py import MPI
import dolfinx.fem.petsc
import basix.ufl
from dolfinx import default_scalar_type
# --- PML / mesh configuration -------------------------------------------
Num_layers = 4  # how many element layers make up the PML
d_PML = 0.02  # overall thickness of the PML layer
mesh_size_max = 0.1  # upper bound on the element size; the mesh is generated entirely in gmsh
deg = 2  # polynomial degree passed to PML_Functions as elem_degree
CAD_name = 'cube.step'  # CAD file handed to PML_Functions

# PML_Functions returns the quantities used by the variational formulation
# together with the mesh and its cell/facet tags.
(
    LAMBDA_PML,
    detJ,
    omega,
    k0,
    msh,
    cell_tags,
    facet_tags,
) = PML_Functions(
    CAD_name,
    mesh_size_max,
    Num_layers,
    d_PML,
    elem_degree=deg,
)

# Volume measure restricted by cell tag, integrated with quadrature degree 3 * deg.
quadrature_options = {"quadrature_degree": 3 * deg}
dx = Measure("dx", domain=msh, subdomain_data=cell_tags, metadata=quadrature_options)

# --- Model parameters ----------------------------------------------------
# NOTE(review): the physical meanings below are inferred from the names and
# magnitudes (they look like SI values) - confirm against the model.
D = 1.1e-4  # coefficient of the gradient term in F_heat
nu = 1e15  # presumably a collision frequency
m_e = 9.1e-31  # presumably the electron mass
e = 1.6e-19  # presumably the elementary charge
omega_p = 1.4e16  # presumably a plasma frequency
epsilon_0 = 8.854e-12  # vacuum permittivity
mu_0 = 4 * np.pi * 1e-7  # vacuum permeability
c0 = 299792458  # speed of light in vacuum

# --- Time stepping ---------------------------------------------------------
t = fem.Constant(msh, default_scalar_type(0.0))  # current simulation time
dt = fem.Constant(msh, default_scalar_type(1e-16))  # time-step size
t.value = 0
def left_boundary(x):
    """Return a boolean mask of the points whose first coordinate is close to ``d_PML``.

    ``x`` is indexed as ``x[0]`` for the first coordinate; closeness is decided
    by ``np.isclose`` with ``atol=1e-8``.
    """
    first_coord = x[0]
    return np.isclose(first_coord, d_PML, atol=1e-8)
def right_boundary(x):
    """Return a boolean mask of the points whose first coordinate is close to ``1.0 - d_PML``.

    Closeness is decided by ``np.isclose`` with ``atol=1e-8``.
    """
    target = 1.0 - d_PML
    return np.isclose(x[0], target, atol=1e-8)
def front_boundary(x):
    """Return a boolean mask of the points whose second coordinate is close to ``d_PML``.

    Closeness is decided by ``np.isclose`` with ``atol=1e-8``.
    """
    second_coord = x[1]
    return np.isclose(second_coord, d_PML, atol=1e-8)
def back_boundary(x):
    """Return a boolean mask of the points whose second coordinate is close to ``1.0 - d_PML``.

    Closeness is decided by ``np.isclose`` with ``atol=1e-8``.
    """
    target = 1.0 - d_PML
    return np.isclose(x[1], target, atol=1e-8)
def bottom_boundary(x):
    """Return a boolean mask of the points whose third coordinate is close to ``d_PML``.

    Closeness is decided by ``np.isclose`` with ``atol=1e-8``.
    """
    third_coord = x[2]
    return np.isclose(third_coord, d_PML, atol=1e-8)
def top_boundary(x):
    """Return a boolean mask of the points whose third coordinate is close to ``1.0 - d_PML``.

    Closeness is decided by ``np.isclose`` with ``atol=1e-8``.
    """
    target = 1.0 - d_PML
    return np.isclose(x[2], target, atol=1e-8)
# Mixed space on the tetrahedral mesh: a scalar degree-1 Lagrange field (T)
# paired with a 3-component degree-3 Lagrange field (E).
T_element = basix.ufl.element("Lagrange", "tetrahedron", 1)
E_element = basix.ufl.element("Lagrange", "tetrahedron", 3, shape=(3,))
Mix = basix.ufl.mixed_element([T_element, E_element])
W = fem.functionspace(msh, Mix)

# Trial and test functions, split into their (T, E) parts.
T, E = ufl.TrialFunctions(W)
v_T, v_E = ufl.TestFunctions(W)

# `w` is allocated here but is not referenced by the rest of the visible script.
w = fem.Function(W)

# State that the time loop overwrites with the latest solution after each solve.
w_n = fem.Function(W)
T_n, E_n = ufl.split(w_n)
# Boundary conditions: hold the temperature T (sub-space 0 of W) at 300 K on
# the six planes selected by the *_boundary marker functions.
fdim = msh.topology.dim - 1
msh.topology.create_connectivity(fdim, msh.topology.dim)

T_space = W.sub(0)
T_wall = fem.Constant(msh, default_scalar_type(300.0))

# NOTE(review): locate_entities_boundary only returns facets on the exterior
# boundary of the mesh. If the planes at d_PML / 1.0 - d_PML are interior
# interfaces between the physical domain and the PML, no facets will be found
# and mesh.locate_entities should be used instead - confirm against the mesh
# produced by PML_Functions.
boundary_markers = (
    left_boundary,
    right_boundary,
    front_boundary,
    back_boundary,
    bottom_boundary,
    top_boundary,
)

bcs = []
for marker in boundary_markers:
    facets = mesh.locate_entities_boundary(msh, fdim, marker)
    # The dofs have to come from the same sub-space the condition is applied
    # to. The previous version located them in W.sub(1).sub(i) (components of
    # E) while applying the value on W.sub(0), which pinned E-field dofs to
    # 300 and left T unconstrained.
    dofs = fem.locate_dofs_topological(T_space, fdim, facets)
    bcs.append(fem.dirichletbc(T_wall, dofs, T_space))
# initial condition
# The T component of the previous-step state starts at a uniform 300.0 and
# all three components of E start at zero.
w_n.sub(0).interpolate(lambda x: np.full(x.shape[1], 300.0, dtype=default_scalar_type))
w_n.sub(1).interpolate(lambda x: np.zeros((3, x.shape[1]), dtype=default_scalar_type))
# Residual for T on cells tagged 1: the difference quotient (T - T_n) / dt plus
# the D-weighted gradient term, which is evaluated at the previous state T_n.
F_heat = (T - T_n) / dt * ufl.conj(v_T) * dx(1) + D * ufl.inner(ufl.grad(T_n), ufl.grad(v_T)) * dx(1)
# Residual for E on cells tagged 1: only the (E - E_n) / dt term involves the
# unknown E; the curl term and the last term are evaluated at the previous state E_n.
F_ampere = ufl.inner(ufl.curl(E_n), v_E) * dx(1) - epsilon_0 * ufl.inner((E - E_n) / dt, v_E) * dx(1) + (4 * np.pi * e/c0) * ufl.inner(E_n, v_E) * dx(1)
# Contribution on cells tagged 2, built from the LAMBDA_PML, detJ and k0
# quantities returned by PML_Functions (presumably the PML region - confirm).
F_PML_E = inner(LAMBDA_PML * grad(E), grad(v_E)) * dx(2) - detJ * k0 ** 2 * inner(E, v_E) * dx(2)
# NOTE(review): T only appears in integrals over dx(1). If W carries T dofs that
# lie strictly inside the cells tagged 2, those dofs get no equation, which
# would make the assembled matrix singular and could explain non-finite values
# of the integral of T - confirm.
F = F_heat + F_ampere + F_PML_E
T_final = 1e-12  # end time of the simulation: 1 ps
num = 0  # number of completed time steps

# Function that receives the solution of every linear solve.
wh = fem.Function(W)
wh.name = "u"

# Split the residual F into its bilinear (a) and linear (L) parts.
a, L = ufl.system(F)

# Direct solve: LU factorisation through MUMPS. The options must be handed to
# LinearProblem when it is constructed; previously this dict was created after
# the problem and never passed, so PETSc's default solver was used instead.
petsc_options = {"ksp_type": "preonly", "pc_type": "lu", "pc_factor_mat_solver_type": "mumps"}
problem = LinearProblem(a, L, u=wh, bcs=bcs, petsc_options=petsc_options)
# Compile the diagnostic form once; building it with fem.form inside the loop
# repeats the form-compilation work on every time step.
T_integral_form = fem.form(wh.sub(0) * dx(1))

# NOTE(review): problem.solve() is called on every step; if the matrix does not
# change between steps, assembling/factorising it once outside the loop would
# be considerably cheaper - confirm against the LinearProblem implementation.
while t.value < T_final:  # time stepping
    t.value += dt.value
    problem.solve()

    # assemble_scalar only returns the contribution of the cells owned by this
    # process, so the values are summed over all ranks to stay correct under mpirun.
    local_T_integral = fem.assemble_scalar(T_integral_form)
    T_integral = msh.comm.allreduce(local_T_integral, op=MPI.SUM)

    w_n.x.array[:] = wh.x.array  # the new solution becomes the previous state
    num += 1

    # Report from a single rank to avoid duplicated lines in parallel runs.
    if msh.comm.rank == 0:
        print(f"Step {num} | Time: {t.value:.2e} s | integration of T in domain: {T_integral:.4e} ")
Please note, as I do not have your step file, there is no way I can run this.
Please note that the initial thing you can do is to call mpirun -n 2 python3 name_of_your_script.py
and see if it gives an increased performance.
Thanks for your advice! I’m sorry that I didn’t explain how cube.step is created.
It’s created by this:
import gmsh

# Write a unit cube (corner at the origin, edge length 1) to "cube.step".
gmsh.initialize()
try:
    gmsh.model.add("cube")
    gmsh.model.occ.addBox(0, 0, 0, 1, 1, 1)
    gmsh.model.occ.synchronize()
    gmsh.write("cube.step")
finally:
    # Always release gmsh, even if building or writing the model fails.
    gmsh.finalize()
And i will try your advice!
Thank you it helps a lot!
You can experiment with how many processes is optimal to use, as it is problem and mesh dependent.