Hi, I have recently been trying to run a script on a cluster and distribute the job across multiple nodes, because I have a large model to solve that requires a large amount of memory. Just as a test, I ran the Helmholtz equation code below on the cluster with MPI:
from mpi4py import MPI
import dolfinx
from dolfinx import default_scalar_type,mesh
import numpy as np
from petsc4py import PETSc
from dolfinx.fem.petsc import assemble_vector
import ufl
import scifem
import dolfinx.fem as fem
from dolfinx.fem import functionspace, Function, locate_dofs_geometrical
from dolfinx.io import VTXWriter
from dolfinx.mesh import CellType, GhostMode, locate_entities
def left_part(x):
    return x[0] < 0.3

def runhelmholtz(omega, c, n):
    # Wavenumber
    omega0 = omega
    c0 = c
    k0 = omega0/c0
    # Approximation space polynomial degree
    deg = 1
    # Number of elements in each direction of the mesh
    n_elem = n
    domain = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, n_elem, n_elem)
    V = dolfinx.fem.functionspace(domain, ("Lagrange", deg))
    u, v = ufl.TrialFunction(V), ufl.TestFunction(V)
    V_ele = functionspace(domain, ("DG", 0))
    u_c = dolfinx.fem.Function(V)
    k = fem.Function(V_ele)
    cells_l = locate_entities(domain, domain.topology.dim, left_part)
    num_cells = domain.topology.index_map(2).size_local
    cell_indices = list(range(num_cells))
    cells_r = list(set(cell_indices) - set(cells_l))
    k.x.array[cells_l] = np.full_like(cells_l, k0*(1+0.005j), dtype=default_scalar_type)
    k.x.array[cells_r] = np.full_like(cells_r, k0/2*(1+0.005j), dtype=default_scalar_type)
    f = dolfinx.fem.Function(V)
    gamma = 1
    geom_dtype = domain.geometry.x.dtype
    points = np.array([0.5, 0.2, 0], dtype=geom_dtype)
    point_source = scifem.PointSource(V, points, magnitude=gamma)
    point_source.apply_to_vector(f)
    a = ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx - k ** 2 * ufl.inner(u, v) * ufl.dx
    L = 1/c0**2*ufl.inner(f, v) * ufl.dx
    domain.topology.create_connectivity(domain.topology.dim - 1, domain.topology.dim)
    boundary_facets = dolfinx.mesh.exterior_facet_indices(domain.topology)
    boundary_dofs = dolfinx.fem.locate_dofs_topological(V, domain.topology.dim - 1, boundary_facets)
    # exclude the top dofs
    def top_free(x):
        return np.isclose(x[1], 1.0)
    def fix(x):
        return np.logical_and(np.isclose(x[0], 0.0), np.isclose(x[1], 0.0))
    fdim = domain.topology.dim - 1
    edge_top = mesh.locate_entities_boundary(domain, fdim, top_free)
    dof_fix = np.array(locate_dofs_geometrical(V, fix)).astype(np.int32)
    top_dofs = dolfinx.fem.locate_dofs_topological(V, domain.topology.dim - 1, edge_top)
    boundary_minus_top = np.setdiff1d(boundary_dofs, top_dofs)
    #
    bc = dolfinx.fem.dirichletbc(u_c, boundary_minus_top)
    bcs = [bc]
    amat = fem.petsc.assemble_matrix(fem.form(a), bcs=bcs)
    amat.assemble()
    Lmat = fem.petsc.assemble_vector(fem.form(L))
    fem.petsc.apply_lifting(Lmat, [fem.form(a)], bcs=[bcs])
    Lmat.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
    fem.petsc.set_bc(Lmat, bcs)
    Lmat.assemble()
    ksp = PETSc.KSP().create(domain.comm)
    ksp.setOperators(amat)
    def monitor(ksp, it, rnorm):
        print(f"iteration: {it} rnorm: {rnorm:.3e}")
    ksp.setMonitor(monitor)
    opts = PETSc.Options()
    ksp.setOptionsPrefix("0_")
    option_prefix = ksp.getOptionsPrefix()
    # direct solver
    opts[f"{option_prefix}ksp_type"] = "preonly"
    opts[f"{option_prefix}pc_type"] = "lu"
    opts[f"{option_prefix}pc_factor_mat_solver_type"] = "mumps"
    # iterative solver
    # opts[f"{option_prefix}ksp_type"] = "gmres"
    # opts[f"{option_prefix}pc_type"] = "bjacobi"
    # opts[f"{option_prefix}sub_pc_type"] = "ilu"
    # opts[f"{option_prefix}ksp_max_it"] = 100
    ksp.setFromOptions()
    uh = fem.Function(V)
    ksp.solve(Lmat, uh.x.petsc_vec)
    # Write the solution for visualization
    with VTXWriter(domain.comm, "uh_dirichlet_mpi.bp", [uh], engine="BP4") as vtx:
        vtx.write(0.0)
omega=100*np.pi
c=2
runhelmholtz(omega,c,300)
And here is the sh file I used to submit this job:
#!/bin/bash
#
#SBATCH --nodes=2
#SBATCH --job-name=helmholtz_mpi4
#SBATCH --ntasks-per-node=4
#SBATCH -p common --account=xx
#SBATCH --output=output/%x_%j.out # Output file in the 'output' directory
#SBATCH --error=output/%x_%j.err # Error file in the 'output' directory
#SBATCH --mail-type=END,FAIL
#SBATCH --mail-user=xx.edu
source xx/miniconda3/etc/profile.d/conda.sh
conda activate fenicsx-complex-ssh
cd /hpc/group/xx/mpi_test
mpirun -np 8 python helmholtz_mpi.py -log_view
conda deactivate
It turns out that this hangs while solving the equation. However, when I change mpirun -np 8 to mpirun -np 4, it works, which means the calculation only succeeds when it runs on a single node. Does anybody know what the issue is? I would really appreciate any advice on how to run this job across multiple nodes. Many thanks!
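In case it helps with diagnosis, here is a minimal mpi4py sanity check (just a sketch; the file name rank_check.py is my own choice) that I could submit with the same sbatch script. MPI.Get_processor_name() reports the host each rank runs on, so with mpirun -np 8 over two nodes the output should list both hostnames, and the barrier message at the end would confirm the ranks can actually communicate across nodes:

from mpi4py import MPI

comm = MPI.COMM_WORLD
# Report which node each MPI rank was placed on
print(f"rank {comm.rank} of {comm.size} running on {MPI.Get_processor_name()}", flush=True)
comm.Barrier()
if comm.rank == 0:
    # Only reached if every rank makes it to the barrier
    print("all ranks passed the barrier", flush=True)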