Parallel computing with mpirun

Ricky · July 20, 2020, 7:26am

Hi,
Will the mesh size or total number of meshed nodes affect the number of processors I can use? When I use different mesh sizes and mpirun -np 4, sometimes it works but sometimes doesn’t.
Thanks

dokken · July 20, 2020, 8:08am

In theory you should be able to use as many processors as there are cells in the mesh (would not advice it). Please provide a minimal working example that gives you an error message on different number of processors

Ricky · July 20, 2020, 9:06am

Thank you
Here is the minimal example. There is no error messages, but it seems take forever to run
b_u1=assemble(L_u1, tensor=b_u1) if using mpirun -np 4

from future import print_function
from fenics import *
from mshr import *
import numpy as np
import pdb
import gc
from sys import getsizeof
from numpy import array

if not has_linear_algebra_backend(“PETSc”) and not has_linear_algebra_backend(“Tpetra”):
info(“DOLFIN has not been configured with Trilinos or PETSc. Exiting.”)
exit()

if not has_krylov_solver_preconditioner(“amg”):
info("Sorry, this demo is only available when DOLFIN is compiled with AMG "
“preconditioner, Hypre or ML.”)
exit()

if has_krylov_solver_method(“minres”):
krylov_method = “minres”
elif has_krylov_solver_method(“tfqmr”):
krylov_method = “tfqmr”
else:
info("Default linear algebra backend was not compiled with MINRES or TFQMR "
“Krylov subspace method. Terminating.”)
exit()

Tf = 0.0025/0.87 # final time
num_steps = 6000 # number of time steps
dt1 = Tf / num_steps # time step size
dt1 =0.1E-6
mu_s=100000000
mu_l = 0.00392 # dynamic viscosity
#mu_s=mu_l
rho_l = 3750. # density
rho_s = 4420.
k_l=50
k_s=30.
T_s=1923.
T_l=1973.
#Cp=Constant(759.75) # specific heat ( Change later)
Cp_s=740.
Cp_l=830.
dH=286000.

Create mesh and define function spaces

#mesh = BoxMesh(Point(0.0,0.0,0.0), Point(0.0008,0.0006,0.0003),270,180,90)
mesh = BoxMesh(Point(0.0,0.0,0.0), Point(0.0003,0.0003,0.0003),40,40,40)
V = VectorElement(“CG”, mesh.ufl_cell(), 1)
Q = FiniteElement(“CG”, mesh.ufl_cell(), 1)
#pdb.set_trace()
W=FunctionSpace(mesh, V* Q)
T = FunctionSpace(mesh, ‘CG’, 1)
#q_degree = 2
#dx = dx(metadata={‘quadrature_degree’: q_degree})
#dof_x = T.tabulate_dof_coordinates().reshape(T.dim(),mesh.geometry().dim())
dof_x = T.tabulate_dof_coordinates()
xT = dof_x[:, 0]
yT = dof_x[:, 1]
zT = dof_x[:, 2]
print ('Velocity dofs = ', W.dim())
print ('Pressure dofs = ', T.dim())
#pdb.set_trace()

Define boundaries

boundary_parts = MeshFunction(“size_t”, mesh, mesh.topology().dim()-1)
tor=0.00001
top = AutoSubDomain(lambda x,on_bnd: x[2]>=0.0003-tor and on_bnd)
bot = AutoSubDomain(lambda x,on_bnd: x[2]<=0.0+tor and on_bnd)
left = AutoSubDomain(lambda x,on_bnd: x[0]<=0.0+tor and on_bnd)
right = AutoSubDomain(lambda x,on_bnd: x[0]>=0.0003-tor and on_bnd)
front = AutoSubDomain(lambda x,on_bnd: x[1]<=0.0+tor and on_bnd)
back = AutoSubDomain(lambda x,on_bnd: x[1]>=0.0003-tor and on_bnd)

top.mark(boundary_parts, 1)
bot.mark(boundary_parts, 2)
left.mark(boundary_parts, 3)
right.mark(boundary_parts, 4)
front.mark(boundary_parts, 5)
back.mark(boundary_parts, 6)

ds1 = Measure(“ds”, subdomain_id=1,subdomain_data=boundary_parts)

Define boundary conditions

top = DirichletBC(T, Constant(0), boundary_parts,1)
top_ind=np.fromiter(top.get_boundary_values().keys(),dtype=np.int)
#pdb.set_trace()

bcu_wall1 = DirichletBC(W.sub(0), Constant((0, 0,0)), boundary_parts,2)
bcu_wall2 = DirichletBC(W.sub(0), Constant((0, 0,0)), boundary_parts,3)
bcu_wall3 = DirichletBC(W.sub(0), Constant((0, 0,0)), boundary_parts,4)
bcu_wall5 = DirichletBC(W.sub(0).sub(1), Constant((0)), boundary_parts,5)
bcu_wall6 = DirichletBC(W.sub(0), Constant((0, 0,0)), boundary_parts,6)
bcu_wall4 = DirichletBC(W.sub(0).sub(2), Constant((0)), boundary_parts,1)
indd=np.fromiter(bcu_wall1.get_boundary_values().keys(),dtype=np.int)
bcu = [ bcu_wall1,bcu_wall2,bcu_wall3,bcu_wall4,bcu_wall5,bcu_wall6]

Define trial and test functions

up0 = Function(W)
u0,p0=up0.split(deepcopy=True)
u1 = as_vector((up0[0], up0[1], up0[2]))
p1 = up0[3]
up1 = Function(W)
up2 = Function(W)
upf = Function(W)
up_bc = Function(W)
up = TrialFunction(W)
u = as_vector((up[0],up[1], up[2]))
p = up[3]
vq = TestFunction(W)
v = as_vector((vq[0],vq[1], vq[2]))
q = vq[3]

Tu = TrialFunction(T)
Tv = TestFunction(T)

Define functions for solutions at previous and current time steps

T_n = Function(T)
T_ = Function(T)
T_n.interpolate(Constant(300.0))
tor1=1.e-10
up0.interpolate(Constant([tor1,tor1,tor1,tor1]))
fun_l=Function(T)
stepfn=Function(T)
mu=Function(T)
rho=Function(T)
k=Function(T)
Cp=Function(T)

U = 0.5*(u0 + u)
n = FacetNormal(mesh)
dt = Constant(dt1)

rho_c=rho_l
k_c=k_l
mu_c=mu_l
#alpham=1./2*((1-f_l(T_n,T_l,T_s,fun_l))*rho_s-f_l(T_n,T_l,T_s,fun_l)*rho_l)/((1-f_l(T_n,T_l,T_s,fun_l))*rho_s+f_l(T_n,T_l,T_s,fun_l)*rho_l)
Cp_c=Cp_l
#pdb.set_trace()

mu.vector()[:] = mu_c
rho.vector()[:] = rho_c
k.vector()[:] = k_c
Cp.vector()[:] = Cp_c

Define strain-rate tensor

def epsilon(u):
return sym(nabla_grad(u))

Define stress tensor

def sigma(u, p,mu):
return 2muepsilon(u) - p*Identity(len(u))

xT1=np.ascontiguousarray(xT, dtype=np.float32)
yT1=np.ascontiguousarray(yT, dtype=np.float32)

LSpower=85.0.36
R=3.75E-5
t = 0
#pdb.set_trace()
#flux=Expression('power/(piR2)exp(-2(x2)/R**2)’,power=power,R=R,pi=np.pi,x=xT)
x_cord = Function(T)
x_cord.vector()[:]=np.ascontiguousarray(xT, dtype=np.float32)
y_cord = Function(T)
y_cord.vector()[:]=np.ascontiguousarray(yT, dtype=np.float32)

Time-stepping

it=0
A_u1=None
AT=None
b_u1=None
bT=None
h = mesh.hmax()
def dtang_T(T_n,T):

dtang = Expression(('Tx', 'Ty','Tz'),Tx=project(T_n.dx(0),T,solver_type='cg', preconditioner_type='hypre_amg'), \
        Ty=project(T_n.dx(1),T,solver_type='cg', preconditioner_type='hypre_amg'),Tz=Function(T),degree=2)
return dtang

for n in range(num_steps):

Res = rho/dt*(u-u0) + rho*dot(u0,grad(u)) - (mu)*div(grad(u)+grad(u).T) + grad(p) 
tau_supg = Constant(((2.0/dt1)**2 + (2.0*sqrt(np.dot(u0.vector().get_local(),u0.vector().get_local()))/h)**2 + 9*(4.0*mu_l/rho_l/(h**2))**2)**(-0.5))

F_LS = (tau_supg*inner(Res,dot(u0, grad(v))-mu*div((grad(v)+grad(v).T))))*dx

F1 = rho*dot((u - u0) / dt, v)*dx + \
     rho*inner(v, dot(grad(u), u0))*dx \
     + (mu)*inner(grad(v), grad(u)+grad(u).T)*dx \
     - inner(div(v), p)*dx -inner(q, div(u))*dx- dot(Constant((-0.00008))*dtang_T(T_n,T),v)*ds1 \
     +F_LS 
#pdb.set_trace()     
a_u1 = lhs(F1)
L_u1 = rhs(F1)

#pdb.set_trace()
A_u1=assemble(a_u1, tensor=A_u1)
#pdb.set_trace()
b_u1=assemble(L_u1, tensor=b_u1)
#pdb.set_trace()
[bc.apply(A_u1,b_u1) for bc in bcu]

precond = mu*inner(grad(u)+grad(u).T, nabla_grad(v))*dx \
  +rho*inner(v, dot(grad(u), u0))*dx +dt/rho *inner(grad(p), grad(q))*dx + dt/rho *p*q*dx+(0.01/mu_l) * p*q*dx\
  +rho/dt*inner(u, v)*dx
P, btmp = assemble_system(precond, L_u1, bcu)

solver = PETScKrylovSolver(krylov_method,'amg')
solver.set_operators(A_u1, P)
solver.parameters['monitor_convergence'] = False
solver.parameters['maximum_iterations'] = 1000

solver.parameters['relative_tolerance'] =5E-3
solver.parameters['absolute_tolerance'] = 5E-5
solver.parameters["error_on_nonconvergence"] = False

solver.solve(up1.vector(), b_u1)
assign(up0, up1)

dokken · July 20, 2020, 9:10am

Please format your code by encapsulating it with ```.

Also, please remove any part of the code that is not required for the code to Get to a hanging state. For instance, try removing the matrix assembly and the boundary conditions, as well as trying to simplify the form.

Do you end up with the same issue if you use this mesh on a standard passion problem?

Ricky · July 20, 2020, 9:39am

Hope this is what you preferred. It works fine with single or two processors but end up with the issue when I use four processors

from future import print_function
from fenics import *
from mshr import *
import numpy as np
import pdb
import gc
from sys import getsizeof
from numpy import array

Tf = 0.0025/0.87 # final time
num_steps = 6000 # number of time steps
dt1 = Tf / num_steps # time step size
dt1 =0.1E-6
mu= 0.00392 # dynamic viscosity
rho= 3750. # density

mesh = BoxMesh(Point(0.0,0.0,0.0), Point(0.0003,0.0003,0.0003),40,40,40)
V = VectorElement(“CG”, mesh.ufl_cell(), 1)
Q = FiniteElement(“CG”, mesh.ufl_cell(), 1)
W=FunctionSpace(mesh, V* Q)

Define trial and test functions

up0 = Function(W)
u0,p0=up0.split(deepcopy=True)
u1 = as_vector((up0[0], up0[1], up0[2]))
p1 = up0[3]
up1 = Function(W)
up2 = Function(W)
upf = Function(W)
up_bc = Function(W)
up = TrialFunction(W)
u = as_vector((up[0],up[1], up[2]))
p = up[3]
vq = TestFunction(W)
v = as_vector((vq[0],vq[1], vq[2]))
q = vq[3]

tor1=1.e-10
up0.interpolate(Constant([tor1,tor1,tor1,tor1]))
boundary_parts = MeshFunction(“size_t”, mesh, mesh.topology().dim()-1)
tor=0.00001
top = AutoSubDomain(lambda x,on_bnd: x[2]>=0.0003-tor and on_bnd)
top.mark(boundary_parts, 1)
ds1 = Measure(“ds”, subdomain_id=1,subdomain_data=boundary_parts)
dt = Constant(dt1)
T = FunctionSpace(mesh, ‘CG’, 1)
T_n = Function(T)
h = mesh.hmax()
def dtang_T(T_n,T):

dtang = Expression(('Tx', 'Ty','Tz'),Tx=project(T_n.dx(0),T,solver_type='cg', preconditioner_type='hypre_amg'), \
        Ty=project(T_n.dx(1),T,solver_type='cg', preconditioner_type='hypre_amg'),Tz=Function(T),degree=2)
return dtang

Res = rho/dt*(u-u0) + rhodot(u0,grad(u)) - (mu)div(grad(u)+grad(u).T) + grad§
tau_supg = Constant(((2.0/dt1)**2 + (2.0sqrt(np.dot(u0.vector().get_local(),u0.vector().get_local()))/h)*2 + 9(4.0mu/rho/(h**2))2)(-0.5))

F_LS = (tau_supginner(Res,dot(u0, grad(v))-mudiv((grad(v)+grad(v).T))))*dx

F1 = rho*dot((u - u0) / dt, v)dx +
rhoinner(v, dot(grad(u), u0))*dx
+ (mu)*inner(grad(v), grad(u)+grad(u).T)*dx
- inner(div(v), p)*dx -inner(q, div(u))*dx- dot(Constant((-0.00008))*dtang_T(T_n,T),v)*ds1
+F_LS
a_u1 = lhs(F1)
L_u1 = rhs(F1)
A_u1=assemble(a_u1)
b_u1=assemble(L_u1)

dokken · July 20, 2020, 9:57am

Way better. Stille please encapsulate your code such that it gets formatted.
Example :

a="s"

Is achieved by using ``` before and after the code

Ricky · July 20, 2020, 10:21am

from __future__ import print_function
from fenics import *
from mshr import *
import numpy as np
import pdb
import gc
from sys import getsizeof
from numpy import array


Tf = 0.0025/0.87           # final time
num_steps = 6000    # number of time steps
dt1 = Tf / num_steps # time step size
dt1 =0.1E-6
mu= 0.00392             # dynamic viscosity
rho= 3750.            # density
mesh = BoxMesh(Point(0.0,0.0,0.0), Point(0.0003,0.0003,0.0003),40,40,40)
V = VectorElement("CG", mesh.ufl_cell(), 1)
Q  = FiniteElement("CG", mesh.ufl_cell(), 1)
W=FunctionSpace(mesh, V* Q)

# Define trial and test functions
up0 = Function(W)
u0,p0=up0.split(deepcopy=True)
u1 = as_vector((up0[0], up0[1], up0[2]))
p1  = up0[3]
up1 = Function(W)
up2 = Function(W)
upf = Function(W)
up_bc = Function(W)
up  = TrialFunction(W)
u   = as_vector((up[0],up[1], up[2]))
p   = up[3]
vq  = TestFunction(W)
v   = as_vector((vq[0],vq[1], vq[2]))
q   = vq[3]

tor1=1.e-10
up0.interpolate(Constant([tor1,tor1,tor1,tor1]))
boundary_parts = MeshFunction("size_t", mesh, mesh.topology().dim()-1)
tor=0.00001
top = AutoSubDomain(lambda x,on_bnd: x[2]>=0.0003-tor  and on_bnd)
top.mark(boundary_parts, 1)
ds1 = Measure("ds", subdomain_id=1,subdomain_data=boundary_parts)
dt   = Constant(dt1)
T = FunctionSpace(mesh, 'CG', 1)
T_n = Function(T)
h = mesh.hmax()
def dtang_T(T_n,T):

    dtang = Expression(('Tx', 'Ty','Tz'),Tx=project(T_n.dx(0),T,solver_type='cg', preconditioner_type='hypre_amg'), \
            Ty=project(T_n.dx(1),T,solver_type='cg', preconditioner_type='hypre_amg'),Tz=Function(T),degree=2)
    return dtang

Res = rho/dt*(u-u0) + rho*dot(u0,grad(u)) - (mu)*div(grad(u)+grad(u).T) + grad(p) 
tau_supg = Constant(((2.0/dt1)**2 + (2.0*sqrt(np.dot(u0.vector().get_local(),u0.vector().get_local()))/h)**2 + 9*(4.0*mu/rho/(h**2))**2)**(-0.5))

F_LS = (tau_supg*inner(Res,dot(u0, grad(v))-mu*div((grad(v)+grad(v).T))))*dx

F1 = rho*dot((u - u0) / dt, v)*dx + \
     rho*inner(v, dot(grad(u), u0))*dx \
     + (mu)*inner(grad(v), grad(u)+grad(u).T)*dx \
     - inner(div(v), p)*dx -inner(q, div(u))*dx- dot(Constant((-0.00008))*dtang_T(T_n,T),v)*ds1 \
     +F_LS      
a_u1 = lhs(F1)
L_u1 = rhs(F1)
A_u1=assemble(a_u1)
b_u1=assemble(L_u1)

dokken · July 21, 2020, 8:06pm

The problem is

Ricky:

def dtang_T(T_n,T):

    dtang = Expression(('Tx', 'Ty','Tz'),Tx=project(T_n.dx(0),T,solver_type='cg', preconditioner_type='hypre_amg'), \
            Ty=project(T_n.dx(1),T,solver_type='cg', preconditioner_type='hypre_amg'),Tz=Function(T),degree=2)
    return dtang

If you change this to:

dtang_T = as_vector((T_n.dx(0), T_n.dx(1), Function(T)))

Your code runs nicely.

Ricky · July 22, 2020, 4:36am

Thank you so much! it works

Topic		Replies	Views
Multiple cpus only for solving part	3	469	April 21, 2021
Nonlinear poisson as parallelisation example dolfinx mpi	18	136	January 23, 2025
Using MPI with FEniCS to distribute computational load; not to spawn multiple instances of same code	7	1697	September 20, 2021
Parallel Processing_error	5	241	June 18, 2022
Running dolfinx on parallel processors dolfinx	6	1152	April 20, 2022

Parallel computing with mpirun

Create mesh and define function spaces

Define boundaries

Define boundary conditions

Define trial and test functions

Define functions for solutions at previous and current time steps

Define strain-rate tensor

Define stress tensor

Time-stepping

Define trial and test functions

Related topics