Hello there,
I have a script that solves the 3D fluid flow over a sphere in a rectangular channel. This script is basically adapted from one of the tutorials. I split the script into two files, one part that I run in serial to create the mesh, subdomains and boundaries,
python3 laminar_flow_mwe_serial.py
and another that is supposed to run in parallel to actually solve the problem.
# WORKS
python3 laminar_flow_mwe_parallel.py
# WORKS!
mpirun -np 1 python3 laminar_flow_mwe_parallel.py
# DOES NOT WORK!
mpirun -np 2 python3 laminar_flow_mwe_parallel.py
The issue I have is that, whenever I try to use more than 1 process to run the script, it freezes. to be specific, the processes are created, they are using about 100% of CPU each, and in this case each uses about 5% of my memory. But nothing else happens, this can be running forever and does not complete more than one iteration. I know that it is stuck after the first iteration because I can see that the XDMF files with the results are created, but then they do not grow in size, which should happen after each time step’s solution is added to the file (this happens when run in serial). Based on what I saw, it seems to iterate forever in the first step of the time loop (tentative velocity step).
Trying to find what the issue is, I have tested many solver/pre-conditioner combinations. None of them work in parallel, some of them work in serial (problem diverges with some solvers, I think).
I have tested this in the current docker images (both the 2019 stable
and the most recent dev
image), and also in an anaconda
installation of fenics. I also tested the 2018 version, with the same results.
Here are the two files (inserted inline b/c did not find a way to add attachments):
laminar_flow_mwe_serial.py
- Creates the mesh, subdomains and boundaries.
"""
Channel with sphere - Part 1 - Serial Code
Code to generate rectangular channel with a sphere in cross-flow. This will be used to develop and test Navier-Stokes equations.
It also creates a spherical subdomain downstream of the sphere cut-out. This will be used to test surface and volume integrals.
Part 1 - Generatese and saves the mesh, boundaries and subdomains into H5 files.
"""
from __future__ import print_function
# Environment setup
import os
# This may work to speed up mesh generation and/or matrix assembly
# DO NOT COMBINE WITH MPI
os.putenv("OMP_NUM_THREADS","4")
os.putenv("OPENBLAS_NUM_THREADS","4")
# FEniCS imports
from fenics import *
from mshr import *
import numpy as np
from dolfin import *
import matplotlib.pyplot as plt
mpi_comm = MPI.comm_world
mpi_rank = mpi_comm.Get_rank()
# Basic simulation parameters (m, kg, s)
D = 0.01 # diameter of the sphere
radius=0.5*D # radius
D_perturb = 0.005 # perturbation to create asymmetric mesh
#
n_segments = 50 # mesh segments at the surface of the sphere
n_cells_per_dir = 64 # mesh resolution for generate_mesh()
# Create mesh
box_size = np.array([12*D, 4*D, 4*D])
center = np.array([4*D,2*D+D_perturb,2*D])
channel = Box(Point(0.0, 0.0, 0.0), Point(box_size))
sphere = Sphere(Point(center), radius=D/2, segments=n_segments)
domain = channel - sphere
#
mesh = generate_mesh(domain, n_cells_per_dir)
# Define boundaries
def inflow_fun(x,xlim=0.0):
return near(x[0],xlim)
def outflow_fun(x,xlim=box_size[0]):
return near(x[0],xlim)
def walls_fun(x,xlim=box_size):
return (
near(x[1],0.0) or
near(x[1],xlim[1]) or
near(x[2],0.0) or
near(x[2],xlim[2])
)
def sphere_fun(x,center=center,radius = radius):
return (
x[0] >= center[0] - radius and
x[0] <= center[0] + radius and
x[1] >= center[1] - radius and
x[1] <= center[1] + radius and
x[2] >= center[2] - radius and
x[2] <= center[2] + radius
)
# This is here so we can use it when we need to define an additional subdomain
dummy_position = 0.5 * box_size
# placed 2*D downstream of the sphere
dummy_position[0] = center[0] + 4 * radius
#
def dummy_subdomain_fun(x,center=dummy_position,radius=radius):
# start at the center of the box
# subdomain is a sphere
return (np.sqrt(np.sum(np.square(x-center))) <= radius)
class Inflow(SubDomain):
def inside(self, x, on_boundary):
return inflow_fun(x) and on_boundary
class Outflow(SubDomain):
def inside(self, x, on_boundary):
return outflow_fun(x) and on_boundary
class Walls(SubDomain):
def inside(self, x, on_boundary):
return walls_fun(x) and on_boundary
class Sphere(SubDomain):
def inside(self, x, on_boundary):
return sphere_fun(x) and on_boundary
class DummySubdomain(SubDomain):
def inside(self, x, on_boundary):
return dummy_subdomain_fun(x)
boundaries = MeshFunction('size_t', mesh, mesh.topology().dim()-1)
subdomains = MeshFunction('size_t', mesh, mesh.topology().dim())
inflow_bnd = Inflow()
outflow_bnd = Outflow()
walls_bnd = Walls()
sphere_bnd = Sphere()
dummy_subdomain = DummySubdomain()
#
# marking boundaries
boundaries.set_all(0)
inflow_bnd.mark(boundaries, 1)
outflow_bnd.mark(boundaries, 2)
walls_bnd.mark(boundaries, 3)
# the sphere could have gotten the same marker b/c they are both no-slip
sphere_bnd.mark(boundaries, 4)
# mark subdomains
subdomains.set_all(0)
dummy_subdomain.mark(subdomains, 1)
# save mesh and boundaries
# File names
basename = "channel_sphere/channel_sphere"
mesh_filename = basename + "_mesh.xdmf"
subdomains_filename = basename + "_subdomains.xdmf"
boundaries_filename = basename + "_boundaries.xdmf"
## Use XDMF files
# # write the mesh
file_mesh = XDMFFile(mpi_comm, mesh_filename)
file_mesh .write(mesh)
# write subdomains
file_subdomains = XDMFFile(mpi_comm, subdomains_filename)
file_subdomains.write(subdomains)
# write boundaries
file_boundaries = XDMFFile(mpi_comm, boundaries_filename)
file_boundaries.write(boundaries)
# # Extra, for visualization with ParaView (though XDMF can also be used here)
# File("channel_sphere/channel_sphere.pvd") << mesh
# File("channel_sphere/boundaries.pvd") << boundaries
# File("channel_sphere/subdomains.pvd") << subdomains
laminar_flow_mwe_parallel.py
- Solves 3D, transient, laminar Navier-Stokes
"""
Channel with sphere - Part 2 - Parallel Code
Code to generate rectangular channel with a sphere in cross-flow. This will be used to develop and test Navier-Stokes equations.
It also creates a spherical subdomain downstream of the sphere cut-out. This will be used to test surface and volume integrals later on.
Part 2 - Loads mesh, boundaries and subdomains from files, it then applies boundary conditions, assembles and solves.
"""
from __future__ import print_function
# Environment setup
import os
# FEniCS imports
from fenics import *
from mshr import *
import numpy as np
from dolfin import *
import matplotlib.pyplot as plt
# Define communicator and rank for this process, in case we are running with MPI
mpi_comm = MPI.comm_world
mpi_rank = mpi_comm.Get_rank()
# These are used for set_log_level() in DOLFIN
CRITICAL = 50 # errors that may lead to data corruption and such
ERROR = 40 # things that go boom
WARNING = 30 # things that may go boom later
INFO = 20 # information of general interest
PROGRESS = 16 # what's happening (broadly)
TRACE = 13 # what's happening (in detail)
DBG = 10 # sundry
#
set_log_level(INFO)
parameters["std_out_all_processes"] = True
#parameters['ghost_mode'] = 'none' #'shared_facet'
parameters["form_compiler"]["cpp_optimize"] = True
parameters["allow_extrapolation"] = True
#parameters["num_threads"] = 8
PETScOptions.set('pc_type', 'asm')
#PETScOptions.set('sub_pc_type', 'hypre_amg')
PETScOptions.set('sub_pc_type', 'ilu')
#PETScOptions.set('sub_pc_type', 'lu')
#PETScOptions.set('pc_asm_overlap', '100')
# Basic simulation parameters (m, kg, s)
T = 0.01 # final time
#mu = 8.9E-4 # dynamic viscosity of water
mu = 18.37E-6 # dynamic viscosity of air
rho = 1.0 # density (FENICS tutorial had density of 1)
D = 0.01 # diameter of the sphere
radius = 0.5*D # radius
D_perturb = 0.005 # perturbation to create asymmetric mesh
Re = 625 # Reynolds number based on channel height/width
Vmax = mu * Re / (rho * (4*D)) # Inflow velocity
#
n_segments = 50 # mesh segments at the surface of the sphere
n_cells_per_dir = 64 # mesh resolution for generate_mesh()
# Load mesh and mesh function for subdomains#
# file names
basename = "channel_sphere/channel_sphere"
mesh_filename = basename + "_mesh.xdmf"
subdomains_filename = basename + "_subdomains.xdmf"
boundaries_filename = basename + "_boundaries.xdmf"
#
# read the mesh
mesh = Mesh(mpi_comm)
with XDMFFile(mpi_comm, mesh_filename) as xdmf_infile:
xdmf_infile.read(mesh)
# read subdomains
# Define a mesh value collection of dim = 3, for the cell data
mvc_internal = MeshValueCollection("size_t", mesh=mesh, dim=3)
# read subdomain data into the mesh value collection
with XDMFFile(mpi_comm, subdomains_filename) as xdmf_infile:
xdmf_infile.read(mvc_internal)
# create a mesh function from the mesh value collection
subdomains = MeshFunction("size_t", mesh, mvc_internal)
# read boundaries
# Define a mesh value collection of dim = 2, for the facet data (boundaries)
mvc_boundaries = MeshValueCollection("size_t", mesh=mesh, dim=2)
with XDMFFile(mpi_comm, boundaries_filename) as xdmf_infile:
xdmf_infile.read(mvc_boundaries)
# create a mesh function from the mesh value collection
boundaries = MeshFunction("size_t", mesh, mvc_boundaries)
# Calculate dt as per CFL (with CFLmax = 0.5)
deltaX = mesh.hmin() # minimum mesh size
dt = 0.5*deltaX**2/mu
dt = 1E-2*dt # Just for testing
# number of time steps to reach target final time
num_steps = int(T/dt) + 1 # number of time steps
### SUBDOMAINS
# 0: Fluid
# 1: Spherical region downstream of the actual sphere
### BOUNDARIES
# 1: Inflow
# 2: Outflow
# 3: Walls
# 4: Sphere surfaces
# Define function spaces
V = VectorFunctionSpace(mesh, 'P', 2)
Q = FunctionSpace(mesh, 'P', 1)
# Define inflow profile
inflow_profile = (\
'{}*(1.0 - pow((x[2]-{})/({}),2))*(1.0 - pow((x[1]-{})/({}),2))'.format(\
Vmax, 2*D, 2*D, 2*D, 2*D), '0', '0')
# Define boundary conditions USING SUBDOMAINS FROM MESH FUNCTION
bcu_inflow = DirichletBC(V, Expression(inflow_profile, degree=2), boundaries, 1)
bcu_walls = DirichletBC(V, Constant((0, 0, 0)), boundaries, 3)
bcu_sphere = DirichletBC(V, Constant((0, 0, 0)), boundaries, 4)
bcp_outflow = DirichletBC(Q, Constant(0), boundaries, 2)
bcu = [bcu_inflow, bcu_walls, bcu_sphere]
bcp = [bcp_outflow]
# Define trial and test functions
u = TrialFunction(V)
v = TestFunction(V)
p = TrialFunction(Q)
q = TestFunction(Q)
# Define functions for solutions at previous and current time steps
u_n = Function(V)
u_ = Function(V)
p_n = Function(Q)
p_ = Function(Q)
# Define expressions used in variational forms
U = 0.5*(u_n + u) # looks like Crank-Nicholson
n = FacetNormal(mesh)
f = Constant((0, 0, 0))
k = Constant(dt)
mu = Constant(mu)
rho = Constant(rho)
# Define symmetric gradient
def epsilon(u):
return sym(nabla_grad(u))
# Define stress tensor
def sigma(u, p):
return 2*mu*epsilon(u) - p*Identity(len(u))
dx = Measure('dx', domain=mesh, subdomain_data=subdomains)
ds = Measure('ds', domain=mesh, subdomain_data=boundaries)
# dS = Measure('dS', domain=mesh, subdomain_data=subdomains)
# Define variational problem for step 1
F1 = rho*dot((u - u_n) / k, v)*dx \
+ rho*dot(dot(u_n, nabla_grad(u_n)), v)*dx \
+ inner(sigma(U, p_n), epsilon(v))*dx \
+ dot(p_n*n, v)*ds - dot(mu*nabla_grad(U)*n, v)*ds \
- dot(f, v)*dx
a1 = lhs(F1)
L1 = rhs(F1)
# Define variational problem for step 2
a2 = dot(nabla_grad(p), nabla_grad(q))*dx
L2 = dot(nabla_grad(p_n), nabla_grad(q))*dx - (1/k)*div(u_)*q*dx
# Define variational problem for step 3
a3 = dot(u, v)*dx
L3 = dot(u_, v)*dx - k*dot(nabla_grad(p_ - p_n), v)*dx
# Assemble matrices
A1 = assemble(a1)
A2 = assemble(a2)
A3 = assemble(a3)
# Apply boundary conditions to matrices
[bc.apply(A1) for bc in bcu]
[bc.apply(A2) for bc in bcp]
# Create XDMF files for visualization output
xdmffile_u = XDMFFile(mpi_comm, 'results/velocity.xdmf')
xdmffile_p = XDMFFile(mpi_comm, 'results/pressure.xdmf')
# Time-stepping
t = 0
for n in range(num_steps):
# Update current time
t += dt
if mpi_rank == 0:
print('**********\nTime: {}/{}\n'.format(t,T))
## Create solver??? Change parameters of existing default solver???
# this would be the place to do it
# Step 1: Tentative velocity step
# print("** Solving tentative velocity step...\n" )
if mpi_rank == 0:
print("** Solving tentative velocity step...\n" )
#
b1 = assemble(L1)
[bc.apply(b1) for bc in bcu]
#solve(A1, u_.vector(), b1) # fails b/c out of memory
#solve(A1, u_.vector(), b1, 'bicgstab', 'petsc_amg')
#parallel
#solve(A1, u_.vector(), b1, 'gmres', 'jacobi')
#solve(A1, u_.vector(), b1, 'gmres', 'default')
#solve(A1, u_.vector(), b1, 'gmres', 'petsc_amg')
#solve(A1, u_.vector(), b1, 'bicgstab', 'hypre_amg')
# solve(A1, u_.vector(), b1, 'superlu') # doesn't work in serial, crazy memory use
#solve(A1, u_.vector(), b1, 'bicgstab', 'default')
solve(A1, u_.vector(), b1, 'gmres')
#solve(A1, u_.vector(), b1, 'gmres', 'jacobi')
#
if mpi_rank == 0:
print("** Done!\n" )
# Step 2: Pressure correction step
# print("** Solving pressure correction step...\n" )
if mpi_rank == 0:
print("** Solving pressure correction step...\n" )
#
b2 = assemble(L2)
[bc.apply(b2) for bc in bcp]
#solve(A2, p_.vector(), b2, 'bicgstab', 'hypre_euclid')
#solve(A2, p_.vector(), b2, 'bicgstab', 'hypre_amg')
#solve(A2, p_.vector(), b2)
#solve(A1, u_.vector(), b1, 'bicgstab', 'petsc_amg')
#solve(A2, p_.vector(), b2, 'gmres', prec)
#parallel
solve(A2, p_.vector(), b2, 'gmres', 'hypre_amg')
#solve(A2, p_.vector(), b2, 'minres', 'hypre_amg')
#
if mpi_rank == 0:
print("** Done!\n" )
# print("** Done!\n" )
# Step 3: Velocity correction step
# print("** Solving velocity correction step...\n" )
if mpi_rank == 0:
print("** Solving velocity correction step...\n" )
#
b3 = assemble(L3)
#solve(A3, u_.vector(), b3, 'cg', 'sor')
#solve(A3, u_.vector(), b3, 'bicgstab', 'sor')
#solve(A3, u_.vector(), b3, 'gmres', 'default')
#parallel
# solve(A3, u_.vector(), b3, 'bicgstab', 'jacobi')
#solve(A3, u_.vector(), b3, 'gmres', 'default')
# solve(A3, u_.vector(), b1, 'gmres', 'petsc_amg')
#solve(A3, u_.vector(), b3, 'mumps', 'ilu')
solve(A3, u_.vector(), b3, 'gmres', 'jacobi')
#
if mpi_rank == 0:
print("** Done!\n" )
xdmffile_u.write(u_, t)
xdmffile_p.write(p_, t)
# Update previous solution
if mpi_rank == 0:
print("** Updating solution for next iteration...\n" )
#
u_n.assign(u_)
p_n.assign(p_)
#
if mpi_rank == 0:
print("** Done!\n" )
if mpi_rank == 0:
print('**********Time step {}/{} completed.\t Time: {}/{} \t u max: {}\n'.format(n+1, num_steps, t, T, u_.vector().max()))
Any ideas about what the problem might be? Any help is appreciated, this is driving me crazy.