I am trying to implement a large eddy simulation with the Smagorinsky turbulence model. Nevertheless, I encounter that my simulation is much slower than the one from the Navier-Stokes tutorial from Dokken, which I am using to validate my implementation. The results are almost identical (see images below) but Dokken’s code runs very fast and mine is incredibly slow.
For 200 iterations using 8 MPI threads, Dokken’s code performs at an average of ~49 iter/s.
With the same number of threads and iterations, mine starts at ~1.5 iter/s, and keeps getting slower until reaching about 0.15 iter/s at iteration 200 (6 seconds per iteration ).
The only explanation I can think of is that the calculation of |S| is very expensive, but it doesen’t seem argument enough to get a simulation 300 times slower.
The details of the model:
- Equations
where |S| = (2S_{ij}S_{ij})^{1/2} and \Delta is the mesh size.
- Code (the mesh generation is the same as in the tutorial with the exception of some modifications to L, H). Sorry for the comments in Spanish, they are not too meaningfull
import gmsh
import os
import numpy as np
import tqdm.autonotebook
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.fem import (Constant, Function, FunctionSpace,
assemble_scalar, dirichletbc, form, locate_dofs_topological, set_bc)
from dolfinx.fem.petsc import (apply_lifting, assemble_matrix, assemble_vector,
create_vector, create_matrix, set_bc)
from dolfinx.io import (VTXWriter, distribute_entity_data, gmshio)
from ufl import (FacetNormal, FiniteElement, Identity, Measure, TestFunction, TrialFunction, VectorElement,
as_vector, div, dot, ds, dx, inner, lhs, grad, nabla_grad, rhs, sym)
from ufl import sqrt, sin
# =============================================================================
# PARÁMETROS Y DOMINIO DE LA SIMULACIÓN
# =============================================================================
# Parámetros espaciales
L = 2.0
H = 0.5
c_x = c_y = 0.25
r = 0.05
gdim = 2
fdim = gdim - 1
# Mesh
mesh, _, ft = gmshio.read_from_msh("cyllinder.msh", MPI.COMM_WORLD, rank=0, gdim=gdim)
inlet_marker, outlet_marker, wall_marker, obstacle_marker = 2, 3, 4, 5
# Parámetros temporales y físicos
t = 0.0
T = 2.0
num_steps = 2000
dt = T / num_steps
dt = Constant(mesh, PETSc.ScalarType(dt))
mu = Constant(mesh, PETSc.ScalarType(1.0e-4)) # Viscosidad dinámica (cambiar)
rho = Constant(mesh, PETSc.ScalarType(1.0)) # Densidad (cambiar)
nu = Constant(mesh, PETSc.ScalarType(mu/rho)) # Viscosidad cinemática
f = Constant(mesh, PETSc.ScalarType((0, 0))) # Body forces
resolution = PETSc.ScalarType(r)
Cs = PETSc.ScalarType(0.035) # Constante de Smagorinsky
# =============================================================================
# ESPACIOS DE FUNCIONES Y FUNCIONES: ELEMENTOS TAYLOR-HOOD
# =============================================================================
vector_element = VectorElement("CG", mesh.ufl_cell(), 2)
scalar_element = FiniteElement("CG", mesh.ufl_cell(), 1)
V = FunctionSpace(mesh, vector_element)
Q = FunctionSpace(mesh, scalar_element)
# =============================================================================
# CONDICIONES DE CONTORNO
# =============================================================================
class InletVelocity():
def __init__(self, t):
self.t = t
self.tol = 1e-4
self.alpha_atm = 0.3
def __call__(self, x):
values = np.zeros((gdim, x.shape[1]),dtype=PETSc.ScalarType)
values[0] = 4 * 1.5 * sin(self.t * np.pi/8) * x[1] * (H - x[1])/H**2
# values[0] = 4.0 * ((x[1] + self.tol) / H)**self.alpha_atm
return values
# # # Condiciones de contorno de la velocidad
# Inlet
u_inlet = Function(V)
inlet_velocity = InletVelocity(t)
u_inlet.interpolate(inlet_velocity)
bcu_inflow = dirichletbc(u_inlet, locate_dofs_topological(V, fdim, ft.find(inlet_marker)))
# Walls
u_nonslip = np.array((0,) * mesh.geometry.dim, dtype=PETSc.ScalarType)
bcu_walls = dirichletbc(u_nonslip, locate_dofs_topological(V, fdim, ft.find(wall_marker)), V)
# Obstacle
bcu_obstacle = dirichletbc(u_nonslip, locate_dofs_topological(V, fdim, ft.find(obstacle_marker)), V)
bcu = [bcu_inflow, bcu_obstacle, bcu_walls]
# # # Condiciones de contorno de la presión
# Outlet
bcp_outlet = dirichletbc(PETSc.ScalarType(0.0), locate_dofs_topological(Q, fdim, ft.find(outlet_marker)), Q)
bcp = [bcp_outlet]
# =============================================================================
# DEFINICIÓN DE FUNCIONES Y PARÁMETROS
# =============================================================================
def Sij(w):
"""
# Strain-rate tensor
"""
return sym(nabla_grad(w))
def nu_T(w):
"""
Viscosidad turbulenta
"""
return resolution**2 * Cs**2 * sqrt(2.0 * inner(Sij(w), Sij(w)))
# =============================================================================
# FORMULACIÓN VARIACIONAL
# =============================================================================
# Funciones para la velocidad
u_tent = TrialFunction(V)
v = TestFunction(V)
u_ = Function(V) # u_tent disponible como función
u_.name = "u"
u_n = Function(V) # Función para almacenar el timestep anterior
# Funciones para la presión
p = TrialFunction(Q)
q = TestFunction(Q)
p_ = Function(Q) # p disponible como función
# # # PASO 1: VELOCIDAD APROXIMADA (NO INCOMPRESIBLE)
F1 = dot(u_tent - u_n, v) * dx + \
dt * inner(dot(u_n, nabla_grad(u_n)), v) * dx + \
dt * (nu + nu_T(u_n)) * inner(grad(u_tent), grad(v)) * dx #+ \
# dt * inner(Sij(u_tent), Sij(u_tent)) * inner(grad(u_tent), grad(v))
# 2.0 * dt * nu_T(u_n) * inner(Sij(u_tent), Sij(v)) * dx
a1 = form(lhs(F1))
L1 = form(rhs(F1))
A1 = create_matrix(a1)
b1 = create_vector(L1)
# # # PASO 2: CÁLCULO DE LA PRESIÓN
a2 = form(dot(grad(p), grad(q)) * dx)
L2 = form(-rho/dt * dot(div(u_), q) * dx)
A2 = assemble_matrix(a2, bcs=bcp)
A2.assemble()
b2 = create_vector(L2)
# PASO 3: INCOMPRESIBILIDAD DE LA VELOCIDAD
a3 = form(dot(u_tent, v) * dx)
L3 = form(dot(u_, v) * dx - dt/rho * dot(grad(p_), v) * dx)
A3 = assemble_matrix(a3)
A3.assemble()
b3 = create_vector(L3)
# =============================================================================
# SOLVERS
# =============================================================================
# Solver for step 1
solver1 = PETSc.KSP().create(mesh.comm)
solver1.setOperators(A1)
solver1.setType(PETSc.KSP.Type.BCGS)
pc1 = solver1.getPC()
pc1.setType(PETSc.PC.Type.JACOBI)
# Solver for step 2
solver2 = PETSc.KSP().create(mesh.comm)
solver2.setOperators(A2)
solver2.setType(PETSc.KSP.Type.MINRES)
pc2 = solver2.getPC()
pc2.setType(PETSc.PC.Type.HYPRE)
pc2.setHYPREType("boomeramg")
# Solver for step 3
solver3 = PETSc.KSP().create(mesh.comm)
solver3.setOperators(A3)
solver3.setType(PETSc.KSP.Type.CG)
pc3 = solver3.getPC()
pc3.setType(PETSc.PC.Type.SOR)
# # # Archivos de salida
vtx_u = VTXWriter(mesh.comm, "./outputs/u-les.bp", [u_])
vtx_p = VTXWriter(mesh.comm, "./outputs/p-lens.bp", [p_])
vtx_u.write(t)
vtx_p.write(t)
progress = tqdm.tqdm(desc="Solving PDE", total=num_steps)
for i in range(num_steps):
progress.update(1)
t += dt
inlet_velocity.t = t
u_inlet.interpolate(inlet_velocity)
# PASO 1
A1.zeroEntries()
assemble_matrix(A1, a1, bcs=bcu)
A1.assemble()
with b1.localForm() as loc:
loc.set(0)
assemble_vector(b1, L1)
apply_lifting(b1, [a1], [bcu])
b1.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
set_bc(b1, bcu)
solver1.solve(b1, u_.vector)
u_.x.scatter_forward()
# PASO 2
with b2.localForm() as loc:
loc.set(0)
assemble_vector(b2, L2)
apply_lifting(b2, [a2], [bcp])
b2.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
set_bc(b2, bcp)
solver2.solve(b2, p_.vector)
p_.x.scatter_forward()
# PASO 3
with b3.localForm() as loc:
loc.set(0)
assemble_vector(b3, L3)
b3.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, mode=PETSc.ScatterMode.REVERSE)
solver3.solve(b3, u_n.vector)
u_n.x.scatter_forward()
# # # ESCRIBIR EN ARCHIVO
vtx_u.write(t)
vtx_p.write(t)
vtx_u.close()
vtx_p.close()
- Results: in the left window is the solution obtained with direct numerical simulation (aka Dokken’s wonderful tutorial) and in the right one is the output of my slow ass simulation.