Different results in serial and parallel run dolfinx

fforteneto · October 21, 2020, 2:28pm

Hi,

I’m trying to simulate the transport of a quantity n flowing through a channel. I have successfully solved the problem in serial. When
I execute the same code in parallel, I obtained the same results as in a serial run for \mathrm{np} \leq 21, for \mathrm{np} \gt 21 the results for n present some strange behavior close to the inlet. Is that an issue with the DoFs ordering?

python3 stokes.py

mpirun -np 24 python3 stokes.py

stokes.py

import os
os.environ["OMP_NUM_THREADS"] = "1"
import datetime
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
from dolfinx.io import XDMFFile
import dolfinx
from dolfinx.cpp.mesh import CellType
from dolfinx import (RectangleMesh, DirichletBC, Function, FunctionSpace, NewtonSolver, NonlinearProblem, Constant, Form, log)
from dolfinx.fem import (locate_dofs_topological, assemble_matrix, assemble_vector, set_bc, apply_lifting)
from dolfinx.mesh import locate_entities_boundary
from ufl import (Identity, VectorElement, FiniteElement, MixedElement, TestFunction, TrialFunction,
                 derivative, dx, grad, div, dot, inner, sym, split)
from math import sqrt

log.set_log_level(log.LogLevel.INFO)

# Parameters
U     = 0.1
beta  = 1e-3
delta = 1e-3 

# Mesh
mesh = RectangleMesh(
    MPI.COMM_WORLD,
    [np.array([0, -0.5, 0]), np.array([10, 0.5, 0])], [200, 20],
    CellType.quadrilateral, dolfinx.cpp.mesh.GhostMode.none)

def inlet_boundary(x):
    return np.isclose(x[0], 0.0)

def outlet_boundary(x):
    return np.isclose(x[0], 10.0)

def wall_boundary(x):
    return np.logical_or(np.isclose(x[1], -0.5), np.isclose(x[1], 0.5))

# Define Function Spaces
V = VectorElement("Lagrange", mesh.ufl_cell(), 2)
Q = FiniteElement("Lagrange", mesh.ufl_cell(), 1)
N = FiniteElement("Lagrange", mesh.ufl_cell(), 1)
W = FunctionSpace(mesh, MixedElement([V, Q, N]))

# Boundary conditions
inlet_facets = locate_entities_boundary(mesh, 1, inlet_boundary)
outlet_facets = locate_entities_boundary(mesh, 1, outlet_boundary)
wall_facets = locate_entities_boundary(mesh, 1, wall_boundary)

def u_bc_inlet(x):
    return np.stack((U*np.ones(x.shape[1]), np.zeros(x.shape[1])))

def u_bc_wall(x):
    return np.stack((np.zeros(x.shape[1]), np.zeros(x.shape[1])))
	
u_bc1 = Function(W.sub(0).collapse())
u_bc1.interpolate(u_bc_inlet)
dofs1 = locate_dofs_topological((W.sub(0), FunctionSpace(mesh, V)), 1, inlet_facets)
bc1 = DirichletBC(u_bc1, dofs1, W.sub(0))

u_bc2 = Function(W.sub(0).collapse())
u_bc2.vector.set(0.0)
dofs2 = locate_dofs_topological((W.sub(0).sub(1), FunctionSpace(mesh, V).sub(1)), 1, outlet_facets)
bc2 = DirichletBC(u_bc2, dofs2, W.sub(0).sub(1))

u_bc3 = Function(W.sub(0).collapse())
u_bc3.interpolate(u_bc_wall)
dofs3 = locate_dofs_topological((W.sub(0), FunctionSpace(mesh, V)), 1, wall_facets)
bc3 = DirichletBC(u_bc3, dofs3, W.sub(0))

n_bc = Function(W.sub(2).collapse())
n_bc.vector.set(1.0)
dofs4 = locate_dofs_topological((W.sub(2), FunctionSpace(mesh, N)), 1, inlet_facets)
bc4 = DirichletBC(n_bc, dofs4, W.sub(2))

bcs = [bc1, bc2, bc3, bc4]

# Define test and trial functions
w_trial = TrialFunction(W)
w_test = TestFunction(W)
v, q, c = split(w_test)

# Define function
w = Function(W)
u, p, n = split(w)

w0 = Function(W)
u0, p0, n0 = split(w0)

dw = Function(W)

# Initial conditions
def w_init(x):
    values = np.zeros((4, x.shape[1]))
    values[0] = 0.0
    values[1] = 0.0
    values[2] = 0.0
    values[3] = 1.0
    return values

w.interpolate(w_init)

# Strain rate tensor
def D(u):
    return sym(grad(u))

# Identity tensor
I = Identity(len(u))

# Total stress
T = -p*I + 2*beta*D(u)

# Body force
f = Constant(mesh, (0, 0))

# Stokes - Equation
F = inner(grad(v), T)*dx - inner(v, f)*dx + q*div(u)*dx

F += c*dot(grad(n), u)*dx + delta*inner(grad(c), grad(n))*dx + c*Constant(mesh, 0.01)*inner(D(u),D(u))*n*dx

# Jacobian
J = derivative(F, w, w_trial)

class NonlinearEquation(NonlinearProblem):
    def __init__(self, a, L, bcs):
        super().__init__()
        self.L = Form(L)
        self.a = Form(a)
        self.bcs = bcs
        self._F = None
        self._J = None

    def form(self, x):
        x.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)

    def F(self, x):
        if self._F is None:
            self._F = assemble_vector(self.L)
        else:
            with self._F.localForm() as f_local:
                f_local.set(0.0)
            self._F = assemble_vector(self._F, self.L)
        apply_lifting(self._F, [self.a], [self.bcs], [x], -1.0)
        self._F.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
        set_bc(self._F, self.bcs, x, -1.0)
        return self._F

    def J(self, x):
        if self._J is None:
            self._J = assemble_matrix(self.a, self.bcs)
        else:
            self._J.zeroEntries()
            self._J = assemble_matrix(self._J, self.a, self.bcs)
        self._J.assemble()
        return self._J

problem = NonlinearEquation(J, F, bcs)
solver = NewtonSolver(MPI.COMM_WORLD)
w.vector.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)

# Files
u_file = XDMFFile(MPI.COMM_WORLD, "stokes/velocity.xdmf", "w")
u_file.write_mesh(mesh)

p_file = XDMFFile(MPI.COMM_WORLD, "stokes/pressure.xdmf", "w")
p_file.write_mesh(mesh)

n_file = XDMFFile(MPI.COMM_WORLD, "stokes/n.xdmf", "w")
n_file.write_mesh(mesh)

# Solver
solver.solve(problem, w.vector)

w.vector.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)

u_file.write_function(w.split()[0])
p_file.write_function(w.split()[1])
n_file.write_function(w.split()[2])
	
u_file.close()
p_file.close()
n_file.close()

dokken · October 21, 2020, 3:10pm

Please try to make this problem a minimal example.
You should start by using a built in mesh to make it simpler for people to reproduce your problem.

Please also check that your inlet Bc is correct by writing it to file.

You could Also consider: Interpolate into function, and fail in PETSc vector norm (dolfinx)

fforteneto · October 21, 2020, 5:06pm

Hi, @dokken. Thank you for your reply!. I have updated my code using a built in mesh. I checked the boundary conditions and it seems that was imposed correctly.

dokken · October 21, 2020, 5:23pm

Does the issue occur on a single time step? If so, please remove the temporal loop and all parameters related to the loop.

fforteneto · October 21, 2020, 5:54pm

I have removed the loop and update the code, the issue still occurs.

dokken · October 21, 2020, 7:08pm

I cannot reproduce your error using 21,22, 23 or 28 processors using the latest dolfinx docker image.

fforteneto · October 21, 2020, 7:11pm

I running on a ubuntu server machine with dolfinx installed from source

dokken · October 21, 2020, 7:15pm

When did you install it? What is the latest commit on dolfinx?

fforteneto · October 21, 2020, 7:22pm

Today a reinstalled dolfinx running again the commands of this post.

dokken · October 21, 2020, 7:25pm

The bug seems rather strange. Can you try using docker on your server? (As I cannot reproduce it)

fforteneto · October 21, 2020, 7:36pm

I don’t have docker installed on my server yet, I will try to install it and test the code.

fforteneto · October 21, 2020, 8:00pm

I have installed dolfinx from docker and run the code, the issue persists.

dokken · October 21, 2020, 8:37pm

Unfortunately I cannot do much as I cannot reproduce your issue.

dokken · October 21, 2020, 8:39pm

Could you try compute the norm of w before saving the components to file? I.e. w.vector.norm() and see if it changes when ran on 1,2 or 25 processors

fforteneto · October 21, 2020, 8:45pm

For np = 1, 2, and 24 (I only have 24 processors available). w.vector.norm() = 63.92201110501961, 63.9220111050196, and 62.52065094359216

edit: In fact I could run for 25 processor and the result was 63.92201110501959

dokken · October 21, 2020, 8:52pm

I actually got that number on 24 processors as well. I will have a closer look at it tomorrow.

fforteneto · October 21, 2020, 8:55pm

Thank you, for your help!

dokken · October 26, 2020, 9:58am

I would just like to give you an update:

I can confirm that there is an issue when running on 24 processors.
I’ve located that the error appears during the Newton solve, but are struggling to finding the source of the bug.

dokken · October 26, 2020, 11:14am

I’ve now been able to reproduce the issue with a much smaller example:

import dolfinx
from dolfinx import jit
import dolfinx.cpp
import dolfinx.io
import dolfinx.fem as fem
import numpy as np
from mpi4py import MPI
from petsc4py import PETSc
import ufl
import dolfinx.log as log


mesh = dolfinx.RectangleMesh(MPI.COMM_WORLD,
        [np.array([0, -0.5, 0]), np.array([10, 0.5, 0])], [5, 5],
        dolfinx.cpp.mesh.CellType.quadrilateral, dolfinx.cpp.mesh.GhostMode.none)

def inlet_boundary(x):
    return np.isclose(x[0], 0.0)

# Define Function Spaces
CG1 = ufl.FiniteElement("Lagrange", mesh.ufl_cell(), 1)
V = dolfinx.FunctionSpace(mesh, CG1)

# Boundary conditions
inlet_facets = dolfinx.mesh.locate_entities_boundary(mesh, 1, inlet_boundary)

u_bc = dolfinx.Function(V)
u_bc.vector.set(1.0)
dofs = dolfinx.fem.locate_dofs_topological(V, 1, inlet_facets)
bc = dolfinx.DirichletBC(u_bc, dofs)
bcs = [bc]

v = ufl.TestFunction(V)
u = dolfinx.Function(V)

def u_init(x):
    values = np.zeros((1, x.shape[1]))
    values[0] = 1.0
    return values

u.interpolate(u_init)
F = ufl.inner(ufl.grad(u), ufl.grad(v)) * ufl.dx

# Write out Newton solver
u.vector.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)
# Assemble initial residual
du = ufl.TrialFunction(V)
a = ufl.derivative(F, u, du)
b = dolfinx.Function(V)
b.vector.set(0.0)
fem.assemble_vector(b.vector, F)
dolfinx.fem.apply_lifting(b.vector, [a], [bcs], [u.vector], -1.0)
b.vector.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
#dolfinx.fem.set_bc(b.vector, bcs, w.vector, -1.0)
fn = b.vector.norm()
with dolfinx.io.XDMFFile(MPI.COMM_WORLD, "test.xdmf", "w") as xdmf:
    xdmf.write_mesh(mesh)
    xdmf.write_function(b)

if MPI.COMM_WORLD.rank == 0:
     print("Residual ", fn)

I’ll report this at the github repo and continue looking into it.

fforteneto · October 26, 2020, 12:19pm

Hi, @dokken. Thank you for dedicate part of your time to solve this issue. I will follow the progress on github.

Topic		Replies	Views
Serial and parallel runs give different output for fluids demos dolfinx	2	606	September 28, 2022
Different results NewtonSolve parallel mpi.comm_world dolfinx mpi	13	109	January 30, 2025
Reproducibility in parallel programming dolfinx	1	425	August 8, 2022
Legacy FEniCS works, DOLFINx doesn't in Stokes channel flow dolfinx	3	695	July 4, 2022
Inconsistent results in parallel computation dolfinx	3	481	July 5, 2021

Different results in serial and parallel run dolfinx

Related topics