Hi,
I’m experimenting a bit with FEniCS parallelization and I did a couple of tests to see how performance change initializing the mesh with MPI.COMM_WORLD or MPI.COMM_SELF. Thus, I initialized different unit square mashes:
mesh = fenics.UnitSquareMesh(comm, nx, nx)
Varying the comm
(MPI.COMM_WORLD  MPI.COMM_SELF
) and nx
. Then, I tryed to:

project
a simple expression  using
solve
to solve a simple Poisson PDE
In the following table I reported the results I got. A bit of legend:
 First column is the nx I used for the mesh;
 Second column: execution time using
MPI.COMM_SELF
 Third column: execution time using
MPI.COMM_WORLD
 Fourth column: ratio between the first execution time and the latter
*** Project with 10 processors ***
 nx  self  world  self / world 
 50  0.124116361141  0.072749234736  1.70608476628 
 100  0.362367616966  0.10394997336  3.48598085458 
 200  1.35334802978  0.226942427456  5.96339805186 
 500  9.06603579968  1.11628183536  8.12163694909 
*** Solving with 10 processors ***
 nx  self  world  self / world 
 50  0.314215470105  0.277740204707  1.13132871936 
 100  1.10572536848  0.987633908167  1.11957007484 
 200  4.79371981137  3.78673086129  1.26592567229 
 500  42.3490756564  22.4998066388  1.88219731557 
As you can see, using MPI.COMM_SELF
results in a much slower execution. Does it mean that using comm_self impairs the parallel execution?
Also, why projecting is so much slower than solving? As far as I know, project
calls solve
inside.
Thank you in advance.
Notes
 I’m using a parallel solver, more precisely
gmres
with preconditionerhypre_euclid
 The execution times I got are computed with
timeit
on 10 executions, so these are not random fluctuations
Code (if interested)
import fenics
import timeit
import numpy as np
# get comm_self and comm_world
comm_self = fenics.MPI.comm_self
comm_world = fenics.MPI.comm_world
rank = comm_world.Get_rank()
size = comm_world.Get_size()
root = 0
# use parallel solver
solver_parameters = {"linear_solver": "gmres", "preconditioner": "hypre_euclid"}
def test_project_time(mesh: fenics.Mesh):
# define expression
V = fenics.FunctionSpace(mesh, "CG", 1)
expr = fenics.Expression("x[0] < 0.5 ? 1. : 0.", degree=1)
foo = fenics.Function(V)
# project
foo.assign(fenics.project(expr, V,
solver_type=solver_parameters["linear_solver"],
preconditioner_type=solver_parameters["preconditioner"]))
def test_solving_time(mesh: fenics.Mesh):
# define problem
V = fenics.FunctionSpace(mesh, "CG", 1)
expr = fenics.Expression("x[0] < 0.5 ? 1. : 0.", degree=1)
u = fenics.TrialFunction(V)
v = fenics.TestFunction(V)
s = fenics.Function(V)
s.assign(fenics.project(expr, V))
a = fenics.dot(fenics.grad(u), fenics.grad(v)) * fenics.dx
L = s * v * fenics.dx
u = fenics.Function(V)
# solve
fenics.solve(a == L, u, solver_parameters=solver_parameters)
def print_table(table):
col_width = [max(len(x) for x in col) for col in zip(*table)]
for line in table:
print(" " + "  ".join("{:{}}".format(x, col_width[i]) for i, x in enumerate(line)) + " ")
# test for mesh size
if __name__ == "__main__":
sides = np.array([50, 100, 200, 500])
project_times_s = np.empty(sides.shape)
solving_times_s = np.empty(sides.shape)
project_times_w = np.empty(sides.shape)
solving_times_w = np.empty(sides.shape)
project_sw_ratio = np.empty(sides.shape)
solving_sw_ratio = np.empty(sides.shape)
for i, side in enumerate(sides):
if rank == root:
print(f"step {i} on 4")
# initialize mesh
nx = ny = side
mesh_self = fenics.UnitSquareMesh(comm_self, nx, ny)
mesh_world = fenics.UnitSquareMesh(comm_world, nx, ny)
# measure time
project_times_s[i] = timeit.timeit("test_project_time(mesh_self)",
setup="from __main__ import mesh_self, test_project_time",
number=10)
solving_times_s[i] = timeit.timeit("test_solving_time(mesh_self)",
setup="from __main__ import mesh_self, test_solving_time",
number=10)
project_times_w[i] = timeit.timeit("test_project_time(mesh_world)",
setup="from __main__ import mesh_world, test_project_time",
number=10)
solving_times_w[i] = timeit.timeit("test_solving_time(mesh_world)",
setup="from __main__ import mesh_world, test_solving_time",
number=10)
project_sw_ratio[i] = project_times_s[i] / project_times_w[i]
solving_sw_ratio[i] = solving_times_s[i] / solving_times_w[i]
# setup tables
project_table = [(str(side), str(project_time_s), str(project_time_w), str(sw_ratio))
for side, project_time_s, project_time_w, sw_ratio
in zip(sides, project_times_s, project_times_w, project_sw_ratio)]
project_table.insert(0, ("nx", "self", "world", "self / world"))
solving_table = [(str(side), str(solving_time_s), str(solving_time_w), str(sw_ratio))
for side, solving_time_s, solving_time_w, sw_ratio
in zip(sides, solving_times_s, solving_times_w, solving_sw_ratio)]
solving_table.insert(0, ("nx", "self", "world", "self / world"))
# print tables
if rank == root:
print(f"*** Project with {size} processors ***")
print_table(project_table)
print(f"*** Solving with {size} processors ***")
print_table(solving_table)