Hi all,
The following is a simplified code that solves a Reaction-Diffusion system. My main code is actually a Reaction-Diffusion-Advection model, but I think I can get my point across with this toy problem. The problem is time dependent, but the code below focuses on only one time step.
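Schematically, each unknown T below takes one backward-Euler step of size k = 0.01 in weak form,

∫ ((T - T_old)/k) v dx + D ∫ ∇T·∇v dx + ∫ (reaction terms) v dx = 0,

with constant diffusion coefficients and quadratic reaction terms coupling the four fields; this is just a restatement of the form F1 defined further down.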
from dolfin import *
import numpy as np
import time
import logging
#MPI parameters
comm = MPI.comm_world
num_processes = comm.Get_size()
this_process = comm.Get_rank()
###############################################################
#Solver configs
class Problem(NonlinearProblem):
    def __init__(self, J, F, bcs):
        self.bilinear_form = J
        self.linear_form = F
        self.bcs = bcs
        NonlinearProblem.__init__(self)
    def F(self, b, x):
        assemble(self.linear_form, tensor=b)
        for bc in self.bcs:
            bc.apply(b, x)
    def J(self, A, x):
        assemble(self.bilinear_form, tensor=A)
        for bc in self.bcs:
            bc.apply(A)
###############################################################
###############################################################
class CustomSolver1(NewtonSolver):
    def __init__(self):
        NewtonSolver.__init__(self, mesh.mpi_comm(),
                              PETScKrylovSolver(), PETScFactory.instance())
    def solver_setup(self, A, P, problem, iteration):
        self.linear_solver().set_operator(A)
        PETScOptions.set("ksp_max_it", 1000)
        PETScOptions.set("ksp_initial_guess_nonzero", "true")
        PETScOptions.set("ksp_type", "gmres")
        PETScOptions.set("ksp_monitor")
        PETScOptions.set("pc_type", "hypre")
        PETScOptions.set("pc_hypre_type", "euclid")
        self.linear_solver().set_from_options()
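# (Optional note, not used in the runs timed below: "ksp_converged_reason" is a
#  standard PETSc option that prints the iteration count per linear solve, which
#  would make the serial vs. parallel comparison easier to read than the
#  ksp_monitor output alone. It would go next to the other PETScOptions.set
#  calls in solver_setup above.)
# PETScOptions.set("ksp_converged_reason")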
###############################################################
#Mesh
mesh = UnitSquareMesh(200,200)
###############################################################
# Auxiliary function spaces (the mixed space is defined below; V is only used for its dimension)
V = VectorFunctionSpace(mesh,'P',1)
S1 = FunctionSpace(mesh,'P',1)
###############################################################
#Defining Mixed Function Space
P1 = FiniteElement('P', triangle,1)
P2 = FiniteElement('P', triangle,2)
element1 = MixedElement([P1,P1,P1,P1])
Mixed_Space1 = FunctionSpace(mesh, element1)
#######################################################################
#Functions
U = Function(Mixed_Space1)
U_n = Function(Mixed_Space1)
Tn, Th, Tc, Tr = split(U)
Tn_n, Th_n, Tc_n, Tr_n = split(U_n)
v1, v2, v3, v4 = TestFunctions(Mixed_Space1)
#######################################################################
#Dimension of the space
d = V.dim()
###############################################################
#PDE Parameters dimensional
D_Tn, D_Th, D_Tc, D_Tr = 4.32e-6, 4.32e-6, 4.32e-6, 4.32e-6
kappa_Tn,kappa_Th, kappa_Tc, kappa_Tr = 2, 2, 2, 2
##############################################################
#Initial conditions
Tn_0 = Expression('Init1', degree=0, Init1=3)
Th_0 = Expression('Init2', degree=0, Init2=2)
Tc_0 = Expression('Init3', degree=0, Init3=1)
Tr_0 = Expression('Init4', degree=0, Init4=1)
###############################################################
#Interpolating ICs
Tn0 = interpolate(Tn_0, Mixed_Space1.sub(0).collapse())
Th0 = interpolate(Th_0, Mixed_Space1.sub(1).collapse())
Tc0 = interpolate(Tc_0, Mixed_Space1.sub(2).collapse())
Tr0 = interpolate(Tr_0, Mixed_Space1.sub(3).collapse())
###############################################################
#Assigning ICs
assign(U_n.sub(0),Tn0)
assign(U_n.sub(1),Th0)
assign(U_n.sub(2),Tc0)
assign(U_n.sub(3),Tr0)
##############################################################
# Create XDMF files for visualization output
vtkfile_1 = XDMFFile(MPI.comm_world,"reaction_system/Tn.xdmf")
vtkfile_1.parameters["flush_output"] = True
vtkfile_2 = XDMFFile(MPI.comm_world,"reaction_system/Th.xdmf")
vtkfile_2.parameters["flush_output"] = True
vtkfile_3 = XDMFFile(MPI.comm_world,"reaction_system/Tc.xdmf")
vtkfile_3.parameters["flush_output"] = True
vtkfile_4 = XDMFFile(MPI.comm_world,"reaction_system/Tr.xdmf")
vtkfile_4.parameters["flush_output"] = True
##############################################################
k = Constant(0.01)
t=0.0
F1 = ((Tn-Tn_n)/k)*v1*dx+D_Tn*dot(grad(Tn),grad(v1))*dx-Constant(1)*v1*dx+Constant(1)*Tn*v1*dx+Constant(0.1)*Th*Tn*v1*dx+Constant(0.2)*Th*Tn*v1*dx+Constant(0.01)*Tn*v1*dx\
+ ((Th-Th_n)/k)*v2*dx+D_Th*dot(grad(Th),grad(v2))*dx+(Constant(0.5)*Tr+Constant(1))*Th*v2*dx\
+ ((Tc-Tc_n)/k)*v3*dx+D_Tc*dot(grad(Tc),grad(v3))*dx-Constant(0.1)*Th*Tn*v3*dx+(Constant(0.2)*Tr+Constant(0.01))*Tc*v3*dx\
+ ((Tr-Tr_n)/k)*v4*dx+D_Tr*dot(grad(Tr),grad(v4))*dx-Constant(0.2)*Th*Tn*v4*dx+Constant(0.7)*Tr*v4*dx
#######################################################################
#Saving
Tn0.rename('Tn_','Tn_')
Th0.rename('Th_','Th_')
Tc0.rename('Tc_','Tc_')
Tr0.rename('Tr_','Tr_')
vtkfile_1.write(Tn0,t)
vtkfile_2.write(Th0,t)
vtkfile_3.write(Tc0,t)
vtkfile_4.write(Tr0,t)
##############################################################
#######################################################################
J1 = derivative(F1,U)
problem1 = Problem(J1,F1,[])
custom_solver1 = CustomSolver1()
custom_solver1.solve(problem1, U.vector())
##############################################################
Tn_, Th_, Tc_, Tr_ = U.split()
##############################################################
#Renaming for animation plots
Tn_.rename('Tn_','Tn_')
Th_.rename('Th_','Th_')
Tc_.rename('Tc_','Tc_')
Tr_.rename('Tr_','Tr_')
#######################################################################
#######################################################################
vtkfile_1.write(Tn_,t)
vtkfile_2.write(Th_,t)
vtkfile_3.write(Tc_,t)
vtkfile_4.write(Tr_,t)
#######################################################################
print('Number of processes used',num_processes)
list_timings(TimingClear.clear, [TimingType.wall])
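For reference, the two runs compared below were launched roughly like this (the script name and python3 call are just placeholders for this post):

python3 reaction_diffusion_test.py
mpirun -n 8 python3 reaction_diffusion_test.py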
Now when I run this in serial I get:
[MPI_AVG] Summary of timings | reps wall avg wall tot
Apply (PETScMatrix) | 3 0.0016835 0.0050504
Apply (PETScVector) | 37 1.2478e-05 0.0004617
Assemble cells | 7 0.23717 1.6602
Build sparsity | 2 0.061506 0.12301
Compute SCOTCH graph re-ordering | 7 0.0037814 0.02647
Compute connectivity 1-2 | 1 0.0073968 0.0073968
Compute entities dim = 1 | 1 0.039101 0.039101
Delete sparsity | 2 3.9e-06 7.8e-06
DistributedMeshTools: reorder vertex values | 4 0.0036773 0.014709
Init dof vector | 6 0.0028735 0.017241
Init dofmap | 7 0.041515 0.29061
Init dofmap from UFC dofmap | 7 0.009452 0.066164
Init tensor | 2 0.0048962 0.0097925
Number distributed mesh entities | 7 2.5286e-06 1.77e-05
PETSc Krylov solver | 3 0.82722 2.4817
SCOTCH: call SCOTCH_graphBuild | 7 4.1171e-05 0.0002882
SCOTCH: call SCOTCH_graphOrder | 7 0.0028291 0.019804
But when I run it with “mpirun -n 8” I get:
Process 0: [MPI_AVG] Summary of timings | reps wall avg wall tot
Apply (PETScMatrix) | 3 0.010986 0.032957
Apply (PETScVector) | 37 0.00030367 0.011236
Assemble cells | 7 0.048845 0.34191
Build LocalMeshData from local Mesh | 1 0.014725 0.014725
Build distributed mesh from local mesh data | 1 0.11427 0.11427
Build local part of distributed mesh (from local mesh data) | 1 0.0048257 0.0048257
Build sparsity | 2 0.035817 0.071635
Compute SCOTCH graph re-ordering | 7 0.00056693 0.0039685
Compute connectivity 1-2 | 1 0.0010896 0.0010896
Compute entities dim = 1 | 1 0.0078631 0.0078631
Compute graph partition (SCOTCH) | 1 0.058645 0.058645
Compute local part of mesh dual graph | 1 0.0074662 0.0074662
Compute mesh entity ownership | 1 0.0025741 0.0025741
Compute non-local part of mesh dual graph | 1 0.00052979 0.00052979
Delete sparsity | 2 5.1437e-06 1.0287e-05
Distribute cells | 1 0.0041209 0.0041209
Distribute mesh (cells and vertices) | 1 0.011897 0.011897
Distribute vertices | 1 0.0052324 0.0052324
DistributedMeshTools: reorder vertex values | 12 0.002893 0.034716
Extract partition boundaries from SCOTCH graph | 1 0.00020005 0.00020005
Get SCOTCH graph data | 1 4.6375e-06 4.6375e-06
Init dof vector | 6 0.0016481 0.0098885
Init dofmap | 7 0.010739 0.07517
Init dofmap from UFC dofmap | 7 0.0024653 0.017257
Init tensor | 2 0.0021027 0.0042053
Number distributed mesh entities | 8 0.0017322 0.013857
Number mesh entities for distributed mesh (for specified vertex ids) | 1 0.011717 0.011717
PETSc Krylov solver | 3 3.0889 9.2666
SCOTCH: call SCOTCH_dgraphBuild | 1 4.915e-05 4.915e-05
SCOTCH: call SCOTCH_dgraphHalo | 1 0.0004909 0.0004909
SCOTCH: call SCOTCH_dgraphPart | 1 0.057151 0.057151
SCOTCH: call SCOTCH_graphBuild | 7 1.2593e-05 8.815e-05
SCOTCH: call SCOTCH_graphOrder | 7 0.00036042 0.0025229
My computer has 8 logical processors and I use “FEniCS/2019.2.0.dev0”. I do not understand why the PETSc Krylov solver is slower in parallel: the “PETSc Krylov solver” row goes from roughly 2.5 s of total wall time in serial to roughly 9.3 s with 8 processes. I believe I am using a suitable solver and preconditioner (GMRES preconditioned with Hypre Euclid). Could you please help me with this?
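In case it helps to narrow things down, this is a minimal sketch of how I could time just the nonlinear solve, independent of list_timings (assuming dolfin's Timer and MPI.max behave as I expect; the timer label is arbitrary and this was not part of the runs above):

# Minimal sketch: time the nonlinear solve alone, wrapping the
# custom_solver1.solve(...) call. Timer and MPI come in with "from dolfin import *".
solve_timer = Timer("Newton solve only")   # starts timing on construction
custom_solver1.solve(problem1, U.vector())
wall = solve_timer.stop()                  # elapsed wall-clock seconds on this rank
# Report the slowest rank, since that is what limits the parallel run.
print("max solve wall time over ranks:", MPI.max(comm, wall))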
Best,