Improving efficiency when parallel computation is limited by the formulation

Hi all,

I am hoping that someone more expert can help me speed up the parallel computation of my problem.
I am solving the diffusion equation on two separate domains (an inner disk of radius R and an outer annulus), which are coupled at the interface r = R through two boundary conditions:

  • D_in * ∂u/∂r|r<R = D_out * ∂u/∂r|r>R: the flux must be equal just inside and just outside the interface

  • Conc_in(r=R) = Conc_out(r=R) * P: the concentrations at the interface must satisfy the partition coefficient P

To implement these boundary conditions, at every time step I extract the mean value at specific dofs (on thin rings slightly inside and slightly outside r = R), compute the interface concentrations and gradients from them, and use these respectively as the Dirichlet and Neumann BCs for the next solve.
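
Concretely, each time step I average the concentration over a ring of dofs just inside the interface (Iia, at distance step_x_i from R) and just outside it (Ioa, at distance step_x_o), and from these I compute the interface values and the one-sided gradients that I impose as BCs. This is exactly what the time loop in the code below does:

Io = (Ioa*D_o*step_x_i + Iia*D_i*step_x_o) / (P*D_i*step_x_o + D_o*step_x_i)  # flux balance + partition
Ii = P*Io
g_value_i = (Ii - Iia) / step_x_i   # Neumann value inside
g_value_o = (Ioa - Io) / step_x_o   # Neumann value outside

Ii and Io are then also imposed as Dirichlet values at r = R on the inner and outer domain, respectively.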

Here is my code (it is quite lengthy, but I could not slim it any further):

# Initializing parameters
P = 2 # Partition coefficient
t = 0 # Start time
T = 3 # End time
R=1 # Radius
o=10 # Radius of whole domain
O_conc = 50 # Initial concentration outside
D_i=0.2 # Diffusion coefficient inside
D_o=0.8 # Diffusion coefficient outside
num_steps = 10 # Number of time steps
dt = (T-t)/num_steps # Time step size

import numpy as np
import gmsh
import dolfinx
import dolfinx.fem.petsc  # explicit import so that fem.petsc.* is available in recent dolfinx versions
from dolfinx import fem, plot
from dolfinx.io import XDMFFile, gmshio
import ufl
from mpi4py import MPI
from petsc4py import PETSc

# MESH GENERATION
gmsh.initialize()
gmsh.option.setNumber("General.Terminal", 0)
model_rank = 0
comm = MPI.COMM_WORLD
gdim=2
gmsh.option.setNumber("Mesh.Algorithm", 7)

# Inside
if comm.rank == model_rank:
    gmsh.model.add("Inside")
    gmsh.model.setCurrent("Inside")
    center= gmsh.model.occ.addPoint(0,0,0,tag=1)
    outer = gmsh.model.occ.addCircle(0,0,0,R,tag=2)
    disk_loop =gmsh.model.occ.addCurveLoop([outer])
    disk = gmsh.model.occ.addPlaneSurface([disk_loop])
    gmsh.model.occ.synchronize()
    gmsh.model.addPhysicalGroup(gdim,[disk])
        # Create mesh resolution
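    # Threshold field: size LcMin where the distance to the center point is <= DistMin,
    # LcMax where it is >= DistMax, interpolated in between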
    gmsh.model.mesh.field.add("Distance",1)
    gmsh.model.mesh.field.setNumbers(1, "NodesList", [center])
    gmsh.model.mesh.field.add("Threshold", 2)
    gmsh.model.mesh.field.setNumber(2, "InField", 1)
    gmsh.model.mesh.field.setNumber(2, "LcMin", 0.5) #0.05 optimal
    gmsh.model.mesh.field.setNumber(2, "LcMax", 0.05) #0.005 optimal
    gmsh.model.mesh.field.setNumber(2, "DistMin", R/5)
    gmsh.model.mesh.field.setNumber(2, "DistMax", 9*R/10)
    gmsh.model.mesh.field.setAsBackgroundMesh(2)
    gmsh.model.mesh.generate(gdim)
    gmsh.write("Inside_complete.msh")
domain_i, cell_markers_i, facet_markers_i = gmshio.model_to_mesh(gmsh.model, comm, model_rank, gdim=gdim,
    partitioner=dolfinx.mesh.create_cell_partitioner(dolfinx.mesh.GhostMode.shared_facet))
domain_i.name = "Inside"
cell_markers_i.name = f"{domain_i.name}_cells"
facet_markers_i.name = f"{domain_i.name}_facets"

# Outside
if comm.rank == model_rank:
    gmsh.model.add("Outside")
    gmsh.model.setCurrent("Outside")
    center= gmsh.model.occ.addPoint(0,0,0,tag=1)
    disk =gmsh.model.occ.addDisk(0,0,0,o,o)
    disk_inner =gmsh.model.occ.addDisk(0,0,0,R,R)
    cut = gmsh.model.occ.cut([(2, disk)], [(2, disk_inner)])[0]
    extruded_geometry = gmsh.model.occ.extrude(cut, 0, 0, 0.5, numElements=[5], recombine=True)
    gmsh.model.occ.synchronize()
    gdim= 2
    gmsh.model.addPhysicalGroup(gdim,[cut[0][1]], tag=1)
        # Create mesh resolution
    gmsh.model.mesh.field.add("Distance",1)
    gmsh.model.mesh.field.setNumbers(1, "NodesList", [center])
    gmsh.model.mesh.field.add("Threshold", 2)
    gmsh.model.mesh.field.setNumber(2, "InField", 1)
    gmsh.model.mesh.field.setNumber(2, "LcMin", 0.05) #0.005 optimal
    gmsh.model.mesh.field.setNumber(2, "LcMax", 0.8) #0.2 optimal
    gmsh.model.mesh.field.setNumber(2, "DistMin", R+(R/10))
    gmsh.model.mesh.field.setNumber(2, "DistMax", 4*R)
    gmsh.model.mesh.field.setAsBackgroundMesh(2)
    gmsh.model.mesh.generate(gdim)
    gmsh.write("Outside_complete.msh")
domain_o, cell_markers_o, facet_markers_o = gmshio.model_to_mesh(gmsh.model, comm, model_rank, gdim=gdim,
    partitioner=dolfinx.mesh.create_cell_partitioner(dolfinx.mesh.GhostMode.shared_facet))
domain_o.name = "Outside"
cell_markers_o.name = f"{domain_o.name}_cells"
facet_markers_o.name = f"{domain_o.name}_facets"
gmsh.finalize()

# Creating space functions
V_i = fem.FunctionSpace(domain_i, ("CG", 1))
V_o = fem.FunctionSpace(domain_o, ("CG", 1))

# Setting FRAP initial conditions
u_n_i = fem.Function(V_i)
x_i = V_i.tabulate_dof_coordinates()
u_n_i.interpolate(lambda x: (np.full((1, x.shape[1]),0)))
u_n_o = fem.Function(V_o)
x_o = V_o.tabulate_dof_coordinates()
u_n_o.interpolate(lambda x: (np.full((1, x.shape[1]),O_conc)))

#Setting up BOUNDARY CONDITIONS
def set_boundary(boundaries, domain, xdmf):
    fdim= domain.topology.dim - 1
    facet_indices, facet_markers = [], []
    for (marker, locator) in boundaries:
        facets = dolfinx.mesh.locate_entities(domain, fdim, locator)
        facet_indices.append(facets)
        facet_markers.append(np.full_like(facets, marker))
    facet_indices = np.hstack(facet_indices).astype(np.int32)
    facet_markers = np.hstack(facet_markers).astype(np.int32)
    sorted_facets = np.argsort(facet_indices)
    facet_tag = dolfinx.mesh.meshtags(domain, fdim, 
        facet_indices[sorted_facets], facet_markers[sorted_facets])
    domain.topology.create_connectivity(domain.topology.dim-1, 
        domain.topology.dim)
    xdmf.write_mesh(domain)
    xdmf.write_meshtags(facet_tag)
    ds = ufl.Measure("ds", domain=domain, subdomain_data=facet_tag)
    return ds
# BOUNDARY condition DOMAIN_I: defining the boundary at the interface r=R
boundaries_i =[(1, lambda x: np.isclose(np.sqrt(x[0]**2+x[1]**2),R))]
xdmf_i=XDMFFile(domain_i.comm, "Inside_complete.xdmf", "w")
ds_i=set_boundary(boundaries_i,domain_i, xdmf_i)
# BOUNDARY condition DOMAIN_O: setting Neumann at r=R and at the outer boundary
boundaries_o =[(3, lambda x: np.isclose(np.sqrt(x[0]**2+x[1]**2),o)),
                (4, lambda x: np.isclose(np.sqrt(x[0]**2+x[1]**2),R))]
xdmf_o=XDMFFile(domain_o.comm, "Outside_complete.xdmf", "w")
ds_o=set_boundary(boundaries_o,domain_o,xdmf_o)

# Define solution variable
uhi = fem.Function(V_i)
uhi.interpolate(lambda x: (np.full((1, x.shape[1]),0)))
xdmf_i.write_function(uhi, t)
uho = fem.Function(V_o)
uho.interpolate(lambda x: (np.full((1, x.shape[1]),O_conc)))
xdmf_o.write_function(uho, t)

### DEFINING VARIATIONAL FORMULATION inside and outside
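# Backward (implicit) Euler in time: u*v*dx + dt*D*dot(grad(u), grad(v))*dx = (u_n + dt*f)*v*dx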
f_=0
fi= fem.Constant(domain_i, PETSc.ScalarType(f_))
Di= fem.Constant(domain_i, PETSc.ScalarType(D_i))
ui, vi = ufl.TrialFunction(V_i), ufl.TestFunction(V_i)
dx_i=ufl.Measure("dx", domain=domain_i)
Fi = ui*vi*dx_i + Di*dt*ufl.dot(ufl.grad(ui), 
        ufl.grad(vi))*dx_i -(u_n_i + dt*fi)*vi*dx_i

fo= fem.Constant(domain_o, PETSc.ScalarType(f_))
Do= fem.Constant(domain_o, PETSc.ScalarType(D_o))
uo, vo = ufl.TrialFunction(V_o), ufl.TestFunction(V_o)
dx_o=ufl.Measure("dx", domain=domain_o)
Fo = uo*vo*dx_o + Do*dt*ufl.dot(ufl.grad(uo), 
        ufl.grad(vo))*dx_o - (u_n_o + dt*fo)*vo*dx_o
###

### Adding boundaries to solver
class BoundaryCondition():
    def __init__(self, type, marker, values,v,ds):
        self._type = type
        if type == "Neumann":
                self._bc = ufl.inner(values, v) * ds(marker)
    @property
    def bc(self):
        return self._bc
    @property
    def type(self):
        return self._type
# Extract dof positions for the Dirichlet BC at r=R
def interface(x: np.ndarray) -> np.ndarray:
    return np.isclose(x[0]*x[0] + x[1]*x[1], R*R)
dofs_o = fem.locate_dofs_geometrical(V_o, interface)
dofs_i = fem.locate_dofs_geometrical(V_i, interface)
def add_bc(boundary_conditions, F, V, dofs, value_dir):
    # Add the Neumann terms to the form F and set up the Dirichlet BC at r=R
    for condition in boundary_conditions:
        if condition.type == "Neumann":
            F += condition.bc
    u_D = fem.Function(V)
    u_D.interpolate(lambda x: np.full((1, x.shape[1]), value_dir))
    bcs = fem.dirichletbc(u_D, dofs)
    # "+=" rebinds F locally, so the augmented form must be returned and reassigned
    # by the caller, otherwise the Neumann terms are silently lost
    return u_D, bcs, F
# Bcs INSIDE
g_value_i = fem.Constant(domain_i, PETSc.ScalarType(0))
boundary_conditions_i = [BoundaryCondition("Neumann", 1, g_value_i, vi, ds_i)]
u_Di, bcs_i, Fi = add_bc(boundary_conditions_i, Fi, V_i, dofs_i, 0)
# Bcs OUTSIDE
g_value_o = fem.Constant(domain_o, PETSc.ScalarType(0))
boundary_conditions_o = [BoundaryCondition("Neumann", 3, fem.Constant(domain_o, PETSc.ScalarType(0)), vo, ds_o),
        BoundaryCondition("Neumann", 4, g_value_o, vo, ds_o)]
u_Do, bcs_o, Fo = add_bc(boundary_conditions_o, Fo, V_o, dofs_o, O_conc)
###

### Finalizing VARIATIONAL PROBLEM
    # inside
ai = fem.form(ufl.lhs(Fi))
Li = fem.form(ufl.rhs(Fi))
Ai = fem.petsc.assemble_matrix(ai, bcs=[bcs_i])
Ai.assemble()
bi = fem.petsc.create_vector(Li)
solver_i = PETSc.KSP().create(domain_i.comm)
solver_i.setOperators(Ai)
solver_i.setType(PETSc.KSP.Type.PREONLY)
solver_i.getPC().setType(PETSc.PC.Type.LU)
    # outside
ao = fem.form(ufl.lhs(Fo))
Lo = fem.form(ufl.rhs(Fo))
Ao = fem.petsc.assemble_matrix(ao, bcs=[bcs_o])
Ao.assemble()
bo = fem.petsc.create_vector(Lo)
solver_o = PETSc.KSP().create(domain_o.comm)
solver_o.setOperators(Ao)
solver_o.setType(PETSc.KSP.Type.PREONLY)
solver_o.getPC().setType(PETSc.PC.Type.LU)
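# Both systems are solved with KSP "preonly" + LU, i.e. a direct solve; since Ai and Ao
# never change in time, the factorization is computed once and reused at every step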
###

# Establishing dofs to assess gradient later
step_x_i=0.05
step_x_o=0.05
tol = 1e-2
def interface_i(x: np.ndarray) -> np.ndarray:
    r2 = x[0]*x[0] + x[1]*x[1]
    return np.logical_and(r2 < (R-step_x_i)*(R-step_x_i) + tol,
                          r2 > (R-step_x_i)*(R-step_x_i) - tol)
def interface_o(x: np.ndarray) -> np.ndarray:
    r2 = x[0]*x[0] + x[1]*x[1]
    return np.logical_and(r2 < (R+step_x_o)*(R+step_x_o) + tol,
                          r2 > (R+step_x_o)*(R+step_x_o) - tol)
# locate_dofs_geometrical already returns the process-local dof indices on the two
# sampling rings, so they can be used directly to index u.x.array in the loop below
root_dofs_go = fem.locate_dofs_geometrical(V_o, interface_o)
root_dofs_gi = fem.locate_dofs_geometrical(V_i, interface_i)

for n in range(num_steps):
    t +=dt
  
    # Computing values at the interface for the next cycle 
    u_n_o_values=np.mean(u_n_o.x.array[root_dofs_go])
    u_n_i_values=np.mean(u_n_i.x.array[root_dofs_gi])
    Ioa = np.mean(comm.allgather(u_n_o_values))
    Iia = np.mean(comm.allgather(u_n_i_values))
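    # Io and Ii follow from the one-sided flux balance
    #   D_i*(Ii - Iia)/step_x_i = D_o*(Ioa - Io)/step_x_o
    # combined with the partition condition Ii = P*Io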
    Io=(Ioa*D_o*step_x_i+Iia*D_i*step_x_o)/(P*D_i*step_x_o+D_o*step_x_i)
    Ii=Io*P

    #Updating the value given to Neumann boundaries
    g_value_i.value =((Ii-Iia))/step_x_i
    g_value_o.value=((Ioa-Io))/step_x_o
    
    #Updating Dirichlet boundaries
    u_Di.interpolate(lambda x: (np.full((1, x.shape[1]),Ii)))
    bcs_i=fem.dirichletbc(u_Di,dofs_i)
    u_Do.interpolate(lambda x: (np.full((1, x.shape[1]),Io)))
    bcs_o=fem.dirichletbc(u_Do,dofs_o)
    
    # (1) Solving the PDE outside
    # Update the right hand side reusing the initial vector
    with bo.localForm() as loc_bo:
        loc_bo.set(0)
    fem.petsc.assemble_vector(bo, Lo)

    # Apply Dirichlet boundary condition to the vector
    fem.petsc.apply_lifting(bo, [ao], [[bcs_o]])
    bo.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, 
            mode=PETSc.ScatterMode.REVERSE)
    fem.petsc.set_bc(bo, bcs=[bcs_o])

    # Solve linear problem
    solver_o.solve(bo, uho.vector)
    uho.x.scatter_forward()
    
    # (2) Solving the PDE inside
    # Update the right hand side reusing the initial vector
    with bi.localForm() as loc_bi:
        loc_bi.set(0)
    fem.petsc.assemble_vector(bi, Li)

    # Apply Dirichlet boundary condition to the vector
    fem.petsc.apply_lifting(bi, [ai], [[bcs_i]])
    bi.ghostUpdate(addv=PETSc.InsertMode.ADD_VALUES, 
            mode=PETSc.ScatterMode.REVERSE)
    fem.petsc.set_bc(bi, [bcs_i])

    # Solve linear problem
    solver_i.solve(bi, uhi.vector)
    uhi.x.scatter_forward()

    # Update solution at previous time step (u_n)
    u_n_i.x.array[:] = uhi.x.array
    u_n_o.x.array[:] = uho.x.array

    # write value of computed solution at each step
    xdmf_i.write_function(uhi, t)
    xdmf_o.write_function(uho, t)

xdmf_i.close()
xdmf_o.close()

Maybe you can think of a better implementation altogether, but with this code and this specific mesh I notice that I have to be careful when gathering the average value at root_dofs_go and root_dofs_gi from all ranks in parallel. I run into two main problems:

  • I thought it was necessary to gather the results from all ranks, because if I don't, sometimes one of the ranks does not own any dofs belonging to root_dofs_go, which makes it impossible for the solver to proceed any further. But this now also happens when I do gather (I have no idea what could be causing it; the kind of reduction I have in mind is sketched after these bullets)

  • Even when it works, the elapsed time is not better than in serial (I guess because of all the communication between ranks needed to gather the values at the different dofs)
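
For reference, the kind of rank-safe averaging I have in mind would look roughly like this (just a sketch, not what the code above does: it replaces the mean-of-means with a single count-weighted reduction and drops ghost dofs so they are not counted twice):

# Sketch only: count-weighted global mean over the outer sampling ring
owned_go = root_dofs_go[root_dofs_go < V_o.dofmap.index_map.size_local]  # keep owned dofs only
vals = u_n_o.x.array[owned_go]                                           # may be empty on some ranks
local = np.array([vals.sum(), vals.size], dtype=np.float64)
total = np.zeros(2)
comm.Allreduce(local, total, op=MPI.SUM)     # one collective: global [sum, count]
Ioa = total[0] / total[1]                    # well defined as long as the ring is non-empty globally

Would something along these lines be the right direction, or is there a better pattern altogether?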

Is there any way I could implement this more efficiently? (In the future I will need to use finer meshes and translate the problem to 3D.)

Thank you in advance for any help!
Marta