Parallelisation of topology optimisation

Good morning,

I have a few questions about parallel programming in FEniCSx, related to my work on topology optimisation. This tutorial helped me get more familiar with MPI communication, but the following details are still unclear to me:
1 - mesh.topology.create_connectivity: do you need to gather the mesh onto one process for it to work?
2 - these Krylov solver options do not work in parallel:

OPTIONS["ksp_type"]="cg"
OPTIONS["pc_type"]="ilu"
What is a suitable replacement in parallel, especially for linear elasticity?

3 - assemble_matrix: is there any operation to perform afterwards in parallel, such as a ghost-value update?
4 - do you need to update ghost values when setting the values of DG0 functions?

Thanks a lot in advance for your answers; my code is given below in case it is needed:

#IMPORTS
import numpy as np
from dolfinx import fem, cpp, io, mesh
from mpi4py import MPI
from petsc4py import PETSc
import ufl

#PARAMETERS
NITER_TOTAL=3
PENAL=38
WEIGHT=1
LAMBDA=0.6
MU=0.4
L, W, H=0.303, 0.19, 0.064
EMP=0.06
AXE=0.025
NX, NY, NZ = 60, 38, 12

#MESH CREATION
points=[np.array([0, 0, 0]), np.array([L, W, H])]
domain=mesh.create_box( 
    MPI.COMM_WORLD, 
    points,
    [NX,NY,NZ],
    cell_type=cpp.mesh.CellType.hexahedron,
    ghost_mode=mesh.GhostMode.shared_facet
    )
dim=domain.topology.dim


#BOUNDARY MARKERS
def axes(x):
    return np.logical_and(
               np.logical_or(np.sqrt( (x[0]-EMP)**2 + (x[2]-H/2)**2 ) <=AXE,
                             np.sqrt( (x[0]-L+EMP)**2 + (x[2]-H/2)**2 ) <=AXE),
               np.logical_or(np.isclose(x[1], 0, atol=1e-6), 
                             np.isclose(x[1], W, atol=1e-6))
           )
def bottom(x):
    return np.isclose(x[2], 0, atol=1e-6)

bottom_facets=mesh.locate_entities(domain, dim-1, bottom)
axes_facets=mesh.locate_entities(domain, dim-1, axes)
facet_indices=np.array(np.hstack([bottom_facets, axes_facets]), dtype=np.int32)
facet_markers=[np.full(len(bottom_facets), 1, dtype=np.int32), 
               np.full(len(axes_facets), 2, dtype=np.int32)]
facet_markers=np.hstack(facet_markers)
sorted_facets=np.argsort(facet_indices)
facet_tag=mesh.meshtags(
    domain, 
    dim-1, 
    facet_indices[sorted_facets],
    facet_markers[sorted_facets]
    )

domain.topology.create_connectivity(dim-1, dim)#not sure about this in parallel

#LOADS AND BC
U=fem.VectorFunctionSpace(domain, ("CG", 1))
u=fem.Function(U, name="Displacement")

bcdofs=fem.locate_dofs_geometrical(U, axes)
u_0=np.array((0,)*dim, dtype=PETSc.ScalarType)
bc=fem.dirichletbc(u_0, bcdofs, U)

ds= ufl.Measure("ds", domain=domain, subdomain_data=facet_tag)
bottom_surf=MPI.COMM_WORLD.allreduce(
                fem.assemble_scalar(fem.form(fem.Constant(domain, 1.)*ds(1))),
                op=MPI.SUM
                                    )

def eps(v):
    return ufl.sym(ufl.grad(v))
def sigma(v):
    return (LAMBDA*ufl.nabla_div(v)*ufl.Identity(dim)+2*MU*eps(v))
def dE(u, v):
    return ufl.inner(sigma(u), eps(v))/2

#DENSITY SET UP
T=fem.FunctionSpace(domain, ("DG", 0))

density_old=fem.Function(T)
density=fem.Function(T, name="Density")
def homogenous(x):
    return np.full(len(x[0]), 0.3)
density.interpolate(homogenous)

VOL=MPI.COMM_WORLD.allreduce(
        fem.assemble_scalar(fem.form(fem.Constant(domain, 1.)*ufl.dx)),
        op=MPI.SUM
                            )
frac_init=MPI.COMM_WORLD.allreduce(
              fem.assemble_scalar(fem.form(density*ufl.dx))/VOL, 
              op=MPI.SUM
                                  )

#VARIATIONAL PROBLEM
u_=ufl.TestFunction(U)
du=ufl.TrialFunction(U)
a=ufl.inner(density**PENAL*sigma(u_), eps(du))*ufl.dx
b=ufl.dot((WEIGHT/bottom_surf)*ufl.FacetNormal(domain), u_)*ds(1)

solver=PETSc.KSP().create(MPI.COMM_WORLD)
OPTIONS=PETSc.Options()

#DOES NOT WORK IN PARALLEL
#OPTIONS["ksp_type"]="cg"
#OPTIONS["ksp_atol"]=1.0e-10
#OPTIONS["pc_type"]="ilu"

solver.setFromOptions()
compliance_history=[]
compliance_old=1e30

B=fem.petsc.assemble_vector(fem.form(b))
fem.petsc.apply_lifting(B, [fem.form(a)], bcs=[[bc]])
B.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
fem.petsc.set_bc(B, [bc])

#OPTIMISATION LOOP
for i in range(NITER_TOTAL):

    #SOLVE
    A=fem.petsc.assemble_matrix(fem.form(a), bcs=[bc])
    A.assemble()
    solver.setOperators(A)


    
    iter_solve=solver.solve(B, u.vector)
    u.x.scatter_forward()

    #OPTIMISATION CRITERIA
    sensitivity=-PENAL*density**(PENAL-1)*dE(u,u)

    #LAGRANGE MULTIPLIER FINDER (DICHOTOMY)
    lmin, lmax=0, 1e8
    counter=0
    frac=0
    while np.abs(frac-frac_init)>1e-4:
        lmoy=(lmin+lmax)/2
        densitynew=fem.Function(T)
        densitynew.interpolate(fem.Expression((-sensitivity/lmoy)**0.5, 
                                              T.element.interpolation_points))
        denv=density.vector.getArray()
        denvnew=densitynew.vector.getArray()
        criterion=np.maximum(0.001,
                             np.maximum(denv-np.full(len(denv), 0.2),
                                        np.minimum(1,
                                                   np.minimum(
                                                     denv+np.full(len(denv), 0.2),
                                                     denv*denvnew
                                                             )
                                                   )
                                       )         
                            )
        densitynew.vector.setArray(criterion)
        densitynew.vector.ghostUpdate(addv=PETSc.InsertMode.ADD, 
                                      mode=PETSc.ScatterMode.REVERSE)
        frac=MPI.COMM_WORLD.allreduce(
                 fem.assemble_scalar(fem.form(densitynew*ufl.dx))/VOL, 
                 op=MPI.SUM
                                     )

        if  frac-frac_init>0:
            lmin=lmoy
        else:
            lmax=lmoy

        counter+=1
    
    density.vector.setArray(densitynew.vector.getArray())
    density.vector.ghostUpdate(addv=PETSc.InsertMode.ADD, 
                               mode=PETSc.ScatterMode.REVERSE)

    compliance=MPI.COMM_WORLD.allreduce(
                   fem.assemble_scalar(fem.form(ufl.action(b,u))), 
                   op=MPI.SUM
                                       )
    compliance_history.append(compliance)

    print("\n ***** Step {}: compliance=".format(i), compliance, 
          " in : ", counter, "iterations", 
          ", process #", domain.comm.rank, " ***** ")


print("ALL CLEAR PROCESS #", MPI.COMM_WORLD.rank)

Regarding 1: no, the mesh is not gathered on a single process. It uses MPI neighbourhood communicators to communicate the index numbers of local and ghost entities, see: https://github.com/FEniCS/dolfinx/blob/e85cf6d31a185d889f2bd480562c8b2d0d7e819a/cpp/dolfinx/mesh/topologycomputation.cpp#L168-L170
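So the call in your code is fine as it is: every rank builds the facet-to-cell connectivity for its own (owned plus ghost) part of the mesh. A minimal sketch:

# Collective call: each rank creates the facet-to-cell connectivity for its
# owned and ghost entities; nothing is gathered onto a single process.
domain.topology.create_connectivity(domain.topology.dim - 1, domain.topology.dim)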

Regarding 2: for linear elasticity I would suggest using GAMG, see the FEniCS performance-test mini-app: https://github.com/FEniCS/performance-test
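For instance, a minimal sketch with the solver object set up in your code (the tolerance value is just an example; attaching a near-nullspace of rigid-body modes to the matrix, as done in the performance-test repository, usually improves GAMG convergence further):

OPTIONS = PETSc.Options()
OPTIONS["ksp_type"] = "cg"    # CG works in parallel for the symmetric positive-definite elasticity operator
OPTIONS["ksp_rtol"] = 1.0e-8  # example tolerance
OPTIONS["pc_type"] = "gamg"   # algebraic multigrid, a good parallel default for elasticity
solver.setFromOptions()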

Regarding 3: this operation does the ghost updates. Calling A.assemble() after fem.petsc.assemble_matrix performs the parallel communication of off-process entries, so no further operation is needed for the matrix.
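A minimal sketch of the full pattern, using the forms a and b already defined in your code (for vectors the reverse scatter does remain explicit, exactly as you have it):

A = fem.petsc.assemble_matrix(fem.form(a), bcs=[bc])
A.assemble()  # communicates and accumulates off-process matrix entries

B = fem.petsc.assemble_vector(fem.form(b))
fem.petsc.apply_lifting(B, [fem.form(a)], bcs=[[bc]])
B.ghostUpdate(addv=PETSc.InsertMode.ADD,
              mode=PETSc.ScatterMode.REVERSE)  # accumulate ghost contributions onto the owning ranks
fem.petsc.set_bc(B, [bc])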

Regarding 4: you should do so if you use a DG formulation where you might want information from a ghost cell. A rule of thumb is that you should call scatter_forward whenever you do an operation on a subset of the cells.
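In terms of the density update in your loop, a minimal sketch (assuming only the locally owned entries are modified, which is what vector.setArray does):

density.vector.setArray(criterion)  # overwrite the locally owned DG0 values
density.x.scatter_forward()         # push owned values to the ghost copies on neighbouring ranks
# equivalent PETSc call:
# density.vector.ghostUpdate(addv=PETSc.InsertMode.INSERT,
#                            mode=PETSc.ScatterMode.FORWARD)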

Thanks a lot! Much appreciated :clap: