Hi,

I’m currently working on solving a viscoelastic Stokes flow simulation within Fenics and I’ve reached the point where serial computing just isn’t cutting it anymore, I’d like to go parallel.

I’ve done some reading around the area and understand how to use the mpi command, but it seems to be hard to find any increase in computation speed when running with it. My code is as follows:

```
from fenics import *
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm
import time
start_time = time.time()
parameters["std_out_all_processes"] = False;
#parameters['krylov_solver']['nonzero_initial_guess'] = True
parameters['ghost_mode'] = 'shared_facet'
T = 0.25
num_steps = 25
dt = T / num_steps
t_domain = np.linspace(0,T,num_steps)
beta = 0.59
We = 0.1
theta = 1-beta
mesh = Mesh('Meshes/wedge5.xml')
def epsilon(a):
return sym(nabla_grad(a))
def trans(a):
return a.T
def magnitude(vec):
return sqrt(vec**2)
def flux(ui, ni, taui):
return (dot(ui,ni)*taui + abs(dot(ui,ni))*taui)/2
def drag_func(p, u, tau, mesh):
n = FacetNormal(mesh)
force_x = (-p+tau[0,0]+2*beta*u[0].dx(0))*n[0]
force_y = (tau[1,0]+beta*(u[0].dx(1)+u[1].dx(0)))*n[1]
drag_coeff = -assemble(2*(force_x+force_y)*ds_circle)
drag_list.append(drag_coeff)
P = FiniteElement('CG', mesh.ufl_cell(), 1)
V = VectorElement('CG', mesh.ufl_cell(), 2)
S = TensorElement('DG', mesh.ufl_cell(), 0)
D = TensorElement('DG', mesh.ufl_cell(), 1)
Pr = FunctionSpace(mesh, P)
Ve = FunctionSpace(mesh, V)
St = FunctionSpace(mesh, S)
De = FunctionSpace(mesh, D)
stokes = FunctionSpace(mesh, MixedElement([V,P,D]))
class Inflow(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and near(x[0], -20) #Left side of the channel
class Top_Wall(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and near(x[1], 2) #Top and bottom of the channel
class Bottom_Wall(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and near(x[1], 0) #Top and bottom of the channel
class Outflow(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and near(x[0], 20) #Right side of the channel
class Wedge(SubDomain):
def inside(self, x, on_boundary):
return on_boundary and (between(x[0], (-1,1)) and between(x[1], (0,1))) #Cylinder
subdomains = MeshFunction("size_t", mesh, mesh.topology().dim() - 1)
inflow = Inflow()
inflow.mark(subdomains, 1)
top_wall = Top_Wall()
top_wall.mark(subdomains, 2)
bottom_wall = Bottom_Wall()
bottom_wall.mark(subdomains, 3)
outflow = Outflow()
outflow.mark(subdomains, 4)
wedge = Wedge()
wedge.mark(subdomains, 5)
vel_inflow = Expression(('1.5*(1-(x[1]*x[1]/4))','0'),degree=2)
str_inflow = Expression((('2*We*(1-beta)*(9/16)*x[1]*x[1]','-3/4*(1-beta)*x[1]') \
, ('-3/4*(1-beta)*x[1]','0')),degree=2, We=We, beta=beta)
stress_inflow_0 = DirichletBC(St, str_inflow, subdomains, 1)
velocity_inflow_1 = DirichletBC(stokes.sub(0), vel_inflow, subdomains, 1)
noslip_top_1 = DirichletBC(stokes.sub(0), vel_inflow, subdomains, 2)
symmetry_1 = DirichletBC(stokes.sub(0).sub(1), Constant(0), subdomains, 3)
outflow_condition_1 = DirichletBC(stokes.sub(0).sub(1), Constant(0), subdomains, 4)
outflow_pressure_1 = DirichletBC(stokes.sub(1), Constant(0), subdomains, 4)
noslip_wedge_1 = DirichletBC(stokes.sub(0), Constant((0,0)), subdomains, 5)
bcst = [stress_inflow_0]
bcup = [velocity_inflow_1,
noslip_top_1,
symmetry_1,
outflow_condition_1,
noslip_wedge_1,
outflow_pressure_1]
ds_circle = Measure("ds", subdomain_data=subdomains, subdomain_id=5)
drag_list = []
new_drag_list = []
err_est=[]
nn=FacetNormal(mesh)
tau = TrialFunction(St)
S = TestFunction(St)
tau1 = Function(St)
taus = Function(St)
u, p, G = TrialFunctions(stokes)
v, q, R = TestFunctions(stokes)
w1 = Function(stokes)
ws = Function(stokes)
us, ps, Gs = split(ws)
stress = (We/dt)*inner(tau-taus,S)*dx \
+ inner(taus,S)*dx \
+ We*inner(dot(us,nabla_grad(taus)) \
- dot(taus,trans(Gs)) \
- dot(Gs,taus), S)*dx \
- (1-beta)*inner(Gs+trans(Gs),S)*dx \
+ We*inner(flux(us,nn,taus)('+') - flux(us,nn,taus)('-'),jump(S))*dS \
stokes_system = (beta+theta)*inner(grad(u),grad(v))*dx \
- p*div(v)*dx + q*div(u)*dx \
+ inner(tau1,grad(v))*dx \
+ inner(G-grad(u),R)*dx \
- theta*inner(G,grad(v))*dx \
u_residual = TrialFunction(Ve)
v_residual = TestFunction(Ve)
p_residual = TrialFunction(Pr)
q_residual = TestFunction(Pr)
tau_residual = TrialFunction(St)
S_residual = TestFunction(St)
tau_err = Function(St)
u_err = Function(Ve)
p_err = Function(Pr)
t = 0
for n in tqdm(range(num_steps)):
stress_l = assemble(lhs(stress))
[bc.apply(stress_l) for bc in bcst]
stress_r = assemble(rhs(stress))
[bc.apply(stress_r) for bc in bcst]
solve(stress_l, tau1.vector(), stress_r, 'gmres')
stokes_l = assemble(lhs(stokes_system))
[bc.apply(stokes_l) for bc in bcup]
stokes_r = assemble(rhs(stokes_system))
[bc.apply(stokes_r) for bc in bcup]
solve(stokes_l, w1.vector(), stokes_r)
u1, p1, G1 = split(w1)
drag_func(p1, u1, tau1, mesh)
taus.assign(tau1)
ws.assign(w1)
drag = np.array(drag_list)
print('')
print('T: ', T)
print('No. steps: ', num_steps)
print('beta:',beta)
print('We:',We)
print(drag[-1])
print("--- %s seconds ---" % (time.time() - start_time))
```

Could anyone point me in the right direction as to where I’m going wrong with mpi, or to some examples that show considerable speed increase when run with it? I’ve tried numerous dolfin demos but very few show a difference when compared to a serial run.

Thanks!

(Apologies about the previous deleted posts, I messed up the markdown :P)