Regarding RuntimeError: *** Error: Duplication of MPI communicator failed (MPI_Comm_dup

Hello everyone,
In Fenics-Dolfin 2019.1.0 version, I wish to plot normed rate of change of a quantity (velocity) versus time. To do so HDF5 files are read which have been saved at particular time intervals and from which the data for velocity and timestamp is extracted.

When I try to obtain the normed rate of change of velocity for different domain sizes using a list domain which contains different domain sizes, I get the following error:

---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-1-260ec68159e8> in <module>
    109 else:
    110     tInterval = 'var'
--> 111 T,UDot = getNumSolution(Re,domain,cs,case,typeOfNorm,tInterval)

<ipython-input-1-260ec68159e8> in getNumSolution(Re, domain, cs, case, typeOfNorm, ti)
     74         t = []
     75         print(i)
---> 76         trimmedFileList,velocity,time = getSortedData(trimmedFileList,filePath)
     77         uDot,t = NormOfUDot(velocity,time,typeOfNorm)
     78         UDot.append(uDot)

<ipython-input-1-260ec68159e8> in getSortedData(tfl, fp)
     37     time = []
     38     for i in range(len(tfl)):
---> 39         u,t = readHDF5Files(fp+'/'+tfl[i])
     40         velocity.append(u)
     41         time.append(t)

<ipython-input-1-260ec68159e8> in readHDF5Files(filePath)
     21     f.read(var,"var")
     22     time = f.attributes("var/vector_0")['timestamp']
---> 23     u,p,c,q,phi = var.split(True)
     24     return u,time
     25 

/usr/local/lib/python3.6/dist-packages/dolfin/function/function.py in split(self, deepcopy)
    538         if num_sub_spaces == 1:
    539             raise RuntimeError("No subfunctions to extract")
--> 540         return tuple(self.sub(i, deepcopy) for i in range(num_sub_spaces))

/usr/local/lib/python3.6/dist-packages/dolfin/function/function.py in <genexpr>(.0)
    538         if num_sub_spaces == 1:
    539             raise RuntimeError("No subfunctions to extract")
--> 540         return tuple(self.sub(i, deepcopy) for i in range(num_sub_spaces))

/usr/local/lib/python3.6/dist-packages/dolfin/function/function.py in sub(self, i, deepcopy)
    517         if deepcopy:
    518             return Function(self.function_space().sub(i),
--> 519                             self.cpp_object().sub(i),
    520                             name='%s-%d' % (str(self), i))
    521         else:

RuntimeError: *** Error: Duplication of MPI communicator failed (MPI_Comm_dup

However this works if the list domain has only one entry. How can I obtain data for all domain sizes at once?

Following is my code :

from dolfin import *
from mshr import *
import matplotlib.pyplot as plt
import numpy as np
import math
import os
def get_space(mesh):
    Q = FiniteElement("Lagrange", mesh.ufl_cell(), 1)
    V = VectorElement("Lagrange", mesh.ufl_cell(), 2)
    W = FunctionSpace(mesh,MixedElement([V,Q,Q,Q,Q]))
    
    return W

def readHDF5Files(filePath):
    mesh = Mesh()
    f=HDF5File(mesh.mpi_comm(),filePath, 'r')
    f.read(mesh,"mesh",False)   
    var = Function(get_space(mesh))
    f.read(var,"var")
    time = f.attributes("var/vector_0")['timestamp']
    u,p,c,q,phi = var.split(True)
    return u,time

def sortData(tfl,v,t):
    sv = v
    stfl = [x for _,x in sorted(zip(t,tfl))]
    sv.sort(key=dict(zip(sv,t)).get)
    st = sorted(t)
        
    return stfl,sv,st

def getSortedData(tfl,fp):
    velocity = []
    time = []
    for i in range(len(tfl)):
        u,t = readHDF5Files(fp+'/'+tfl[i])
        velocity.append(u)
        time.append(t)
    tfl,velocity,time = sortData(tfl,velocity,time)
    return tfl,velocity,time

def NormOfUDot(v,time,typeOfNorm):
    normUDotArray = []
    timeInstanceArray = []
    for i in range(len(v)-1):
        u = v[i+1]
        old_u = v[i]
        t = time[i+1]
        old_t = time[i]
        normUDot = norm((u.vector()-old_u.vector())/(t-old_t),typeOfNorm)
        normUDotArray.append(normUDot)
        timeInstanceArray.append(old_t)
    return normUDotArray,timeInstanceArray

def getNumSolution(Re,domain,cs,case,typeOfNorm,ti):
    UDot = []
    T = []
    trimmedFileList=[]
    for i in domain:
        filePath = 'LU_simulations/ref_mesh/'+str(i)+'/Re_'+Re+'_dt_'+ti+'_'+case+'Stat_'+str(cs)
        allFiles = os.listdir(filePath+'/')
        for jj in range(len(allFiles)):
            if allFiles[jj].find('.h5')!=-1:
                trimmedFileList.append(allFiles[jj])

        uDot = []
        t = []
        print(i)
        trimmedFileList,velocity,time = getSortedData(trimmedFileList,filePath)
        uDot,t = NormOfUDot(velocity,time,typeOfNorm)
        UDot.append(uDot)
        T.append(t)

    return T, UDot

####### Controls ########
Re = '1e-3'
case = 'MT' # For var time step var = mt or st
typeOfNorm = 'L2'
domain = [500,1000,2000,4000,8000]
# domain = [8000]
cs     = 5 #fixed
if case == 'MT':
    tInterval = '1'
    plotTitle = 'Modertately thick limit'
elif case == 'ST':
    tInterval = '10'
    plotTitle = 'Super thick limit'
else:
    tInterval = 'var'
T,UDot = getNumSolution(Re,domain,cs,case,typeOfNorm,tInterval)

Does this happen because the object var in the function def readHDF5Files(filePath): gets overloaded or crosses a memory threshold? I would highly be grateful for any leads.

Please try to make a code which is reproducible by others. If I try to run the code you provided I get

Traceback (most recent call last):
  File "/tmp/tmp.py", line 91, in <module>
    T,UDot = getNumSolution(Re,domain,cs,case,typeOfNorm,tInterval)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/tmp/tmp.py", line 61, in getNumSolution
    allFiles = os.listdir(filePath+'/')
               ^^^^^^^^^^^^^^^^^^^^^^^^
FileNotFoundError: [Errno 2] No such file or directory: 'LU_simulations/ref_mesh/500/Re_1e-3_dt_1_MTStat_5/'

which is surely due to the fact that I don’t have your files.

@francesco-ballarin , I am thankful to you for your prompt response! I just realized that I had made a minute error by not emptying the list trimmedFileList which needs to be done every iteration. After doing so the code worked just fine. Sorry for the inconvenience :pray: