Dear members,
I have been struggling to figure out how to solve a parallelization problem in my eigenvalue solver for the Helmholtz equation. I am getting correct eigenvalues on 1 and 2 cores, but when I try to use 4 or more cores, the iteration either converges to a completely different eigenvalue or does not converge at all.
After much trial and error, I realized that the problem is somewhere in the matrix that I assemble manually.
I couldn't produce an MWE since my solver consists of many scripts working together. Instead, I would like to discuss what kind of approach I should take when generating my own matrices by writing specific values into specific rows and columns.
The problem itself involves subdomains, and the matrix is highly sparse. The formula for the matrix is:
$$a_{k} = \int_{\Omega}\phi_{k}\,dV$$
$$b_{j} = \nabla\phi_{j}(x_r)\cdot n_r$$
$$D_{kj} = a_{k}\,b_{j} \quad \text{(outer product)}$$
What I do is:
- I calculate the nonzero dofs of the vector a_k and their indices,
- I calculate the nonzero dofs of the vector b_j and their indices,
- I take the outer product and get the nonzero dof values together with their rows and columns,
- I use these indices and dofs to generate the highly sparse PETSc matrix D_{kj} (a minimal standalone sketch of these steps follows right after this list).
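To make the procedure concrete, here is a minimal standalone sketch of these steps with made-up indices, values and sizes (this is not taken from my solver, just an illustration of turning the two sparse vectors into COO triplets and a PETSc AIJ matrix):

import numpy as np
from petsc4py import PETSc

# Made-up nonzero entries as (global dof index, value) pairs; in my solver
# these come from the assembled vectors a_k and b_j
a_entries = [(3, 0.25), (7, 0.75)]            # nonzeros of a_k
b_entries = [(5, 1.2), (6, -0.4), (9, 2.0)]   # nonzeros of b_j

# Outer product: one (row, col, value) triplet per pair of nonzeros
rows, cols, vals = [], [], []
for i, a_val in a_entries:
    for j, b_val in b_entries:
        rows.append(i)
        cols.append(j)
        vals.append(a_val * b_val)

n = 12  # made-up global matrix size
D = PETSc.Mat().create(comm=PETSc.COMM_WORLD)
D.setSizes([n, n])
D.setType('aij')
D.setUp()
for r, c, v in zip(rows, cols, vals):
    D.setValue(r, c, v)
D.assemblyBegin()
D.assemblyEnd()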
Since the matrix is the real representation of a complex matrix, it has the usual 4 blocks.
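That is, the assembled operator has the block layout
$$D = \begin{pmatrix} D_{11} & D_{12} \\ D_{21} & D_{22} \end{pmatrix},$$
and the four sub-blocks D_11, D_12, D_21, D_22 are the matrices built in the code below.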
Here is the code that generates the matrix:
import dolfin as dolf
import numpy as np
from petsc4py import PETSc
class ActiveFlame:
gamma = 1.4
def __init__(self, mesh, subdomains, x_r, degree=1, constrained_domain=None, comm=None):
"""
Parameters
----------
mesh : dolfin.cpp.mesh.Mesh
Mesh of the entire domain.
subdomains : dolfin.cpp.mesh.MeshFunctionSizet
Relevant subdomain for flame region.
x_r : np.array
flame location vector
degree : int, optional
Degree of basis functions. The default is 1.
"""
self.comm = comm
self.mesh = mesh
self.subdomains = subdomains
self.x_r = x_r
self.degree = degree
# __________________________________________________
self._a = {}
self._b = {}
self._D_kj = None
self._D_kj_adj = None
# __________________________________________________
CG = dolf.FiniteElement('CG', mesh.ufl_cell(), degree)
W = dolf.FunctionSpace(mesh, dolf.MixedElement([CG, CG]), constrained_domain=constrained_domain)
self.function_space = W
for fl, x in enumerate(self.x_r):
self._a[str(fl)] = self._assemble_a(fl)
self._b[str(fl)] = self._assemble_b(x)
@property
def submatrices(self):
return self._D_kj
@property
def adjoint_submatrices(self):
return self._D_kj_adj
@staticmethod
def _helper_func(v, dofmap):
"""
Returns the nonzero indices and values of an assembled vector
(a_1 or a_2) in order to perform the outer product efficiently.
Parameters
----------
v : assembled dolfin vector
one component, e.g. a_1 or a_2
dofmap : dolfin.cpp.fem.DofMap
dof map of the (mixed) function space
Returns
-------
v : np.array
2D array of [global dof index, value] pairs for the nonzero entries
"""
indices = np.flatnonzero(v) # Returns the indices which are nonzero
values = v[indices]
my_list = []
for index, value in zip(indices, values):
my_list.append([dofmap.dofs()[index], value])
v = np.array(my_list)
return v
def _assemble_a(self, fl):
"""
Assembles \int \phi_k dV
Parameters
----------
fl : int
flame tag
Returns
-------
v : <class 'tuple'>
includes assembled elements of a_1 and a_2
"""
(v_1, v_2) = dolf.TestFunction(self.function_space)
dx = dolf.Measure('dx', subdomain_data=self.subdomains)
V = dolf.FunctionSpace(self.mesh, 'CG', 1)
const = dolf.interpolate(dolf.Constant(1), V)
V_fl = dolf.assemble(const * dx(fl))
a_1 = dolf.assemble(v_1 / V_fl * dx(fl))
a_2 = dolf.assemble(v_2 / V_fl * dx(fl))
dofmap = self.function_space.dofmap()
a_1 = self._helper_func(a_1, dofmap)
a_2 = self._helper_func(a_2, dofmap)
#print(a_1)
a = (a_1, a_2)
return a
def _assemble_b(self, x):
"""
Calculates the degrees of freedom and indices of the
right vector
\nabla(\phi_j(x_r)) . n
i.e. the gradient of the test function evaluated at the
reference point x_r
Parameters
----------
x : np.array
flame location vector
Returns
-------
np.array
Array of [global dof index, value] pairs forming the vector b.
"""
n = np.array([[0, 0, 1]]) # row
dimension = self.mesh.topology().dim()
if dimension == 1:
n = np.array([[1]])
elif dimension == 2:
n = np.array([[1, 0]])
# else: # elif dimension == 3:
# pass
n = n.transpose() # column
b = [np.array([]), np.array([])]
cell_index = self.mesh.bounding_box_tree().compute_first_entity_collision(dolf.Point(*x))
if cell_index < self.mesh.num_entities(dimension):
print("Cell is in Mesh")
cell = dolf.Cell(self.mesh, cell_index)
b = []
for j in range(2):
dofmap = self.function_space.sub(j).dofmap()
cell_dofs = dofmap.cell_dofs(cell_index)
element = self.function_space.sub(j).element()
d_dx = element.evaluate_basis_derivatives_all(1, x, cell.get_vertex_coordinates(), cell.orientation())
d_dx = d_dx.reshape((len(cell_dofs), -1))
d_dv = np.dot(d_dx, n)
d_dv = d_dv[:, 0]
my_list = []
for i, dof in enumerate(cell_dofs):
my_list.append([dofmap.tabulate_local_to_global_dofs()[dof], d_dv[i]])
my_vec = np.array(my_list)
b.append(my_vec)
#
else:
# raise ValueError("The cell for the subdomain is not in the mesh, please check the mesh and subdomain.")
print("The cell for the subdomain is not in the mesh")
return (*b, )
@staticmethod
def _csr_matrix(a, b):
# len(a) and len(b) are not the same
nnz = len(a) * len(b)
row = np.zeros(nnz)
col = np.zeros(nnz)
val = np.zeros(nnz)
for i, c in enumerate(a):
for j, d in enumerate(b):
row[i * len(b) + j] = c[0]
col[i * len(b) + j] = d[0]
val[i * len(b) + j] = c[1] * d[1]
row = row.astype(dtype='int32')
col = col.astype(dtype='int32')
return row, col, val
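# Example of what _csr_matrix above produces (hypothetical numbers, just
# for illustration):
#   _csr_matrix(np.array([[3, 0.25]]), np.array([[5, 1.2], [9, 2.0]]))
# returns row = [3, 3], col = [5, 9], val = [0.3, 0.5]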
def assemble_submatrices(self, problem_type='direct'):
"""
This function performs the outer product of the vectors a and b
calculated above and generates the highly sparse matrix D_kj,
which represents the active flame matrix without the FTF and
other constant multiplications.
Parameters
----------
problem_type : str, optional
Specifies which matrix is assembled. The default is 'direct';
the other option is 'adjoint'.
"""
num_fl = len(self.x_r) # number of flames
global_size = self.function_space.dim()
istart, iend = self.function_space.dofmap().ownership_range()
local_size = iend - istart # local size
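# Note: istart:iend from ownership_range() is the range of dofs owned by
# this MPI rank, so local_size is the number of locally owned rows and
# global_size is the total number of dofs over all ranks.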
print(local_size)
D_kj = dict()
for k in range(2):
for j in range(2):
row = dict()
col = dict()
val = dict()
for fl in range(num_fl):
u = None
v = None
if problem_type == 'direct':
u = self._a[str(fl)][k] # column vector
v = self._b[str(fl)][j] # row vector
elif problem_type == 'adjoint':
u = self._b[str(fl)][k]
v = self._a[str(fl)][j]
row[str(fl)], col[str(fl)], val[str(fl)] = self._csr_matrix(u, v)
row = np.concatenate([row[str(fl)] for fl in range(num_fl)])
col = np.concatenate([col[str(fl)] for fl in range(num_fl)])
val = np.concatenate([val[str(fl)] for fl in range(num_fl)])
i = np.argsort(row)
row = row[i]
col = col[i]
val = val[i]
if len(val)==0:
mat = PETSc.Mat().create(comm=self.comm) #PETSc.COMM_WORLD
mat.setSizes([(local_size, global_size), (local_size, global_size)])
mat.setFromOptions()
mat.setUp()
mat.assemble()
else:
mat = PETSc.Mat().create(PETSc.COMM_WORLD) # MPI.COMM_SELF
mat.setSizes([(local_size, global_size), (local_size, global_size)])
mat.setType('aij')
mat.setUp()
for i in range(len(row)):
mat.setValue(row[i],col[i],val[i])
mat.assemblyBegin()
mat.assemblyEnd()
D_kj['{0}{1}'.format(k + 1, j + 1)] = mat
if problem_type == 'direct':
self._D_kj = (D_kj['11'], D_kj['12'], D_kj['21'], D_kj['22'])
elif problem_type == 'adjoint':
self._D_kj_adj = (D_kj['11'], D_kj['12'], D_kj['21'], D_kj['22'])
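Although I could not extract a full MWE from my solver, the class above is called roughly like this. This is a hypothetical, simplified driver assuming a unit-square geometry with a single flame subdomain and a single reference point; in my real solver the mesh, subdomains and x_r come from separate scripts:

import dolfin as dolf
import numpy as np
from petsc4py import PETSc

# Hypothetical test geometry, not my real case: the flame region is tagged 0
# because the class uses dx(fl) with fl = 0 for the first flame
mesh = dolf.UnitSquareMesh(32, 32)
subdomains = dolf.MeshFunction('size_t', mesh, mesh.topology().dim(), 1)
dolf.CompiledSubDomain('x[0] >= 0.25 && x[0] <= 0.5').mark(subdomains, 0)
x_r = np.array([[0.2, 0.5]])  # one made-up reference point

flame = ActiveFlame(mesh, subdomains, x_r, degree=1, comm=PETSc.COMM_WORLD)
flame.assemble_submatrices('direct')
D_11, D_12, D_21, D_22 = flame.submatrices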
I am sorry that the class is so long, but I couldn't shorten it any further.
I'm getting the same eigenvalues for 1 and 2 cores, but -np >= 3 causes wrong eigenvalues. I am pretty sure the problem is in this matrix.
My question is: what considerations should I keep in mind when manually implementing this kind of PETSc matrix for parallel simulations in my case (or when generating such matrices in general)? In other words, what should I follow in order to obtain a correctly working parallel matrix regardless of the number of cores I use?