Computing the Darcy flux using FEniCS

def solve_darcy(k_sample, f, mesh, V):
    # Dirichlet data imposed on all four sides of the unit square
    u_D = Function(V)
    u_D.interpolate(lambda x: -1 / (4 * np.pi**2) * np.sin(2 * np.pi * x[0])
                    - 1 / (4 * np.pi**2) * np.cos(2 * np.pi * x[1]))
    left_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[0], 0))
    right_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[0], 1))
    bottom_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[1], 0))
    top_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[1], 1))
    bcs = [dirichletbc(u_D, left_boundary),
           dirichletbc(u_D, right_boundary),
           dirichletbc(u_D, bottom_boundary),
           dirichletbc(u_D, top_boundary)]

    # Variational form of the Darcy (pressure) equation with permeability k_sample
    dx_mesh = dx(domain=mesh)
    u = TrialFunction(V)
    v = TestFunction(V)
    a = -dot(k_sample * grad(u), grad(v)) * dx_mesh
    L = f * v * dx_mesh

    problem = LinearProblem(a, L, bcs=bcs,
                            petsc_options={"ksp_type": "cg", "pc_type": "hypre"})
    u_sol = problem.solve()
    return u_sol

def compute_flux(u_sol, k_sample, mesh, V):
    q = -k_sample * ufl.grad(u_sol)
    boundary_tag = dolfinx.mesh.locate_entities_boundary(mesh, mesh.topology.dim - 1, 
                                                          lambda x: np.isclose(x[0], 0.5))
    ds_boundary = ufl.Measure('ds', domain=mesh, subdomain_data=boundary_tag)
    n = ufl.FacetNormal(mesh)
    flux_expr = ufl.dot(q, n) * ds_boundary
    flux = mesh.comm.allreduce(fem.assemble_scalar(fem.form(flux_expr)), op=MPI.SUM)
    return flux

I wrote the code above to compute the Darcy flux through the boundary at x = 0.5 for a given permeability field. However, I noticed that the computed flux scales linearly with the mesh resolution, which is not what I expected: I anticipated that a finer mesh would make the computed flux converge to the “correct solution”, rather than grow with the mesh size. I’m wondering whether there is an issue with my implementation, or whether this behavior is expected. Thank you in advance for your help!

Please provide a complete, reproducible example.

import numpy as np
import matplotlib.pyplot as plt
from mpi4py import MPI
from dolfinx import default_real_type, fem, io, mesh


np.random.seed(45)

mesh_sizes = [8, 16, 32, 64, 128, 256, 512]  
truncation_level = 100  

sol = np.random.normal(0, 1, 100)  
lambda_value = 0.5
w_n = solve_transcendental_equation(lambda_value, 100)

flux_results = {}

for size in mesh_sizes:
    # Create mesh and function space
    current_mesh = mesh.create_unit_square(MPI.COMM_WORLD, size, size)
    current_V = functionspace(current_mesh, ("Lagrange", 1))

    k_sample = generate_permeability_sample(sol, w_n[:truncation_level], truncation_level, current_mesh, current_V)

    p_cor = solve_darcy(k_sample, f, current_mesh, current_V)
    print(domain_average(current_mesh, p_cor))

    flux = compute_flux(p_cor, k_sample, current_mesh, current_V)
    
    flux_results[size] = flux  

# Plot the computed flux for different mesh sizes
plt.figure(figsize=(8, 5))

mesh_sizes_squared = [size**2 for size in mesh_sizes]
print(flux_results)
plt.plot(mesh_sizes_squared, list(flux_results.values()), marker='o', linestyle='-', color='b', label=f"Truncation Level R={truncation_level}")

plt.xlabel("Mesh Resolution (NxN)", fontsize=15)
plt.ylabel("Computed Flux (q)", fontsize=15)
plt.title("Flux for Different Mesh Resolutions (R=100)", fontsize=14)
plt.legend()
plt.grid(True)
plt.show()

This is the code I used to plot the flux as a function of the different mesh sizes. Additionally, I compute the permeability using a KL expansion as follows:

import numpy as np
import scipy
from dolfinx import fem
lam = 0.5
dom = np.linspace(0, 1, 1000)

wn = solve_transcendental_equation(0.5, 100)
wn = np.array(wn)
thetan_1D = 2 * lam / (lam ** 2 * wn ** 2 + 1)
bn = np.zeros((len(dom), len(wn)))
An = np.zeros(len(wn))
for j in range(len(wn)):
    for i in range(len(dom)):
        bn[i, j] = np.sin(wn[j] * dom[i]) + lam * wn[j] * np.cos(wn[j] * dom[i])
    An[j] = 1/np.sqrt(np.trapz(bn[:, j] ** 2, dom))


def generate_permeability_sample(norm_sample, wn, N_modes, mesh, V, sigma=1.0, lam=0.5):
    wn = np.array(wn)
    points = mesh.geometry.x
    num_points = points.shape[0]
    thetan_2D = np.zeros(N_modes) 
    i = 0
    rowcolsum = 0
    rownum = 0
    colnum = rowcolsum
    k_array = np.zeros(num_points)
    # Loop over the 2D KL modes along anti-diagonals (rownum + colnum = rowcolsum)
    while i < N_modes:
        if rownum > rowcolsum:
            rowcolsum += 1
            rownum = 0
            colnum = rowcolsum
        thetan_2D[i] = thetan_1D[rownum] * thetan_1D[colnum]
        scale_factor = norm_sample[i] * np.sqrt(thetan_2D[i])
        b1 = An[rownum] * (np.sin(wn[rownum] * points[:, 0]) + lam * wn[rownum] * np.cos(wn[rownum] * points[:, 0]))
        b2 = An[colnum] * (np.sin(wn[colnum] * points[:, 1]) + lam * wn[colnum] * np.cos(wn[colnum] * points[:, 1]))
        k_array += scale_factor * b1 * b2
        rownum += 1
        colnum -= 1
        i += 1
    k_array = np.exp(k_array)
    k_sample = fem.Function(V)
    k_sample.x.array[:] = k_array
    return k_sample

It’s still not reproducible, as the functions solve_transcendental_equation and domain_average are not defined and not all of the imports are there.

Glancing at your code, it looks like the Darcy equation and flux computation are correct, so I am guessing something is off with the permeability definition. I would quickly test setting the permeability to a constant to verify this. I’d do this myself, but I can’t run your code as it’s not an MWE :wink:
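For reference, a minimal sketch of that constant-permeability check, reusing your solve_darcy and compute_flux functions and the current_mesh/current_V/f names from the loop in your earlier snippet (names assumed from your code):

from dolfinx import fem

# Replace the KL sample by a constant permeability field k = 1
k_const = fem.Function(current_V)
k_const.x.array[:] = 1.0

p_const = solve_darcy(k_const, f, current_mesh, current_V)
flux_const = compute_flux(p_const, k_const, current_mesh, current_V)
print(flux_const)

If the flux still scales with the mesh resolution for a constant permeability, the problem is not in the KL expansion.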


import numpy as np
import dolfinx
import ufl
from mpi4py import MPI
from petsc4py import PETSc
from scipy.optimize import fsolve
from ufl import TrialFunction, TestFunction, dot, grad, dx, inner
from dolfinx import default_scalar_type, fem
from dolfinx.fem import Function, functionspace, dirichletbc, locate_dofs_geometrical
from dolfinx.fem.petsc import LinearProblem
from dolfinx.mesh import create_unit_square

def solve_darcy(k_sample, f, mesh, V):
    u_D = Function(V)
    u_D.interpolate(lambda x: -1 / (4 * np.pi**2) * np.sin(2 * np.pi * x[0])
                    - 1 / (4 * np.pi**2) * np.cos(2 * np.pi * x[1]))
    left_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[0], 0))
    right_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[0], 1))
    bottom_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[1], 0))
    top_boundary = locate_dofs_geometrical(V, lambda x: np.isclose(x[1], 1))
    bcs = [dirichletbc(u_D, left_boundary),
           dirichletbc(u_D, right_boundary),
           dirichletbc(u_D, bottom_boundary),
           dirichletbc(u_D, top_boundary)]
    dx_mesh = dx(domain=mesh)
    u = TrialFunction(V)
    v = TestFunction(V)
    a = -dot(k_sample * grad(u), grad(v)) * dx_mesh
    L = f * v * dx_mesh
    problem = LinearProblem(a, L, bcs=bcs,
                            petsc_options={"ksp_type": "cg", "pc_type": "hypre"})
    u_sol = problem.solve()
    return u_sol


def compute_flux(u_sol, k_sample, mesh, V):
    q = -k_sample * ufl.grad(u_sol)
    
    facets = dolfinx.mesh.locate_entities_boundary(
        mesh, mesh.topology.dim - 1, lambda x: np.isclose(x[0], 0.0)
    )
    
    if len(facets) == 0:
        print("Warning: No facets found on the left boundary!")
    
    facet_tags = dolfinx.mesh.meshtags(
        mesh, mesh.topology.dim - 1, facets, np.full_like(facets, 1, dtype=np.int32)
    )
    
    n = ufl.FacetNormal(mesh)
    
    ds_left = ufl.Measure("ds", domain=mesh, subdomain_data=facet_tags, subdomain_id=1)
    
    flux_expr = ufl.dot(q, n) * ds_left
    
    flux = mesh.comm.allreduce(fem.assemble_scalar(fem.form(flux_expr)), op=MPI.SUM)
    
    return flux


def solve_transcendental_equation(lambda_value, num_modes):
    def equation(w):
        return np.tan(w) - (2 * lambda_value * w) / (lambda_value**2 * w**2 - 1)
    w_n = [1.72066]  # first positive root, precomputed for lambda_value = 0.5
    for n in range(1, num_modes):
        initial_guess = n * np.pi
        w_n.append(fsolve(equation, initial_guess)[0])
    return w_n


  

lam = 0.5
dom = np.linspace(0, 1, 1000)

wn = solve_transcendental_equation(0.5, 100)
wn = np.array(wn)
thetan_1D = 2 * lam / (lam ** 2 * wn ** 2 + 1)
bn = np.zeros((len(dom), len(wn)))
An = np.zeros(len(wn))
for j in range(len(wn)):
    for i in range(len(dom)):
        bn[i, j] = np.sin(wn[j] * dom[i]) + lam * wn[j] * np.cos(wn[j] * dom[i])
    An[j] = 1/np.sqrt(np.trapz(bn[:, j] ** 2, dom))


def generate_permeability_sample(norm_sample, wn, N_modes, mesh, V, sigma=1.0, lam=0.5):
    wn = np.array(wn)
    points = mesh.geometry.x
    num_points = points.shape[0]
    thetan_2D = np.zeros(N_modes) 
    i = 0
    rowcolsum = 0
    rownum = 0
    colnum = rowcolsum
    k_array = np.zeros(num_points)
    # Loop over the 2D KL modes along anti-diagonals (rownum + colnum = rowcolsum)
    while i < N_modes:
        if rownum > rowcolsum:
            rowcolsum += 1
            rownum = 0
            colnum = rowcolsum
        thetan_2D[i] = thetan_1D[rownum] * thetan_1D[colnum]
        scale_factor = norm_sample[i] * np.sqrt(thetan_2D[i])
        b1 = An[rownum] * (np.sin(wn[rownum] * points[:, 0]) + lam * wn[rownum] * np.cos(wn[rownum] * points[:, 0]))
        b2 = An[colnum] * (np.sin(wn[colnum] * points[:, 1]) + lam * wn[colnum] * np.cos(wn[colnum] * points[:, 1]))
        k_array += scale_factor * b1 * b2
        rownum += 1
        colnum -= 1
        i += 1
    k_array = np.exp(k_array)
    k_sample = fem.Function(V)
    k_sample.x.array[:] = k_array
    return k_sample
from dolfinx import geometry

lambda_value = 0.5
sigma = 10**(-2)
N_modes = 20
np.random.seed(45)

mesh = create_unit_square(MPI.COMM_WORLD, 128, 128)
f = fem.Constant(mesh, default_scalar_type(-1))

V = functionspace(mesh, ("Lagrange", 1))
sol = np.random.normal(0, 1, N_modes)
w_n = solve_transcendental_equation(lambda_value, N_modes)
k = generate_permeability_sample(sol, w_n, N_modes, mesh, V)

p_cor = solve_darcy(k, f, mesh, V)
sol_QOI = compute_flux(p_cor, k, mesh, V)

Sorry about that. I tested it, and this is now a reproducible example. I don’t think the issue is in the permeability; I believe I fixed it by using a different way of defining the boundary through which I compute the flux (see the compute_flux function). However, I am now wondering whether there is some way to optimize the code, since solving the Darcy problem and computing the flux are very expensive operations. Thank you so much!

It shouldn’t be an expensive operation.
One thing you should consider is:
https://jsdokken.com/FEniCS-workshop/src/form_compilation.html
Secondly, I would like to see some numbers regarding how expensive each step is.
Please use, for instance,

import time
start = time.perf_counter()
# Add some operations here
end = time.perf_counter()
print(f"Operation {end-start}")

to profile various bits of your code.
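Applied to the two calls in your loop, that could look like the following sketch (the k_sample, f, current_mesh and current_V names are assumed from your earlier snippet):

import time

start = time.perf_counter()
p_cor = solve_darcy(k_sample, f, current_mesh, current_V)
print(f"Solving the Darcy problem: {time.perf_counter() - start}")

start = time.perf_counter()
flux = compute_flux(p_cor, k_sample, current_mesh, current_V)
print(f"Computing the flux: {time.perf_counter() - start}")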

When profiling my example code I get the following:
Computing the flux: 0.003020667005330324
Solving the Darcy problem: 0.065877834001230076

Since I frequently use these function calls in my code, they significantly slow down execution.

Then I suggest looking at the aforementioned link, which tells you what structures can be pulled outside of loops, and how to update variables within loops.

Does `k_sample` change between your calls?

Or is it the mesh that changes?

Please note that solving a linear system of equations in less than 0.05 seconds is not very long, and unless you can cache the LU factorization (i.e. the matrix in the Darcy problem does not change between iterations), it will be hard to make it much faster.

Yes, k_sample changes between calls, and so does the mesh, which is why I am unsure how to apply the link you sent.
Thank you so much for taking the time to help me!

If both k_sample and the mesh change, you can’t cache much. The only thing you could cache is the compiled form (ref: dolfinx/python/test/unit/fem/test_assemble_mesh_independent_form.py at ead49d0fdd57d2273162ede3484505fa77981001 · FEniCS/dolfinx · GitHub), but that makes the code much more verbose and isn’t likely to give much of a speedup.

I think what you will see the most effect from is experimenting with the solver itself:

What is the effect of using {"ksp_type": "preonly", "pc_type": "lu", "pc_factor_mat_solver_type": "mumps"} instead of an iterative solver? As your problem is fairly small (512x512x2 cells at the largest), it is likely that a direct solver is faster than an iterative one.
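Concretely, that only means changing the petsc_options passed to LinearProblem inside your solve_darcy; a sketch:

# Inside solve_darcy: swap the CG/hypre options for a direct MUMPS solve
problem = LinearProblem(
    a, L, bcs=bcs,
    petsc_options={
        "ksp_type": "preonly",
        "pc_type": "lu",
        "pc_factor_mat_solver_type": "mumps",
    },
)
u_sol = problem.solve()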

Using {"ksp_type": "preonly", "pc_type": "lu", "pc_factor_mat_solver_type": "mumps"} slows down the Darcy solve to 0.125788500001363 seconds…

Related to making my code more efficient: I am currently evaluating the solution of the Darcy problem at a set of points using the following code, which I found online:

from dolfinx import geometry
import numpy as np
import ufl

lambda_value = 0.5
sigma = 10**(-2)
N_modes = 20
np.random.seed(45)

mesh = create_unit_square(MPI.COMM_WORLD, 128, 128)
f = fem.Constant(mesh, default_scalar_type(-1))

V = functionspace(mesh, ("Lagrange", 1))
sol = np.random.normal(0, 1, N_modes)
w_n = solve_transcendental_equation(lambda_value, N_modes)
k = generate_permeability_sample(sol, w_n, N_modes, mesh, V)

p_cor = solve_darcy(k, f, mesh, V)


# Build a 256x256 grid of candidate points (z = 0) and randomly select 16 of them
x_coords = np.linspace(0, 1, 256)
y_coords = np.linspace(0, 1, 256)
points_flattened = np.array([[x, y, 0.0] for x in x_coords for y in y_coords])  # shape (N, 3)

num_points_to_select = 16
random_indices = np.random.choice(len(points_flattened), num_points_to_select, replace=False)
points = points_flattened[random_indices].T  # shape (3, num_points_to_select)
bb_tree = geometry.bb_tree(mesh, mesh.topology.dim)

p_values2 = []  
cells = []
points_on_proc = []

cell_candidates = geometry.compute_collisions_points(bb_tree, points.T)
colliding_cells = geometry.compute_colliding_cells(mesh, cell_candidates, points.T)

for i, point in enumerate(points.T):
    if len(colliding_cells.links(i)) > 0:
        points_on_proc.append(point)
        cell = colliding_cells.links(i)[0]
        cells.append(cell)  
        p_value_array = p_cor.eval(point, cell)  
        p_value = float(p_value_array) if p_value_array.ndim == 0 else float(p_value_array[0])  
        p_values2.append([point[0], point[1], p_value])  
    else:
        print(f"Point {point} is not in any cell")

However, iterating over all these points is also an expensive computation, and I was wondering whether there is a more efficient way to do this. Thank you!
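Would something like the following batched evaluation work? (A sketch, assuming Function.eval accepts an array of points together with matching cell indices, as in the dolfinx tutorial, and reusing points, colliding_cells and p_cor from the snippet above.)

# Collect only the points that were found in a cell, plus one owning cell each
points_on_proc = []
cells = []
for i, point in enumerate(points.T):
    if len(colliding_cells.links(i)) > 0:
        points_on_proc.append(point)
        cells.append(colliding_cells.links(i)[0])

points_on_proc = np.array(points_on_proc, dtype=np.float64)
p_values = p_cor.eval(points_on_proc, cells)  # evaluate all points in one call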

You can switch from cg to gmres, as it seems slightly faster for a Poisson problem:

from mpi4py import MPI
from petsc4py import PETSc
import dolfinx.fem.petsc
import ufl
import time


options = {
    "ksp_type": "gmres",
    "pc_type": "hypre",
    "ksp_error_if_not_converged": True,
}
comm = MPI.COMM_WORLD


def solve_poisson(mesh, petsc_options):
    V = dolfinx.fem.functionspace(mesh, ("Lagrange", 1))
    u, v = ufl.TrialFunction(V), ufl.TestFunction(V)
    a = ufl.dot(ufl.grad(u), ufl.grad(v)) * ufl.dx
    L = v * ufl.dx

    mesh.topology.create_connectivity(mesh.topology.dim - 1, mesh.topology.dim)
    boundary_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)
    dofs = dolfinx.fem.locate_dofs_topological(
        V, mesh.topology.dim - 1, boundary_facets
    )
    bcs = [dolfinx.fem.dirichletbc(0.0, dofs, V)]

    problem = dolfinx.fem.petsc.LinearProblem(
        a, L, bcs=bcs, petsc_options=petsc_options
    )
    # Ensure that options are reset
    opts = PETSc.Options()
    set_options = opts.getAll()
    for key in set_options.keys():
        del opts[key]

    start = time.perf_counter()
    uh = problem.solve()
    end = time.perf_counter()
    return end - start


for n in [80, 320, 600]:
    mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, n, n)
    solve_time = solve_poisson(mesh, options)
    print(f"{n=}, {solve_time=}")

GMRES

n=80, solve_time=0.013839929999903688
n=320, solve_time=0.2216061730000547
n=600, solve_time=0.8179052340001363

CG

n=80, solve_time=0.01591311299989684
n=320, solve_time=0.23022311700015052
n=600, solve_time=0.8430353889998514

However, we are talking about setup and solve in less than a second for 600x600x2 cells. What kind of problem do you have where you have to change the mesh this frequently?

I am doing a Multilevel Monte Carlo simulation, so the Darcy problem has to be solved for each sample, which is why it is important that it is efficient. Additionally, the mesh changes between the levels.

Then the only remaining thing is to consider MPI-based parallelism, as the same problem as above yields the following on two processes:

n=80, solve_time=0.02045749499984595
n=320, solve_time=0.25582983699996475
n=600, solve_time=0.5599033309999868

which is a speedup if the grid is large.
Do you know the levels beforehand?
Then you can cache all the structures up to and including the linear problem for each level, and simply call solve() after updating the values of your permeability. That will save quite a lot of overhead.

Yes, I do know the levels beforehand! How would I go about caching these structures? I have been taking a look at the website you sent, but I am unsure how I should pass the k_sample along. Should I also pass all the cached structures into the function, or what would you recommend? Thank you!

Additionally, by dividing by k_sample you can make the matrix material independent, which means that you can LU-factorize it once, yielding bigger speedups.
An example is:

from mpi4py import MPI
from petsc4py import PETSc
from time import perf_counter
import dolfinx.fem.petsc
import ufl

prefactorize = True

# Create mesh
M = 500
mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, M, M)
V = dolfinx.fem.functionspace(mesh, ("Lagrange", 1))
u, v = ufl.TrialFunction(V), ufl.TestFunction(V)

# Create symbolic linear and bi-linear form
a = ufl.inner(u, v) * ufl.dx
x = ufl.SpatialCoordinate(mesh)
c = dolfinx.fem.Constant(mesh, 1.0)
f = c * x[0] + ufl.sin(x[1])
L = ufl.inner(f, v) * ufl.dx

# Compile forms
a_kernel = dolfinx.fem.form(a)
L_kernel = dolfinx.fem.form(L)

# Assemble matrix `A` and create a vector for the solution `uh` and rhs `b`
A = dolfinx.fem.petsc.assemble_matrix(a_kernel)
A.assemble()
uh = dolfinx.fem.Function(V)
b = dolfinx.fem.Function(V)

# Create a direct solver that uses MUMPS for LU decomposition
ksp = PETSc.KSP().create(mesh.comm)
ksp.setType("preonly")
pc = ksp.getPC()
pc.setType("lu")
pc.setFactorSolverType("mumps")
pc.setOperators(A)

# Factorize matrix `A`
if prefactorize:
    start_time = perf_counter()
    pc.setUp()
    end_time = perf_counter()
    print(f"Matrix factorized in {end_time - start_time} seconds")

# Temporal loop where `c` is incremented, and `b` re-assembled
for increment in range(10):
    c.value += increment
    # Reset vector `b`
    b.x.array[:] = 0
    dolfinx.fem.petsc.assemble_vector(b.x.petsc_vec, L_kernel)
    b.x.scatter_reverse(dolfinx.la.InsertMode.add)
    b.x.scatter_forward()

    # Solve linear system
    solve_start = perf_counter()
    ksp.solve(b.x.petsc_vec, uh.x.petsc_vec)
    uh.x.scatter_forward()
    solve_end = perf_counter()
    print(f"{increment} Linear system solved in {solve_end - solve_start} seconds")

With prefactorize=True this yields:

Matrix factorized in 2.1065948240000125 seconds
0 Linear system solved in 0.05877562199998465 seconds
1 Linear system solved in 0.05775631900019107 seconds
2 Linear system solved in 0.058324290999962614 seconds
3 Linear system solved in 0.05790613900012431 seconds
4 Linear system solved in 0.056981832000019494 seconds
5 Linear system solved in 0.057047895000096105 seconds
6 Linear system solved in 0.05660051399991062 seconds
7 Linear system solved in 0.05861389500000769 seconds
8 Linear system solved in 0.058447528999977294 seconds
9 Linear system solved in 0.05824351800015393 seconds

while prefactorize=False returns:

0 Linear system solved in 2.162149869999894 seconds
1 Linear system solved in 0.05714299500004927 seconds
2 Linear system solved in 0.0563828200001808 seconds
3 Linear system solved in 0.05830249699988599 seconds
4 Linear system solved in 0.05745338400015498 seconds
5 Linear system solved in 0.05701260900013949 seconds
6 Linear system solved in 0.05810925800005862 seconds
7 Linear system solved in 0.059993740999971124 seconds
8 Linear system solved in 0.05935429200008002 seconds
9 Linear system solved in 0.059796621000032246 seconds

But would making the matrix A material-independent not change the interpretation of my problem? Since I am solving the forward problem as a function of the permeability, I am unsure whether dividing by k would give me the required result.

You would divide by k on both sides.
I would structure the code along the lines of:

from mpi4py import MPI
from petsc4py import PETSc
import dolfinx.fem.petsc
import ufl
import time
import numpy as np


class Problem:
    def __init__(self, N):
        mesh = dolfinx.mesh.create_unit_square(MPI.COMM_WORLD, N, N)
        comm = mesh.comm

        # THIS IS PROBLEM specific
        V = dolfinx.fem.functionspace(mesh, ("Lagrange", 1))
        u, v = ufl.TrialFunction(V), ufl.TestFunction(V)
        a = ufl.dot(ufl.grad(u), ufl.grad(v)) * ufl.dx
        self._kappa = dolfinx.fem.Function(V, name="TIME DEPENDENT VARIABLE")
        L = 1 / self._kappa * v * ufl.dx

        mesh.topology.create_connectivity(mesh.topology.dim - 1, mesh.topology.dim)
        boundary_facets = dolfinx.mesh.exterior_facet_indices(mesh.topology)
        dofs = dolfinx.fem.locate_dofs_topological(
            V, mesh.topology.dim - 1, boundary_facets
        )
        bcs = [dolfinx.fem.dirichletbc(0.0, dofs, V)]

        # PROBLEM INDEPENDENT
        cffi_options = ["-Ofast", "-march=native"]
        jit_options = {
            "cffi_extra_compile_args": cffi_options,
            "cffi_libraries": ["m"],
        }
        self._a = dolfinx.fem.form(a, jit_options=jit_options)
        self._L = dolfinx.fem.form(L, jit_options=jit_options)
        self._solver = PETSc.KSP().create(comm)
        self._A = dolfinx.fem.petsc.assemble_matrix(self._a, bcs=bcs)
        self._A.assemble()
        self._b = dolfinx.fem.Function(V)
        self._uh = dolfinx.fem.Function(V)
        self._solver.setOperators(self._A)
        self._bcs = bcs
        options = {
            "ksp_type": "preonly",
            "pc_type": "lu",
            "pc_factor_mat_solver_type": "mumps",
            "ksp_error_if_not_converged": "True",
        }
        # Set options to solver
        opts = PETSc.Options()
        for key, val in options.items():
            opts[key] = val
        self._solver.setFromOptions()
        self._solver.getPC().setUp()
        # Remove options in case someone else wants to use another solver
        set_options = opts.getAll()
        for key in set_options.keys():
            del opts[key]

    def set_kappa_values(self, values):
        assert np.invert(np.isclose(values, 0)).all()
        self._kappa.x.array[:] = values

    def num_values_in_kappa(self):
        return len(self._kappa.x.array)

    def solve(self):
        # Assemble rhs
        with self._b.x.petsc_vec.localForm() as b_loc:
            b_loc.set(0)
        dolfinx.fem.petsc.assemble_vector(self._b.x.petsc_vec, self._L)

        # Apply boundary conditions to the rhs
        dolfinx.fem.petsc.apply_lifting(self._b.x.petsc_vec, [self._a], bcs=[self._bcs])
        self._b.x.petsc_vec.ghostUpdate(
            addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE
        )
        for bc in self._bcs:
            bc.set(self._b.x.array)

        # Solve linear system and update ghost values in the solution
        self._solver.solve(self._b.x.petsc_vec, self._uh.x.petsc_vec)

        return self._uh


problems = [(n, Problem(n)) for n in [80, 320, 600]]


for i in range(2):
    for n, problem in problems:
        start = time.time()
        kappa = np.random.rand(problem.num_values_in_kappa())
        start = time.perf_counter()
        problem.set_kappa_values(kappa)
        problem.solve()
        end = time.perf_counter()
        print(f"Time for {n=} ({problem._A.sizes}) {end - start}")

which on my computer yields the run-times:

Time for n=80 (((6561, 6561), (6561, 6561))) 0.002592999000171403
Time for n=320 (((103041, 103041), (103041, 103041))) 0.0386238000000958
Time for n=600 (((361201, 361201), (361201, 361201))) 0.1357020890000058
Time for n=80 (((6561, 6561), (6561, 6561))) 0.0029604400001517206
Time for n=320 (((103041, 103041), (103041, 103041))) 0.038615088000369724
Time for n=600 (((361201, 361201), (361201, 361201))) 0.1289683029999651

Thank you so much for this example. It seems like it will result in a significant speed-up. However, using this code I can’t call my compute_flux function anymore, since q = -k_sample * ufl.grad(u_sol) fails due to issues with broadcasting different shapes. If I understand your code correctly, it returns the same format as my previous Darcy solver, or am I misunderstanding something? Once again, thank you for your help!