Calculate adjoint derivatives w.r.t. multiple Constants

Hi all,

I am trying to calculate gradients w.r.t. some coefficients used in the weak form.

Consider this MWE, where we optimize the source term in the heat equation (Poisson).
This works great!

import dolfinx
import ufl
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np

import dolfinx.nls.petsc  # explicit submodule imports, as in the later snippets
import dolfinx.fem.petsc

# mesh and function space
mesh = create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [128, 128], CellType.triangle)
V = fem.FunctionSpace(mesh, ("CG", 1))
u = fem.Function(V)
v = ufl.TestFunction(V)

# this works OK
k = fem.Function(V)

# let's say I have a bunch of these and I want to get grad J w.r.t. them
#k = fem.Constant(mesh, PETSc.ScalarType((1.0))) #<- what should this be?

dx = ufl.Measure("dx")
F = ufl.inner(ufl.grad(u), ufl.grad(v))*dx # heat equation
F += k*v*dx # add source of heat

# boundary conditions
def left(x): return np.isclose(x[0], 0)

fdim = mesh.topology.dim - 1

u1 = fem.Function(V)
boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
with u1.vector.localForm() as loc:
    loc.set(0)
bc1 = fem.dirichletbc(u1, dolfinx.fem.locate_dofs_topological(V, fdim, boundary_facets))

bcs = [bc1]

# solve the problem
problem = fem.petsc.NonlinearProblem(F, u, bcs)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)

# how big is the solution?
J = ufl.dot(u, u)*dx
J_form = fem.form(J)

# partial derivative of J w.r.t. k
dJdk_form = fem.form(ufl.derivative(J, k))

# partial derivative of R w.r.t. k
dRdk_form = fem.form(ufl.adjoint(ufl.derivative(F, k)))

lhs = ufl.adjoint(ufl.derivative(F, u))
rhs = -ufl.derivative(J, u)
adjoint_problem = dolfinx.fem.petsc.LinearProblem(lhs, rhs, bcs=bcs)

def compute_dJdk_where_k_is_function(value):
    # set k
    k.x.array[:] = value

    # solve new solution vector
    solver.solve(u)

    # reassemble adjoint stuff
    dJdk = fem.petsc.assemble_vector(dJdk_form)
    dJdk.assemble()
    dRdk = fem.petsc.assemble_matrix(dRdk_form)
    dRdk.assemble()

    # solve adjoint vector
    lmbda = adjoint_problem.solve()

    # calculate derivative (GRADIENT w.r.t. dofs of k)
    dJdk += dRdk*lmbda.vector
    dJdk.ghostUpdate(addv=PETSc.InsertMode.INSERT, mode=PETSc.ScatterMode.FORWARD)

    # calculate size
    J_value = fem.assemble_scalar(J_form)
    J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM) # J value

    s = np.sum(dJdk.array)                       # sum of all dof sensitivities
    s = MPI.COMM_WORLD.allreduce(s, op=MPI.SUM)  # equals dJ/dk if k were one global constant
    return s, J_value

# find zero
k_value = 10.0
alpha = 0.5

# Vanilla gradient descent
for i in range(100):
    s, J_value = compute_dJdk_where_k_is_function(k_value)
    if MPI.COMM_WORLD.rank == 0:
        print(s, J_value, k_value)
    k_value += -alpha*s

which gives this output as expected:

2.6665788448001653 13.332994270288646 10.0
2.3110467080246226 10.014661621528067 8.666710577599918
2.002917294976341 7.522199842016697 7.511187223587607
1.73587045064272 5.650065134662384 6.509728576099436
1.504428679592717 4.243869705191134 5.641793350778076
.
.
.
2.4995200656325894e-06 1.1714863136466332e-11 9.373564309995559e-06
2.166263667531661e-06 8.799269022487337e-12 8.123804277179265e-06
1.8774397300618952e-06 6.60930771689225e-12 7.040672443413434e-06

Awesome!
But there is a problem when I try to do the same with the source term k defined as a fem.Constant.

When I try that, I get an error:
ufl.log.UFLException: Can only create arguments automatically for non-indexed coefficients.

which is triggered at:
dJdk_form = fem.form(ufl.derivative(J, k))

When I try to define the constant differently it fails elsewhere.

Therefore my question is:
How can I calculate such a derivative w.r.t. Constants (possibly several of them) and get back the gradient?
So far I have only managed the workaround shown in the MWE, where I represent k as a fem.Function and then sum all the contributions. But I feel like that is not the most efficient way, or is it?
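
For context, what I am evaluating is the standard adjoint gradient formula: solve the adjoint system and combine the partial derivatives,

$$\left(\frac{\partial F}{\partial u}\right)^{T} \lambda = -\frac{\partial J}{\partial u}, \qquad \frac{dJ}{dk} = \frac{\partial J}{\partial k} + \lambda^{T}\,\frac{\partial F}{\partial k},$$

which is exactly what the MWE assembles (lhs, rhs, dJdk_form and dRdk_form).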

Thank you for any reply.

Just a little update on this problem…

I was looking around and found this PR.

This would make it possible to use something like fem.FunctionSpace(mesh, ("Real", 0)) for a memory-friendly definition of k, but the PR does not look very active at the moment.

So I have managed to get through the docs and finally made some progress…
Here is a version that kind of works for a single constant:

import dolfinx
import ufl
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np

import dolfinx.nls.petsc
import dolfinx.fem.petsc

# mesh and function space
mesh = create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], CellType.triangle)
V = fem.FunctionSpace(mesh, ("CG", 1))
u = fem.Function(V)
v = ufl.TestFunction(V)

# sensitivity wrt this
k_var = ufl.variable(fem.Constant(mesh, PETSc.ScalarType(1.0)))  # wrapped in ufl.variable so that ufl.diff works
k_const = fem.Constant(mesh, PETSc.ScalarType(1.0))              # the constant actually used in the solve

dx = ufl.Measure("dx")
F_var = ufl.inner(ufl.grad(u), ufl.grad(v))*dx # heat equation
F_var += k_var*v*dx # add source of heat

F_const = ufl.inner(ufl.grad(u), ufl.grad(v))*dx # heat equation
F_const += k_const*v*dx # add source of heat

# boundary conditions
def left(x): return np.isclose(x[0], 0)

fdim = mesh.topology.dim - 1

u1 = fem.Function(V)
boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
with u1.vector.localForm() as loc:
    loc.set(0)
bc1 = fem.dirichletbc(u1, dolfinx.fem.locate_dofs_topological(V, fdim, boundary_facets))

bcs = [bc1]

# solve the problem
problem = fem.petsc.NonlinearProblem(F_const, u, bcs)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)

# how big is the solution?
J = ufl.dot(u, u)*dx
J_form = fem.form(J)

# partial derivative of J w.r.t. k
dJdk_form = fem.form(ufl.diff(J, k_var))

# partial derivative of R w.r.t. k
dRdk_form = fem.form(ufl.diff(F_var, k_var))

lhs = ufl.adjoint(ufl.derivative(F_const, u))
rhs = -ufl.derivative(J, u)
adjoint_problem = dolfinx.fem.petsc.LinearProblem(lhs, rhs, bcs=bcs)

def compute_dJdk_where_k_is_constant(value):
    # set k
    k_const.value = value

    # solve new solution vector
    solver.solve(u)

    # reassemble adjoint stuff
    dJdk = fem.assemble_scalar(dJdk_form)              # rank-local scalar
    dJdk = MPI.COMM_WORLD.allreduce(dJdk, op=MPI.SUM)
    dRdk = fem.petsc.assemble_vector(dRdk_form)
    dRdk.assemble()

    # solve adjoint vector
    lmbda = adjoint_problem.solve()

    # calculate derivative (GRADIENT w.r.t. k as constant)
    dJdk += dRdk.dot(lmbda.vector)  # Vec.dot is collective and returns the global value

    # calculate size
    J_value = fem.assemble_scalar(J_form)
    J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM) # J value

    return dJdk, J_value

# find zero
k_value = 10.0
alpha = 0.5
# Vanilla gradient descent
for i in range(100):
    s, J_value = compute_dJdk_where_k_is_constant(k_value)
    if MPI.COMM_WORLD.rank == 0:
        print(s, J_value, k_value)
    k_value += -alpha*s

My problem is that if I run this MWE with MPI, the results are not consistent between runs with different numbers of processes. They are still usable in this case, but in other cases the optimization might drift unexpectedly.

I still do not know how to implement this for multiple constants…

Do I have to define both forms like above (one with k as a UFL variable and one with k as a fem.Constant)?
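
A sketch of one way to avoid the duplication, which is what I ended up doing below: generate the variable form from the original one with ufl.replace (assuming k_const is a fem.Constant with several components):

dict_var = {c: ufl.variable(c) for c in k_const}  # one UFL variable per component
F_var = ufl.replace(F_const, dict_var)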

Thank you for any reply.

Still talking to myself, but it helps me think…

This kind of does the job while being easy to use:

import dolfinx
import ufl
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np
import time

import dolfinx.nls.petsc
import dolfinx.fem.petsc

# mesh and function space
mesh = create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], CellType.triangle)
V = fem.FunctionSpace(mesh, ("CG", 1))
u = fem.Function(V)
v = ufl.TestFunction(V)

# sensitivity will be calculated wrt this
k1_0 = np.full(5, 10.0)
k1 = fem.Constant(mesh, PETSc.ScalarType(k1_0))
k2_0 = np.full(2, 10.0)
k2 = fem.Constant(mesh, PETSc.ScalarType(k2_0))

dx = ufl.Measure("dx")
F = ufl.inner(ufl.grad(u), ufl.grad(v))*dx # heat equation
F += (sum(k1) + sum(k2))*v*dx # add source of heat

# boundary conditions
def left(x): return np.isclose(x[0], 0)

fdim = mesh.topology.dim - 1

u1 = fem.Function(V)
boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
with u1.vector.localForm() as loc:
    loc.set(0)
bc1 = fem.dirichletbc(u1, dolfinx.fem.locate_dofs_topological(V, fdim, boundary_facets))

bcs = [bc1]

# solve the problem
problem = fem.petsc.NonlinearProblem(F, u, bcs)
nls_solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)

J = ufl.inner(u, u)*dx

class AdjointDerivative:
    def __init__(self, form, controls, J, forward_solver, bcs=None):
        self.form = form
        self.controls = controls
        self.J = J
        self.J_form = fem.form(self.J)
        self.forward_solver = forward_solver
        self.vars = {}
        for control in self.controls:
            for i in range(len(control)):
                self.vars[control[i]] = ufl.variable(control[i])
        self.var_form = ufl.replace(self.form, self.vars)

        self.dJdk_form = [fem.form(ufl.diff(self.J, var)) for var in self.vars.values()]
        self.dRdk_form = [fem.form(ufl.diff(self.var_form, var)) for var in self.vars.values()]
        self.dRdk = [fem.petsc.assemble_vector(form) for form in self.dRdk_form]

        # adjoint problem
        lhs = ufl.adjoint(ufl.derivative(self.form, u))
        rhs = -ufl.derivative(J, u)
        self.adjoint_problem = dolfinx.fem.petsc.LinearProblem(lhs, rhs, bcs=bcs)

    def compute_gradient(self, control_values):
        # set control values
        start_idx = 0
        for control in self.controls:
            n = len(control)
            end_idx = start_idx + n
            control.value[:] = control_values[start_idx:end_idx]
            start_idx = end_idx 
    
        # solve new solution vector
        self.forward_solver.solve(u)

        # solve adjoint vector
        lmbda = self.adjoint_problem.solve()

        # initialize empty gradient vector
        dJdk = np.zeros(len(self.dRdk))
        for i in range(len(self.dRdk)):

            # reassemble adjoint stuff
            self.dRdk[i].assemble()

            # calculate derivative: dJdk + dRdk*lmbda 
            dJdk[i] = fem.assemble_scalar(self.dJdk_form[i])
            dJdk[i] += self.dRdk[i].dot(lmbda.vector)

        J_value = fem.assemble_scalar(self.J_form)
        J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM)

        return dJdk, J_value

controls = [k1, k2]
grad = AdjointDerivative(F, controls, J, nls_solver, bcs=bcs)

# find zero
start = time.time()
k_value = np.concatenate([k1_0.copy(), k2_0.copy()])
alpha = 0.5
# Vanilla gradient descent
for i in range(10):
    s, J_value = grad.compute_gradient(k_value)
    if MPI.COMM_WORLD.rank == 0:
        print(J_value)
    k_value += -alpha*s
if MPI.COMM_WORLD.rank == 0:
    print(f"Loop time: {time.time() - start}")

But I still do not know why it gives a different answer when running in parallel.

with mpirun -n 1 it gives:

653.2668876355016
2.9121586785616693
0.012981934840404524
5.787137680401019e-05
2.5798128663970103e-07
1.1500390682185916e-09
5.126689131826598e-12
2.2853955296547626e-14
1.0187925564957716e-16
4.541613299326357e-19
Loop time: 0.22720575332641602

while with mpirun -n 10 it gives:

653.2668876354853
6.811738442384701
0.07102729601721423
0.0007406151633957501
7.722535574472333e-06
8.052435143991264e-08
8.396427717673384e-10
8.755115335607253e-12
9.129125756467329e-14
9.519113556207951e-16
Loop time: 0.16471171379089355

Why is it not consistent? What am I missing?

I don’t have bandwidth to look at this example until mid December, but I promise that I will have a look then.


I think I have the correct solution: it produces consistent results and passes a Taylor convergence test. Here is the code for a single fem.Constant with two values in it (len(k0) == 2):
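
For reference, the Taylor test checks that the remainder

$$r(p) = \left|\,J(k_0 + p) - J(k_0) - \nabla J(k_0)\cdot p\,\right|$$

shrinks like $O(p^2)$ when the perturbation $p$ is repeatedly halved; a measured convergence rate of 2 therefore indicates an exact gradient.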

import dolfinx
import ufl
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls
from mpi4py import MPI
from petsc4py import PETSc
import numpy as np
import time

import dolfinx.nls.petsc
import dolfinx.fem.petsc

# mesh and function space
mesh = create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], CellType.triangle)
V = fem.FunctionSpace(mesh, ("CG", 1))
u = fem.Function(V)
v = ufl.TestFunction(V)

# sensitivity will be calculated wrt this
k0 = np.full(2, 10.0)
k_const = fem.Constant(mesh, PETSc.ScalarType(k0))

dx = ufl.Measure("dx")
F_const = ufl.inner(ufl.grad(u), ufl.grad(v))*dx # heat equation
F_const += sum(k_const)*v*dx # add source of heat

# boundary conditions
def left(x): return np.isclose(x[0], 0)

fdim = mesh.topology.dim - 1

u1 = fem.Function(V)
boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
with u1.vector.localForm() as loc:
    loc.set(0)
bc1 = fem.dirichletbc(u1, dolfinx.fem.locate_dofs_topological(V, fdim, boundary_facets))

bcs = [bc1]

# solve the problem
problem = fem.petsc.NonlinearProblem(F_const, u, bcs)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)
solver.convergence_criterion = "residual"
solver.rtol = 1e-16

dict_var = {c: ufl.variable(c) for c in k_const}
F_var = ufl.replace(F_const, dict_var)

# how big is the solution?
J = ufl.inner(u, u)*dx
J_form = fem.form(J)

# partial derivative of J w.r.t. k
dJdk_form = [fem.form(ufl.diff(J, var)) for var in dict_var.values()]

# partial derivative of R w.r.t. k
dFdk_form = [fem.form(ufl.diff(F_var, var)) for var in dict_var.values()]
dFdk = [fem.petsc.assemble_vector(form) for form in dFdk_form]

lhs = ufl.adjoint(ufl.derivative(F_const, u))
rhs = -ufl.derivative(J, u)
adjoint_problem = dolfinx.fem.petsc.LinearProblem(lhs, rhs, bcs=bcs,
                                                  petsc_options={"ksp_type": "preonly",
                                                                 "pc_type": "lu"})

def grad(value):
    # set k
    k_const.value = value

    # solve new solution vector
    solver.solve(u)

    # solve adjoint vector
    lmbda = adjoint_problem.solve()

    # initialize empty gradient vector
    dJdk = np.zeros(len(dFdk))

    for i in range(len(dFdk)):

        # reassemble adjoint stuff
        with dFdk[i].localForm() as dfdk_loc:
            dfdk_loc.set(0)
        dolfinx.fem.petsc.assemble_vector(dFdk[i], dFdk_form[i])
        dFdk[i].ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)

        # calculate derivative (GRADIENT w.r.t. k as constant)
        dJdk[i] = fem.assemble_scalar(dJdk_form[i])  # partial dJdk
        dJdk[i] += dFdk[i].dot(lmbda.vector)         # partial dFdk*adjoint_vec

    # calculate size
    J_value = fem.assemble_scalar(J_form)
    J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM) # J value

    return dJdk, J_value

def loss(value):
    k_const.value = value
    solver.solve(u)
    J_value = fem.assemble_scalar(J_form)
    J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM) # J value
    return J_value

def taylor_test(loss, grad, k0, p=1e-3, n=5):
    g0, _ = grad(k0)
    l0 = loss(k0)
    remainder = []
    perturbation = []
    for i in range(0, n):
        l1 = loss(k0+p)
        remainder.append(l1 - l0 - np.sum(g0*p))
        perturbation.append(p)
        p /= 2
    conv_rate = convergence_rates(remainder, perturbation)
    return remainder, perturbation, conv_rate

def convergence_rates(r, p):
    cr = [] # convergence rates
    for i in range(1, len(p)):
        cr.append(np.log(r[i] / r[i - 1])
                 / np.log(p[i] / p[i - 1]))
    return cr

r = taylor_test(loss, grad, k0)
print("Convergence rate:", r[2]) # should be 2.0

# find zero
start = time.time()
k_value = k0.copy()
alpha = 0.5
# Vanilla gradient descent
for i in range(10):
    s, J_value = grad(k_value)
    if MPI.COMM_WORLD.rank == 0:
        print(J_value, k_value)
    k_value += -alpha*s
if MPI.COMM_WORLD.rank == 0:   
    print("elapsed time:", time.time() - start)

with mpirun -n 1:

Convergence rate: [1.9998945765808456, 1.999579332845479, 1.9983387252468865, 1.9931604187626235]
53.32790919474743 [10. 10.]
28.68068576463282 [7.33360454 7.33360454]
15.42497630885088 [5.37817556 5.37817556]
8.295823052529853 [3.94414127 3.94414127]
4.46163927521872 [2.89247723 2.89247723]
2.3995479286534738 [2.12122841 2.12122841]
1.290519001364818 [1.55562503 1.55562503]
0.6940637746786695 [1.14083388 1.14083388]
0.373279682679401 [0.83664245 0.83664245]
0.20075636646754783 [0.61356049 0.61356049]
elapsed time: 0.18547725677490234

with mpirun -n 10:

Convergence rate: [1.999886605278707, 1.9995460660086042, 1.998224259455954, 1.9927456450993608]
53.327909194747264 [10. 10.]
28.68068576462917 [7.33360454 7.33360454]
15.424976308846658 [5.37817556 5.37817556]
8.295823052526544 [3.94414127 3.94414127]
4.461639275216239 [2.89247723 2.89247723]
2.3995479286518213 [2.12122841 2.12122841]
1.2905190013637264 [1.55562503 1.55562503]
0.6940637746780001 [1.14083388 1.14083388]
0.37327968267899103 [0.83664245 0.83664245]
0.2007563664672966 [0.61356049 0.61356049]
elapsed time: 0.11587405204772949

So the results are now (almost) identical, and the parallel run is even faster.

There were multiple issues: I had not set up the LinearProblem that solves for the adjoint vector lmbda correctly (tolerances and so on), and the ghostUpdates were missing or wrong at various stages.
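
The key pattern, extracted from the code above, is to zero the vector, assemble into it, and then reverse-scatter the ghost contributions before using it in a dot product:

with dFdk[i].localForm() as dfdk_loc:
    dfdk_loc.set(0)
dolfinx.fem.petsc.assemble_vector(dFdk[i], dFdk_form[i])
dFdk[i].ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)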

There is a bunch of collective MPI communication calls (only for the evaluation of the objective). I do not know if it is possible to get rid of them. The gradient call itself needs no explicit collective calls beyond those hidden in ghostUpdate and Vec.dot, so that is good.

Now I have to figure out how to define the objective as a sum of squared errors at a bunch of specific points instead of an integral over the whole domain. I have some idea how to do that…
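
Concretely, for points $x_i$ and target values $\bar{u}_i$, the objective I have in mind would be

$$J(u) = \sum_i \left( u(x_i) - \bar{u}_i \right)^2 .$$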

I will post the results when I figure it out.

Thank you all 🙂

Here is a little helper script, derivatives.py, and a test for it below. It seems to do what it should (both in serial and in parallel). The test is done on a 2D domain, so it might need some tweaking for 3D.

Edit: It looks like it works in 3D as is.

This might be useful to some people until the new dolfin-adjoint is implemented. The usage is shown in the test below. It does not handle constraints.
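
A minimal usage sketch (mirroring the unit test below; mesh, V, u, F, k, bcs and the Newton solver are assumed to be set up as in the earlier MWEs):

controls = [k]                                    # list of fem.Constant controls
J = ufl.inner(u, u)*ufl.dx
Jr = UflObjective(J, u, controls)                 # wrap the objective
adjoint = AdjointDerivative(Jr, controls, F, lambda: solver.solve(u), u, bcs)
dJdk, J_value = adjoint.compute_gradient()        # gradient w.r.t. all control components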

from mpi4py import MPI
from dolfinx import geometry
from dolfinx import fem
import numpy as np
import ufl
from petsc4py import PETSc

import dolfinx.nls.petsc
import dolfinx.fem.petsc

def wrap_constant_controls(controls):
    vars = {}
    for control in controls:
        for i in range(len(control)):
            vars[control[i]] = ufl.variable(control[i])
    return vars

class UflObjective:
    def __init__(self, J, u, controls):
        self.J = J
        self.J_form = fem.form(J)
        self.u = u
        self.controls = controls

        self.rhs = -ufl.derivative(J, u)
        self.rhs_form = fem.form(self.rhs)
        self.b = dolfinx.fem.petsc.create_vector(self.rhs_form)

        self.vars = wrap_constant_controls(self.controls)
        self.J_var_form = ufl.replace(self.J, self.vars)

        self.dJdk_form = [fem.form(ufl.diff(self.J_var_form, v)) for v in self.vars.values()]

    def assemble_adjoint_rhs_vector(self):
        with self.b.localForm() as b_loc:
            b_loc.set(0.0)
        dolfinx.fem.petsc.assemble_vector(self.b, self.rhs_form)
        self.b.assemble()
        return self.b
    
    def eval_dJdk(self):
        dJdk = np.zeros(len(self.dJdk_form))
        for i in range(len(self.dJdk_form)):
            dJdk[i] = fem.assemble_scalar(self.dJdk_form[i])
            dJdk[i] = MPI.COMM_WORLD.allreduce(dJdk[i], op=MPI.SUM)
        return dJdk
    
    def evaluate(self):
        J_value = fem.assemble_scalar(self.J_form)
        J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM)
        return J_value

class Point_wise_lsq_objective:
    def __init__(self, p_coords, f, controls, true_values = None):
        self.p_coords = p_coords
        self.f = f
        self.controls = controls
        self.b = f.copy()
        self.true_values = true_values

        self.vars = wrap_constant_controls(self.controls)

        self.evaluate_dofs_sensitivities()

    def evaluate_dofs_sensitivities(self):
        # sensitivity of values at p_coords w.r.t values at dofs
        domain = self.f.function_space.mesh
        points = np.array(self.p_coords)
        tdim = domain.topology.dim
        bb_tree = geometry.bb_tree(domain, tdim)  # bounding-box tree over cells
        cell_candidates = geometry.compute_collisions_points(bb_tree, points)
        colliding_cells = geometry.compute_colliding_cells(domain, cell_candidates, points)

        # initialize result on each proc
        self.points = []
        self.cells = []
        self.dofs = []
        self.sensitivities = []
        self.true_values_map = []
        for i, point in enumerate(points):
            if len(colliding_cells.links(i)) > 0:
                cell = colliding_cells.links(i)[0]
                dofs = self.f.function_space.dofmap.cell_dofs(cell)
                s = np.zeros(len(dofs))

                # there might be a nicer way, but finite differences work fine here:
                # on ("CG", 1) the point evaluation is linear in the dofs, so a
                # one-sided difference is exact
                for j in range(len(dofs)):
                    val_0 = self.f.eval(point, cell)
                    self.f.x.array[dofs[j]] += 1.0
                    val_1 = self.f.eval(point, cell)
                    self.f.x.array[dofs[j]] -= 1.0
                    s[j] = val_1[0] - val_0[0]
            
                self.points.append(point)
                self.cells.append(cell)
                self.dofs.append(dofs)
                self.sensitivities.append(s)
                self.true_values_map.append(i)
    
    def assemble_adjoint_rhs_vector(self):
        with self.b.vector.localForm() as b_loc:
            b_loc.set(0)
        for i in range(len(self.points)):
            # contribution to dJdu of single point
            value = -2*(self.f.eval(self.points[i], self.cells[i])[0]-self.true_values[self.true_values_map[i]])
            self.b.x.array[self.dofs[i]] += self.sensitivities[i]*value
        return self.b.vector

    def eval_points(self):
        values = []
        for i in range(len(self.points)):
            values.append(self.f.eval(self.points[i], self.cells[i])[0])
        return values
    
    def eval_dJdk(self):
        # the pointwise objective does not depend on the controls explicitly,
        # so the partial derivative dJ/dk is identically zero
        dJdk = np.zeros(len(self.vars.keys()))
        return dJdk
    
    def evaluate(self):
        J_value = 0.0
        for i in range(len(self.points)):
            J_value += (self.f.eval(self.points[i], self.cells[i])[0]-self.true_values[self.true_values_map[i]])**2
        J_value = MPI.COMM_WORLD.allreduce(J_value, op=MPI.SUM)
        return J_value

class AdjointDerivative:
    def __init__(self, J, controls, form, forward_solver, u, bcs=None):
        self.form = form
        self.controls = controls
        self.J = J
        self.forward_solver = forward_solver
        self.u = u
        self.lmbda = u.copy()
        self.bcs = bcs or []

        self.vars = wrap_constant_controls(self.controls)
        self.var_form = ufl.replace(self.form, self.vars)

        self.dFdk_form = [fem.form(ufl.diff(self.var_form, var)) for var in self.vars.values()]
        self.dFdk = [fem.petsc.create_vector(form) for form in self.dFdk_form]

        # adjoint problem solver definition
        self.lhs = ufl.adjoint(ufl.derivative(self.form, u))
        self.lhs_form = fem.form(self.lhs)
        self.A = dolfinx.fem.petsc.create_matrix(self.lhs_form)

        self.adjoint_solver = PETSc.KSP().create(u.function_space.mesh.comm)
        self.adjoint_solver.setOperators(self.A)
        self.adjoint_solver.setType(PETSc.KSP.Type.PREONLY)
        self.adjoint_solver.setTolerances(atol=1e-16)
        self.adjoint_solver.getPC().setType(PETSc.PC.Type.LU)

    def solve_adjoint_problem(self):

        # lhs assembly
        self.A.zeroEntries()
        dolfinx.fem.petsc.assemble_matrix_mat(self.A, self.lhs_form, bcs=self.bcs)
        self.A.assemble()

        # rhs assembly
        b = self.J.assemble_adjoint_rhs_vector()
        dolfinx.fem.petsc.apply_lifting(b, [self.lhs_form], bcs=[self.bcs])
        b.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
        dolfinx.fem.petsc.set_bc(b, self.bcs)

        self.adjoint_solver.solve(b, self.lmbda.vector)
        self.lmbda.x.scatter_forward()
        return self.lmbda

    def compute_gradient(self, *args, **kwargs):
    
        # run forward solve
        self.forward_solver(*args, **kwargs)

        # solve adjoint vector
        self.solve_adjoint_problem()

        # initialize empty gradient vector
        dJdk = self.J.eval_dJdk()
        for i in range(len(self.dFdk)):

            # reassemble adjoint stuff
            with self.dFdk[i].localForm() as dfdk_loc:
                dfdk_loc.set(0)
            dolfinx.fem.petsc.assemble_vector(self.dFdk[i], self.dFdk_form[i])
            self.dFdk[i].ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
            
            # add dFdk*lmbda
            dJdk[i] += self.dFdk[i].dot(self.lmbda.vector)
            

        J_value = self.J.evaluate()

        return dJdk, J_value
    
def taylor_test(loss, grad, k0, p=1e-3, n=5):
    g0 = grad(k0)
    l0 = loss(k0)
    remainder = []
    perturbation = []
    for i in range(0, n):
        l1 = loss(k0+p)
        remainder.append(l1 - l0 - np.sum(g0*p))
        perturbation.append(p)
        p /= 2
    conv_rate = convergence_rates(remainder, perturbation)
    return remainder, perturbation, conv_rate

def convergence_rates(r, p):
    cr = []
    for i in range(1, len(p)):
        cr.append(np.log(r[i] / r[i - 1])
                 / np.log(p[i] / p[i - 1]))
    return cr

And here is the unit test:

from mpi4py import MPI
import dolfinx
import ufl
from petsc4py import PETSc
import numpy as np
import unittest

from derivatives import (
    UflObjective, Point_wise_lsq_objective, AdjointDerivative, taylor_test)

import dolfinx.nls.petsc
import dolfinx.fem.petsc

class TestDerivative(unittest.TestCase):
    def setUp(self) -> None:
        # mesh and function spaces
        mesh = dolfinx.mesh.create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], dolfinx.mesh.CellType.triangle)
        V = dolfinx.fem.FunctionSpace(mesh, ("P", 1)) # solution space
        self.u = dolfinx.fem.Function(V)  # solution trial
        v = ufl.TestFunction(V)  # solution test

        self.k = dolfinx.fem.Constant(mesh, PETSc.ScalarType((1.0, 1.0)))

        self.dx = ufl.Measure("dx")
        self.F = ufl.inner(ufl.grad(self.u), ufl.grad(v))*self.dx # heat equation
        self.F += sum(self.k)*v*self.dx # add source of heat

        # boundary conditions
        def left(x): return np.isclose(x[0], 0)

        fdim = mesh.topology.dim - 1

        u1 = dolfinx.fem.Function(V)
        boundary_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
        with u1.vector.localForm() as loc:
            loc.set(0)
        bc1 = dolfinx.fem.dirichletbc(u1, dolfinx.fem.locate_dofs_topological(V, fdim, boundary_facets))

        self.bcs = [bc1]

        # solve the problem
        problem = dolfinx.fem.petsc.NonlinearProblem(self.F, self.u, self.bcs)
        self.solver = dolfinx.nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem)
        self.solver.convergence_criterion = "residual"
        self.solver.rtol = 1e-16
        self.forward = lambda : self.solver.solve(self.u)
    
    def test_ufl_objective(self):
        J = ufl.inner(self.u, self.u)*self.dx

        controls = [self.k]
        Jr = UflObjective(J, self.u, controls)
        adjoint = AdjointDerivative(Jr, controls, self.F, self.forward, self.u, self.bcs)

        def grad(value):
            self.k.value[:] = value
            g, l = adjoint.compute_gradient()
            return g

        def loss(value):
            self.k.value[:] = value
            self.solver.solve(self.u)
            l = Jr.evaluate()
            return l
        
        k0 = np.array([1, 1])
        r = taylor_test(loss, grad, k0)
        self.assertTrue(np.allclose(r[2], 2.0, atol=0.1), f"Taylor test failed - Convergence rate: {r[2]}")


    def test_lsq_objective(self):
        # sum of square errors at specific point in the domain
        points = [[0.0, 0.0, 0.0], [0.5, 0.5, 0.0]]
        true_values = [0.0, 0.0]
        controls = [self.k]
        Jr = Point_wise_lsq_objective(points, self.u, controls, true_values)
        adjoint = AdjointDerivative(Jr, controls, self.F, self.forward, self.u, self.bcs)

        def grad(value):
            self.k.value[:] = value
            g, l = adjoint.compute_gradient()
            return g

        def loss(value):
            self.k.value[:] = value
            self.solver.solve(self.u)
            l = Jr.evaluate()
            return l
        
        k0 = np.array([1, 1])
        r = taylor_test(loss, grad, k0)
        self.assertTrue(np.allclose(r[2], 2.0, atol=0.1), f"Taylor test failed - Convergence rate: {r[2]}")

    def test_optimisation(self):
        points = [[0.0, 0.0, 0.0], [0.5, 0.5, 0.0]]
        true_values = [0.0, 0.0]
        controls = [self.k]
        Jr = Point_wise_lsq_objective(points, self.u, controls, true_values)
        adjoint = AdjointDerivative(Jr, controls, self.F, self.forward, self.u, self.bcs)

        # find zero with vanilla gradient descent (self.k.value is updated in place)
        alpha = 0.5
        for i in range(150):
            g, l = adjoint.compute_gradient()
            self.k.value += -alpha*g
            if MPI.COMM_WORLD.rank == 0:
                print(l, self.k.value)

        self.assertTrue(np.allclose(self.k.value, 0.0, atol=1e-6), f"k_value should be [0.0, 0.0] not {self.k.value}")

    def tearDown(self) -> None:
        pass

if __name__ == '__main__':
    unittest.main()