Calculate adjoint derivatives in unsteady problems

Dear all,

I am trying to solve an optimal control problem with the adjoint method. To simplify things, I am trying to reproduce a simplified version of the tutorial Time-distributed controls written in dolfin-adjoint for dolfinx.

I will state the problem as follows. Consider the heat equation over a squared domain \Omega with homogeneous Dirichlet boundaries \Gamma_{D} and a control Dirichlet boundary \Omega_{C}:

\frac{\partial u}{\partial t} = \nu\Delta u in \Omega
u = 0 at \Gamma_{D}
u = f(t) at \Gamma_{C}

I define the cost function as the discrepancy between a desired state and the solution at t=T:
J(u(T)) = \frac{1}{2}\int_{\Omega}||u(T) - u_{obs}||^{2}dx
where u_{obs} is the desired profile at t=T:
u_{obs} = x(1-x)y(1-y)

Therefore, the main difference with the dolfin-adjoint tutorial is that I want to use a Dirichlet BC as the control, and not a forcing. I will impose the boundary conditions weakly using the Nitsche’s method as in Weak imposition of Dirichlet conditions for the Poisson problem — FEniCSx tutorial, so that I can use ufl differentiation. For simplicity, I have considered that the control boundary f(t) is homogeneous in space. Therefore, the rows of the control vector \boldsymbol{m} are the value of the control boundary at each time step, \boldsymbol{m}(n) = f(t^{n}).

Using the adjoint method, I compute the gradient of J with respect to the design parameter \boldsymbol{m} as follows:

\frac{dJ}{d\boldsymbol{m}} = \frac{\partial J}{\partial \boldsymbol{m}} + \left(\frac{\partial \boldsymbol{R}}{\partial\boldsymbol{m}}\right)^{T}\boldsymbol{\lambda}
where \boldsymbol{\lambda} is the adjoint variable, solution of the adjoint problem:
\left(\frac{\partial\boldsymbol{R}}{\partial\boldsymbol{u}}\right)^{T}\boldsymbol{\lambda} = -\frac{\partial J}{\partial\boldsymbol{u}}

The terminal condition for the adjoint variable is:
\boldsymbol{\lambda}_{T} = - (\boldsymbol{u}(T) - \boldsymbol{u}_{obs})
and the boundary conditions are homogeneous Dirichlet everywhere.

I followed a similar procedure as in Calculate sensitivity when the control is a fem function. However, the Taylor test did not pass. When I checked the gradient, I got a vector whose rows are all the same (meaning that the adjoint problem is not solved properly).

Here is my simplified code:

from mpi4py import MPI
from petsc4py import PETSc

import dolfinx
import ufl
from ufl import Measure, dot, inner, grad, TestFunction, adjoint, derivative, action, Circumradius
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls, la, io, default_scalar_type
import numpy as np
import dolfinx.nls.petsc
import dolfinx.fem.petsc

# Mesh and function space
mesh = dolfinx.mesh.create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], CellType.triangle)
n = ufl.FacetNormal(mesh)
Q = fem.FunctionSpace(mesh, ("CG", 1))

# Define solution vector and test function
u = fem.Function(Q)
u_s = fem.Function(Q) # Solution
q = TestFunction(Q)

# Sensitivity will be calculated wrt this
f = fem.Function(Q)  # The Dirichlet BC function in time f is the design variable
f.name = "Control"

# Numerical parameters
dx = Measure("dx")
dt = 1e-2
k = fem.Constant(mesh, dt)
nu = fem.Constant(mesh, 1.0e-2)
num_steps = 100

# Boundaries
def left(x):
    return np.isclose(x[0], 0)
def right(x):
    return np.isclose(x[0], 1)
def top(x):
    return np.isclose(x[1], 1)
def bottom(x):
    return np.isclose(x[1], 0)

fdim = mesh.topology.dim - 1

left_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
right_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, right)
top_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, top)
bottom_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, bottom)

# Mark zero Dirichlet facets (left, top and bottom) with 1, Control Dirichlet facets (right) with 2
marked_facets = np.hstack([left_facets, right_facets, top_facets, bottom_facets])
marked_values = np.hstack([np.full_like(left_facets, 1), np.full_like(right_facets, 2), np.full_like(top_facets, 1), np.full_like(bottom_facets, 1)])
sorted_facets = np.argsort(marked_facets)
facet_tag = dolfinx.mesh.meshtags(mesh, fdim, marked_facets[sorted_facets], marked_values[sorted_facets])

ds = ufl.Measure('ds', domain=mesh, subdomain_data=facet_tag)

# BCs will be imposed weakly to get sensitivities using UFL derivatives
bcs = []

# Initial condition
def init_cond(x):
    return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
u_s.interpolate(init_cond)

# Residual of the variational form of Heat equation: R = (T_comp - T_comp_old) + dt*(S_comp) - dt*BC_comp
# Temporal component
T_comp = inner(u,q)*dx
T_comp_old = inner(u_s,q)*dx

# Space components
S_comp = nu*inner(grad(u), grad(q))*dx

# Boundary components using Nitsche’s method
h = 2 * Circumradius(mesh)
alpha = fem.Constant(mesh, default_scalar_type(10))

# Zero Dirichlet at left, top and bottom boundaries
uD = fem.Function(Q)
with uD.vector.localForm() as loc:
    loc.set(0.0)
BC_comp = -inner(dot(n,grad(u)), q)*ds(1)
BC_comp -= inner(dot(n,grad(q)), u)*ds(1)
BC_comp += alpha/h*inner(u,q)*ds(1)
BC_comp += inner(dot(n,grad(q)),uD)*ds(1)
BC_comp -= alpha/h*inner(uD,q)*ds(1)

# Control Dirichlet (u=f(t)) at right boundary
BC_comp += -inner(dot(n,grad(u)), q)*ds(2)
BC_comp -= inner(dot(n,grad(q)), u)*ds(2)
BC_comp += alpha/h*inner(u,q)*ds(2)
BC_comp += inner(dot(n,grad(q)),f)*ds(2)
BC_comp -= alpha/h*inner(f,q)*ds(2)

# Residual
R = (T_comp - T_comp_old) + k*(S_comp) - k*BC_comp

# Solver for the direct problem
problem_dir = fem.petsc.NonlinearProblem(R, u, bcs)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem_dir)
solver.rtol = 1e-16
solver.atol = 1e-16

# Define desired u profile at t=T (final time)
def profile(x):
    return x[0]*(1-x[0])*x[1]*(1-x[1])
u_obs = fem.Function(Q)
u_obs.interpolate(profile)

# Define cost function
J = (1/2)*inner(u - u_obs, u - u_obs)*dx
J_form = fem.form(J)

# Adjoint problem
lmbda = fem.Function(Q)

# Residual of the heat equation with homogeneous Dirichlet everywhere
BC_comp_0 = -inner(dot(n,grad(u)), q)*ds
BC_comp_0 -= inner(dot(n,grad(q)), u)*ds
BC_comp_0 += alpha/h*inner(u,q)*ds
BC_comp_0 += inner(dot(n,grad(q)),uD)*ds
BC_comp_0 -= alpha/h*inner(uD,q)*ds

R0 = (T_comp - T_comp_old) + k*(S_comp) - k*BC_comp_0

# Bilinear and linear form of the adjoint problem
lhs = adjoint(derivative(R0, u)) # Change sign to integrate forward in time
rhs = -derivative(J, u)

# Create adjoint problem solver
A = fem.petsc.assemble_matrix(fem.form(lhs), bcs=bcs)
A.assemble()
b = fem.petsc.create_vector(fem.form(rhs))

solver_adj = PETSc.KSP().create(mesh.comm)
solver_adj.setOperators(A)
solver_adj.setType(PETSc.KSP.Type.PREONLY)
solver_adj.getPC().setType(PETSc.PC.Type.LU)

# Compute sensitivity: dJ/dm
# Partial derivative of cost (J) w.r.t. control (f)
dJdm_form = fem.form(derivative(J, f))
dJdm = fem.petsc.create_vector(dJdm_form)
dJdm.assemble()

# Partial derivative of residual (R) w.r.t. control (f)
dRdm_form = fem.form(adjoint(derivative(R, f)))
dRdm = fem.petsc.create_matrix(dRdm_form)

# Distribution of gradient (as if the control wasn't constant in space)
dJ = fem.Function(Q)
dJ.name = "dJ"

# Integral in space over control boundary (because we assumed the control to be constant in space)
dJdm_uniform_form = fem.form(inner(dJ*n,n)*ds(2))

# Function to compute the cost
def J_cost(m_n):

    # Initial condition
    def init_cond(x):
        return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
    u_s.interpolate(init_cond)

    t = 0
    for iterations in range(num_steps):
        t += dt

        with f.vector.localForm() as loc:
            loc.set(m_n[iterations])

        # Solve heat equation
        solver.solve(u)
        u.x.scatter_forward()

        # Update solution vector
        u_s.x.array[:] = u.x.array

        J_val = fem.assemble_scalar(J_form)

    return J_val

# Function to compute the gradient of the cost w.r.t. f
def grad_J(m_n,dJdm=dJdm):

    # Direct problem
    # Initial condition
    def init_cond(x):
        return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
    u_s.interpolate(init_cond)

    t = 0
    for iterations in range(num_steps):
        t += dt

        with f.vector.localForm() as loc:
            loc.set(m_n[iterations])

        # Solve heat equation
        solver.solve(u)
        u.x.scatter_forward()

        # Update solution vector
        u_s.x.array[:] = u.x.array

    # Adjoint problem
    # Initial adjoint variable
    u_s.x.array[:] = -(u.x.array - u_obs.x.array)

    # Integration of adjoint problem + gradient vector computation
    dJdm_uniform_vec = np.zeros(num_steps, dtype=PETSc.ScalarType)
    for iterations in range(num_steps):
        # Update the right hand side reusing the initial vector
        with b.localForm() as loc_b:
            loc_b.set(0)
        fem.petsc.assemble_vector(b, fem.form(rhs))

        solver_adj.solve(b, lmbda.vector)
        lmbda.x.scatter_forward()

        u_s.x.array[:] = lmbda.x.array

        # Reassemble dRdm
        dRdm.zeroEntries()
        fem.petsc.assemble_matrix_mat(dRdm, dRdm_form)
        dRdm.assemble()

        # Compute gradient
        # zero gradient first
        with dJdm.localForm() as dJdm_loc:
            dJdm_loc.set(0)

        fem.petsc.assemble_vector(dJdm, dJdm_form)
        dJdm.assemble()


        dJdm += dRdm*lmbda.vector
        dJdm.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)

        dJ.x.array[:] = dJdm

        dJdm_uniform_vec[iterations] =  fem.assemble_scalar(dJdm_uniform_form)

    return dJdm_uniform_vec

# Taylor test
def taylor_test(cost, grad, m_0, p, n=5):
        g0 = grad(m_0)
        l0 = cost(m_0)
        l0 = MPI.COMM_WORLD.allreduce(l0, op=MPI.SUM)
        reminder = []
        perturbance = []
        for i in range(0, n):
            l1 = cost(m_0+p)
            l1 = MPI.COMM_WORLD.allreduce(l1, op=MPI.SUM)
            g = np.sum(g0)#.array)
            g = MPI.COMM_WORLD.allreduce(g, op=MPI.SUM)
            reminder.append(l1 - l0 - g*p)
            perturbance.append(p)
            p /= 2
        conv_rate = convergence_rates(reminder, perturbance)
        return reminder, perturbance, conv_rate

# Compute convergence rate (they should be 2.0 for small p)
def convergence_rates(r, p):
    cr = [] # convergence rates
    for i in range(1, len(p)):
        cr.append(np.log(r[i] / r[i - 1])
                 / np.log(p[i] / p[i - 1]))
    return cr

# Taylor test
m_0 = np.zeros(num_steps, dtype=PETSc.ScalarType)
r = taylor_test(J_cost, grad_J, m_0, p=1e-4)
print("Convergence rate:", r[2])

Does anyone know what I am doing wrong?
Also, if anyone has a question regarding the problem statement or my code, please let me know. Thanks!