Dear all,
I am trying to solve an optimal control problem with the adjoint method. To simplify things, I am trying to reproduce a simplified version of the tutorial Time-distributed controls written in dolfin-adjoint
for dolfinx
.
I will state the problem as follows. Consider the heat equation over a squared domain \Omega with homogeneous Dirichlet boundaries \Gamma_{D} and a control Dirichlet boundary \Omega_{C}:
\frac{\partial u}{\partial t} = \nu\Delta u in \Omega
u = 0 at \Gamma_{D}
u = f(t) at \Gamma_{C}
I define the cost function as the discrepancy between a desired state and the solution at t=T:
J(u(T)) = \frac{1}{2}\int_{\Omega}||u(T) - u_{obs}||^{2}dx
where u_{obs} is the desired profile at t=T:
u_{obs} = x(1-x)y(1-y)
Therefore, the main difference with the dolfin-adjoint
tutorial is that I want to use a Dirichlet BC as the control, and not a forcing. I will impose the boundary conditions weakly using the Nitsche’s method as in Weak imposition of Dirichlet conditions for the Poisson problem — FEniCSx tutorial, so that I can use ufl
differentiation. For simplicity, I have considered that the control boundary f(t) is homogeneous in space. Therefore, the rows of the control vector \boldsymbol{m} are the value of the control boundary at each time step, \boldsymbol{m}(n) = f(t^{n}).
Using the adjoint method, I compute the gradient of J with respect to the design parameter \boldsymbol{m} as follows:
\frac{dJ}{d\boldsymbol{m}} = \frac{\partial J}{\partial \boldsymbol{m}} + \left(\frac{\partial \boldsymbol{R}}{\partial\boldsymbol{m}}\right)^{T}\boldsymbol{\lambda}
where \boldsymbol{\lambda} is the adjoint variable, solution of the adjoint problem:
\left(\frac{\partial\boldsymbol{R}}{\partial\boldsymbol{u}}\right)^{T}\boldsymbol{\lambda} = -\frac{\partial J}{\partial\boldsymbol{u}}
The terminal condition for the adjoint variable is:
\boldsymbol{\lambda}_{T} = - (\boldsymbol{u}(T) - \boldsymbol{u}_{obs})
and the boundary conditions are homogeneous Dirichlet everywhere.
I followed a similar procedure as in Calculate sensitivity when the control is a fem function. However, the Taylor test did not pass. When I checked the gradient, I got a vector whose rows are all the same (meaning that the adjoint problem is not solved properly).
Here is my simplified code:
from mpi4py import MPI
from petsc4py import PETSc
import dolfinx
import ufl
from ufl import Measure, dot, inner, grad, TestFunction, adjoint, derivative, action, Circumradius
from dolfinx.mesh import CellType, create_rectangle
from dolfinx import fem, nls, la, io, default_scalar_type
import numpy as np
import dolfinx.nls.petsc
import dolfinx.fem.petsc
# Mesh and function space
mesh = dolfinx.mesh.create_rectangle(MPI.COMM_WORLD, [[0, 0], [1, 1]], [64, 64], CellType.triangle)
n = ufl.FacetNormal(mesh)
Q = fem.FunctionSpace(mesh, ("CG", 1))
# Define solution vector and test function
u = fem.Function(Q)
u_s = fem.Function(Q) # Solution
q = TestFunction(Q)
# Sensitivity will be calculated wrt this
f = fem.Function(Q) # The Dirichlet BC function in time f is the design variable
f.name = "Control"
# Numerical parameters
dx = Measure("dx")
dt = 1e-2
k = fem.Constant(mesh, dt)
nu = fem.Constant(mesh, 1.0e-2)
num_steps = 100
# Boundaries
def left(x):
return np.isclose(x[0], 0)
def right(x):
return np.isclose(x[0], 1)
def top(x):
return np.isclose(x[1], 1)
def bottom(x):
return np.isclose(x[1], 0)
fdim = mesh.topology.dim - 1
left_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, left)
right_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, right)
top_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, top)
bottom_facets = dolfinx.mesh.locate_entities_boundary(mesh, fdim, bottom)
# Mark zero Dirichlet facets (left, top and bottom) with 1, Control Dirichlet facets (right) with 2
marked_facets = np.hstack([left_facets, right_facets, top_facets, bottom_facets])
marked_values = np.hstack([np.full_like(left_facets, 1), np.full_like(right_facets, 2), np.full_like(top_facets, 1), np.full_like(bottom_facets, 1)])
sorted_facets = np.argsort(marked_facets)
facet_tag = dolfinx.mesh.meshtags(mesh, fdim, marked_facets[sorted_facets], marked_values[sorted_facets])
ds = ufl.Measure('ds', domain=mesh, subdomain_data=facet_tag)
# BCs will be imposed weakly to get sensitivities using UFL derivatives
bcs = []
# Initial condition
def init_cond(x):
return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
u_s.interpolate(init_cond)
# Residual of the variational form of Heat equation: R = (T_comp - T_comp_old) + dt*(S_comp) - dt*BC_comp
# Temporal component
T_comp = inner(u,q)*dx
T_comp_old = inner(u_s,q)*dx
# Space components
S_comp = nu*inner(grad(u), grad(q))*dx
# Boundary components using Nitsche’s method
h = 2 * Circumradius(mesh)
alpha = fem.Constant(mesh, default_scalar_type(10))
# Zero Dirichlet at left, top and bottom boundaries
uD = fem.Function(Q)
with uD.vector.localForm() as loc:
loc.set(0.0)
BC_comp = -inner(dot(n,grad(u)), q)*ds(1)
BC_comp -= inner(dot(n,grad(q)), u)*ds(1)
BC_comp += alpha/h*inner(u,q)*ds(1)
BC_comp += inner(dot(n,grad(q)),uD)*ds(1)
BC_comp -= alpha/h*inner(uD,q)*ds(1)
# Control Dirichlet (u=f(t)) at right boundary
BC_comp += -inner(dot(n,grad(u)), q)*ds(2)
BC_comp -= inner(dot(n,grad(q)), u)*ds(2)
BC_comp += alpha/h*inner(u,q)*ds(2)
BC_comp += inner(dot(n,grad(q)),f)*ds(2)
BC_comp -= alpha/h*inner(f,q)*ds(2)
# Residual
R = (T_comp - T_comp_old) + k*(S_comp) - k*BC_comp
# Solver for the direct problem
problem_dir = fem.petsc.NonlinearProblem(R, u, bcs)
solver = nls.petsc.NewtonSolver(MPI.COMM_WORLD, problem_dir)
solver.rtol = 1e-16
solver.atol = 1e-16
# Define desired u profile at t=T (final time)
def profile(x):
return x[0]*(1-x[0])*x[1]*(1-x[1])
u_obs = fem.Function(Q)
u_obs.interpolate(profile)
# Define cost function
J = (1/2)*inner(u - u_obs, u - u_obs)*dx
J_form = fem.form(J)
# Adjoint problem
lmbda = fem.Function(Q)
# Residual of the heat equation with homogeneous Dirichlet everywhere
BC_comp_0 = -inner(dot(n,grad(u)), q)*ds
BC_comp_0 -= inner(dot(n,grad(q)), u)*ds
BC_comp_0 += alpha/h*inner(u,q)*ds
BC_comp_0 += inner(dot(n,grad(q)),uD)*ds
BC_comp_0 -= alpha/h*inner(uD,q)*ds
R0 = (T_comp - T_comp_old) + k*(S_comp) - k*BC_comp_0
# Bilinear and linear form of the adjoint problem
lhs = adjoint(derivative(R0, u)) # Change sign to integrate forward in time
rhs = -derivative(J, u)
# Create adjoint problem solver
A = fem.petsc.assemble_matrix(fem.form(lhs), bcs=bcs)
A.assemble()
b = fem.petsc.create_vector(fem.form(rhs))
solver_adj = PETSc.KSP().create(mesh.comm)
solver_adj.setOperators(A)
solver_adj.setType(PETSc.KSP.Type.PREONLY)
solver_adj.getPC().setType(PETSc.PC.Type.LU)
# Compute sensitivity: dJ/dm
# Partial derivative of cost (J) w.r.t. control (f)
dJdm_form = fem.form(derivative(J, f))
dJdm = fem.petsc.create_vector(dJdm_form)
dJdm.assemble()
# Partial derivative of residual (R) w.r.t. control (f)
dRdm_form = fem.form(adjoint(derivative(R, f)))
dRdm = fem.petsc.create_matrix(dRdm_form)
# Distribution of gradient (as if the control wasn't constant in space)
dJ = fem.Function(Q)
dJ.name = "dJ"
# Integral in space over control boundary (because we assumed the control to be constant in space)
dJdm_uniform_form = fem.form(inner(dJ*n,n)*ds(2))
# Function to compute the cost
def J_cost(m_n):
# Initial condition
def init_cond(x):
return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
u_s.interpolate(init_cond)
t = 0
for iterations in range(num_steps):
t += dt
with f.vector.localForm() as loc:
loc.set(m_n[iterations])
# Solve heat equation
solver.solve(u)
u.x.scatter_forward()
# Update solution vector
u_s.x.array[:] = u.x.array
J_val = fem.assemble_scalar(J_form)
return J_val
# Function to compute the gradient of the cost w.r.t. f
def grad_J(m_n,dJdm=dJdm):
# Direct problem
# Initial condition
def init_cond(x):
return 10*x[0]*(1-x[0])*x[1]*(1-x[1])
u_s.interpolate(init_cond)
t = 0
for iterations in range(num_steps):
t += dt
with f.vector.localForm() as loc:
loc.set(m_n[iterations])
# Solve heat equation
solver.solve(u)
u.x.scatter_forward()
# Update solution vector
u_s.x.array[:] = u.x.array
# Adjoint problem
# Initial adjoint variable
u_s.x.array[:] = -(u.x.array - u_obs.x.array)
# Integration of adjoint problem + gradient vector computation
dJdm_uniform_vec = np.zeros(num_steps, dtype=PETSc.ScalarType)
for iterations in range(num_steps):
# Update the right hand side reusing the initial vector
with b.localForm() as loc_b:
loc_b.set(0)
fem.petsc.assemble_vector(b, fem.form(rhs))
solver_adj.solve(b, lmbda.vector)
lmbda.x.scatter_forward()
u_s.x.array[:] = lmbda.x.array
# Reassemble dRdm
dRdm.zeroEntries()
fem.petsc.assemble_matrix_mat(dRdm, dRdm_form)
dRdm.assemble()
# Compute gradient
# zero gradient first
with dJdm.localForm() as dJdm_loc:
dJdm_loc.set(0)
fem.petsc.assemble_vector(dJdm, dJdm_form)
dJdm.assemble()
dJdm += dRdm*lmbda.vector
dJdm.ghostUpdate(addv=PETSc.InsertMode.ADD, mode=PETSc.ScatterMode.REVERSE)
dJ.x.array[:] = dJdm
dJdm_uniform_vec[iterations] = fem.assemble_scalar(dJdm_uniform_form)
return dJdm_uniform_vec
# Taylor test
def taylor_test(cost, grad, m_0, p, n=5):
g0 = grad(m_0)
l0 = cost(m_0)
l0 = MPI.COMM_WORLD.allreduce(l0, op=MPI.SUM)
reminder = []
perturbance = []
for i in range(0, n):
l1 = cost(m_0+p)
l1 = MPI.COMM_WORLD.allreduce(l1, op=MPI.SUM)
g = np.sum(g0)#.array)
g = MPI.COMM_WORLD.allreduce(g, op=MPI.SUM)
reminder.append(l1 - l0 - g*p)
perturbance.append(p)
p /= 2
conv_rate = convergence_rates(reminder, perturbance)
return reminder, perturbance, conv_rate
# Compute convergence rate (they should be 2.0 for small p)
def convergence_rates(r, p):
cr = [] # convergence rates
for i in range(1, len(p)):
cr.append(np.log(r[i] / r[i - 1])
/ np.log(p[i] / p[i - 1]))
return cr
# Taylor test
m_0 = np.zeros(num_steps, dtype=PETSc.ScalarType)
r = taylor_test(J_cost, grad_J, m_0, p=1e-4)
print("Convergence rate:", r[2])
Does anyone know what I am doing wrong?
Also, if anyone has a question regarding the problem statement or my code, please let me know. Thanks!