Hello,
I have created a simplified version of the code with a reduced dataset to investigate a performance issue. However, I noticed that vector_time
increases with each epoch, even in this simplified setup.
Below is the code I used, along with comments explaining each section.
Code
from fenics import *
from fenics_adjoint import *
import numpy as np
import torch
import torch.nn as nn
import time, datetime
from tqdm import tqdm
import gc
# Set logging level to WARNING to suppress information messages
set_log_level(LogLevel.WARNING)
# Define the boundary condition function
def boundary(x, on_boundary):
return on_boundary
# Define a function to generate coefficient function k(x)
def make_k(x):
return np.ones_like(x)
# Generate Dirichlet boundary condition (u_bc) and source term (f) expressions
def generate_u_bc_and_f(a, b):
u_bc_expr = Expression('cos(a*pi*x[0]) * cos(b*pi*x[1])',
degree=3, pi=np.pi, a=a, b=b)
f_expr = Expression('(a*a + b*b)*pi*pi*cos(a*pi*x[0])*cos(b*pi*x[1])',
degree=3, pi=np.pi, a=a, b=b)
return u_bc_expr, f_expr
# Define a simple neural network for k(x)
class kx_Net(nn.Module):
def __init__(self, layers):
super(kx_Net, self).__init__()
self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
self.activation = nn.ReLU()
def forward(self, x):
for i in range(len(self.linears)-1):
x = self.linears[i](x)
x = self.activation(x)
k = self.linears[-1](x)
return k
# Function to manually update model gradients
def manual_update_model_gradients(model, grad_L_theta):
for param, grad in zip(model.parameters(), grad_L_theta):
if param.grad is None:
param.grad = torch.zeros_like(param)
param.grad.add_(grad)
# Function to solve the FEniCS problem
def solve_fenics_problem(k, V, u_bc_expr, f_expr):
"""
Solves the FEniCS problem and extracts Neumann data.
"""
# Define Dirichlet boundary condition
u_bc = DirichletBC(V, u_bc_expr, boundary)
# Define the variational problem
u = TrialFunction(V)
v = TestFunction(V)
F = k * dot(grad(u), grad(v)) * dx - f_expr * v * dx
a, L = lhs(F), rhs(F)
# Solve the FEniCS problem
u_sol = Function(V)
A, b = assemble_system(a, L, u_bc)
solve(A, u_sol.vector(), b)
# Free memory by deleting temporary objects
del u_bc, u, v, F, a, L, A, b
return u_sol
# Mesh and function space setup
mesh = UnitSquareMesh(256, 256)
V = FunctionSpace(mesh, 'P', 1)
# Prepare data
data_num = 2**8
dof_coordinates = V.tabulate_dof_coordinates().reshape((-1, 2))
dof_coordinates_tensor = torch.tensor(dof_coordinates, dtype=torch.float32)
k_exact = make_k(dof_coordinates_tensor[:,[0]])
k_exact_func = Function(V)
k_exact_func.vector().set_local(k_exact[:,0])
# Random parameters for generating u_bc and f
rand_para = np.random.uniform(0, 2, (data_num, 2))
EPOCHS = 5
# Training loop
def Train(V, data_num, epoch):
pbar = tqdm(range(data_num), desc=f"Epoch {epoch}/{EPOCHS}")
compute_grad_time = vector_time = 0
for batch_idx in pbar:
a, b = rand_para[batch_idx]
# Zero out gradients for the optimizer
optimizer_k.zero_grad()
# Compute k(x) using the neural network
k_tensor = model_k(dof_coordinates_tensor)
k_func = Function(V)
k_func.vector().set_local(k_tensor[:,0].detach().cpu().numpy())
# Solve the FEniCS problems
u_bc_expr, f_expr = generate_u_bc_and_f(a, b)
u_output = solve_fenics_problem(k_func, V, u_bc_expr, f_expr)
target_u = solve_fenics_problem(k_exact_func, V, u_bc_expr, f_expr)
# Define Neumann flux expressions
neum_output_func = k_func * u_output
neum_exact_func = k_exact_func * target_u
# Compute the difference in Neumann flux
diff_neum_func = neum_output_func - neum_exact_func
# Define the loss function
L = assemble(0.5 * inner(diff_neum_func, diff_neum_func) * dx)
control_k = Control(k_func)
# Gradient computation
compute_grad_start_time = time.time()
dLdk = ReducedFunctional(L, control_k)
compute_grad_end_time = time.time()
# Retrieve vector values
vector_start_time = time.time()
grad_L_k = dLdk.derivative().vector().get_local().mean()
vector_end_time = time.time()
# Compute gradients and update model parameters
grad_k_theta = torch.autograd.grad(outputs=k_tensor.sum(), inputs=model_k.parameters())
grad_L_theta = [grad_L_k * gk for gk in grad_k_theta]
manual_update_model_gradients(model_k, grad_L_theta)
optimizer_k.step()
# Update timing metrics
compute_grad_time += compute_grad_end_time - compute_grad_start_time
vector_time += vector_end_time - vector_start_time
# Free memory
del target_u, k_func, u_bc_expr, f_expr, u_output, neum_output_func, neum_exact_func, diff_neum_func, control_k, dLdk
gc.collect()
return compute_grad_time, vector_time
# Define neural network architecture
layers = [2, 100, 100, 1]
model_k = kx_Net(layers)
optimizer_k = torch.optim.Adam(model_k.parameters(), lr=0.001)
# Run training loop
for epoch in range(1, EPOCHS + 1):
compute_grad_time, vector_time = Train(V, data_num, epoch)
print(f"compute_grad_time = {str(datetime.timedelta(seconds=int(compute_grad_time)))}")
print(f"vector_time = {str(datetime.timedelta(seconds=int(vector_time)))}")
Results
Here are the training results:
Epoch 1/5:
compute_grad_time = 0:00:00
vector_time = 0:01:28
Epoch 2/5:
compute_grad_time = 0:00:00
vector_time = 0:01:32
Epoch 3/5:
compute_grad_time = 0:00:00
vector_time = 0:01:35
Epoch 4/5:
compute_grad_time = 0:00:00
vector_time = 0:01:38
Epoch 5/5:
compute_grad_time = 0:00:00
vector_time = 0:01:42
Questions
- Could there be a memory or computation graph accumulation issue in
ReducedFunctional
or dLdk.derivative()
?
- Are there any optimizations or best practices to prevent this increasing runtime in FEniCS-adjoint?
Any insights or suggestions would be greatly appreciated. Thank you! 