This code is supposed to construct a network composed of two sections. first section is an mlp with a single output(u) and the second part consists of a vector [du/dx, -du/dt] and I'm training this vector output based on the input X = [x,t]. But when I run the code it returns nonetype parameters. What do you guys think is wrong?
import torch
import torch.nn as nn
from torch.nn import functional as F
from collections import OrderedDict
import numpy as np
from choose_optimizer import *
from functorch import make_functional
from functorch import vmap
from functorch import jacrev
`# CUDA support
if torch.cuda.is_available():
device = torch.device('cuda')
else:
device = torch.device('cpu')
def div(u):
"""Accepts a function u:R^D -> R^D."""
J = torch.func.jacrev(u)
return lambda x: torch.trace(J(x))
# the deep neural network
class DNN(torch.nn.Module):
def __init__(self, layers, activation, use_batch_norm=False, use_instance_norm=False):
super(DNN, self).__init__()
# parameters
self.depth = len(layers) - 1
if activation == 'identity':
self.activation = torch.nn.Identity
elif activation == 'tanh':
self.activation = torch.nn.Tanh
elif activation == 'relu':
self.activation = torch.nn.ReLU
elif activation == 'gelu':
self.activation = torch.nn.GELU
elif activation == 'sin':
self.activation = sin
self.use_batch_norm = use_batch_norm
self.use_instance_norm = use_instance_norm
layer_list = list()
for i in range(self.depth - 1):
layer_list.append(
('layer_%d' % i, torch.nn.Linear(layers[i], layers[i+1]))
)
if self.use_batch_norm:
layer_list.append(('batchnorm_%d' % i, torch.nn.BatchNorm1d(num_features=layers[i+1])))
if self.use_instance_norm:
layer_list.append(('instancenorm_%d' % i, torch.nn.InstanceNorm1d(num_features=layers[i+1])))
layer_list.append(('activation_%d' % i, self.activation()))
layer_list.append(
('layer_%d' % (self.depth - 1), torch.nn.Linear(layers[-2], layers[-1]))
)
layerDict = OrderedDict(layer_list)
# deploy layers
self.layers = torch.nn.Sequential(layerDict)
def output_vector(self):
#"""Returns an unbatched vector field, i.e. assumes input is a 1D tensor."""
F_fn, params = make_functional(self.layers)
print(len(params))
J_fn = torch.func.jacrev(F_fn, argnums=1)
def A_fn(params, x):
J = J_fn(params, x)
A = J - J.T
return A
def A_flat_fn(params, x):
A = A_fn(params, x)
A_flat = A.reshape(-1)
return A_flat
def ddF(params, x):
D = x.nelement()
dA_flat = torch.func.jacrev(A_flat_fn, argnums=1)(params, x)
Jac_all = dA_flat.reshape(D, D, D)
ddF = torch.vmap(torch.trace)(Jac_all)
return ddF
return ddF, params, A_fn
def forward(self, x):
u_fn, params, A_fn = output_vector()
u = torch.vmap(u_fn, in_dims=(None, 0))(params, x)
#out = self.layers(x)
return u
class PhysicsInformedNN_pbc():
"""PINNs (convection/diffusion/reaction) for periodic boundary conditions."""
def __init__(self, system, X_u_train, u_train, X_f_train, bc_lb, bc_ub, layers, G, nu, beta, rho, optimizer_name, lr,
net, L=1, activation='tanh', loss_style='mean'):
self.system = system
self.x_u = torch.tensor(X_u_train[:, 0:1], requires_grad=True).float().to(device)
self.t_u = torch.tensor(X_u_train[:, 1:2], requires_grad=True).float().to(device)
self.x_f = torch.tensor(X_f_train[:, 0:1], requires_grad=True).float().to(device)
self.t_f = torch.tensor(X_f_train[:, 1:2], requires_grad=True).float().to(device)
self.x_bc_lb = torch.tensor(bc_lb[:, 0:1], requires_grad=True).float().to(device)
self.t_bc_lb = torch.tensor(bc_lb[:, 1:2], requires_grad=True).float().to(device)
self.x_bc_ub = torch.tensor(bc_ub[:, 0:1], requires_grad=True).float().to(device)
self.t_bc_ub = torch.tensor(bc_ub[:, 1:2], requires_grad=True).float().to(device)
self.net = net
if self.net == 'DNN':
self.dnn = DNN(layers, activation).to(device)
else: # "pretrained" can be included in model path
# the dnn is within the PINNs class
self.dnn = torch.load(net).dnn
self.u = torch.tensor(u_train, requires_grad=True).float().to(device)
self.layers = layers
self.nu = nu
self.beta = beta
self.rho = rho
self.G = torch.tensor(G, requires_grad=True).float().to(device)
self.G = self.G.reshape(-1, 1)
self.L = L
self.lr = lr
self.optimizer_name = optimizer_name
self.optimizer = choose_optimizer(self.optimizer_name, self.dnn.parameters(), self.lr)
self.loss_style = loss_style
self.iter = 0
def net_u(self, x, t):
"""The standard DNN that takes (x,t) --> u."""
u = self.dnn(torch.cat([x, t], dim=1))
return u
def net_f(self, x, t):
""" Autograd for calculating the residual for different systems."""
u = self.net_u(x, t)
u_t = torch.autograd.grad(
u, t,
grad_outputs=torch.ones_like(u),
retain_graph=True,
create_graph=True
)[0]
u_x = torch.autograd.grad(
u, x,
grad_outputs=torch.ones_like(u),
retain_graph=True,
create_graph=True
)[0]
u_xx = torch.autograd.grad(
u_x, x,
grad_outputs=torch.ones_like(u_x),
retain_graph=True,
create_graph=True
)[0]
if 'convection' in self.system or 'diffusion' in self.system:
f = u_t - self.nu*u_xx + self.beta*u_x - self.G
elif 'rd' in self.system:
f = u_t - self.nu*u_xx - self.rho*u + self.rho*u**2
elif 'reaction' in self.system:
f = u_t - self.rho*u + self.rho*u**2
return f
def net_b_derivatives(self, u_lb, u_ub, x_bc_lb, x_bc_ub):
"""For taking BC derivatives."""
u_lb_x = torch.autograd.grad(
u_lb, x_bc_lb,
grad_outputs=torch.ones_like(u_lb),
retain_graph=True,
create_graph=True
)[0]
u_ub_x = torch.autograd.grad(
u_ub, x_bc_ub,
grad_outputs=torch.ones_like(u_ub),
retain_graph=True,
create_graph=True
)[0]
return u_lb_x, u_ub_x
def loss_pinn(self, verbose=True):
""" Loss function. """
if torch.is_grad_enabled():
self.optimizer.zero_grad()
u_pred = self.net_u(self.x_u, self.t_u)
u_pred_lb = self.net_u(self.x_bc_lb, self.t_bc_lb)
u_pred_ub = self.net_u(self.x_bc_ub, self.t_bc_ub)
if self.nu != 0:
u_pred_lb_x, u_pred_ub_x = self.net_b_derivatives(u_pred_lb, u_pred_ub, self.x_bc_lb, self.x_bc_ub)
#f_pred = self.net_f(self.x_f, self.t_f)
if self.loss_style == 'mean':
loss_u = torch.mean((self.u - u_pred) ** 2)
loss_b = torch.mean((u_pred_lb - u_pred_ub) ** 2)
if self.nu != 0:
loss_b += torch.mean((u_pred_lb_x - u_pred_ub_x) ** 2)
#loss_f = torch.mean(f_pred ** 2)
elif self.loss_style == 'sum':
loss_u = torch.sum((self.u - u_pred) ** 2)
loss_b = torch.sum((u_pred_lb - u_pred_ub) ** 2)
if self.nu != 0:
loss_b += torch.sum((u_pred_lb_x - u_pred_ub_x) ** 2)
#loss_f = torch.sum(f_pred ** 2)
loss = loss_u + loss_b #+ self.L*loss_f
print(loss)
if loss.requires_grad:
loss.backward()
grad_norm = 0
for p in self.dnn.parameters():
print(p.grad)
param_norm = p.grad.detach().data.norm(2)
grad_norm += param_norm.item() ** 2
grad_norm = grad_norm ** 0.5
if verbose:
if self.iter % 100 == 0:
print(
'epoch %d, gradient: %.5e, loss: %.5e, loss_u: %.5e, loss_b: %.5e, loss_f: %.5e' % (self.iter, grad_norm, loss.item(), loss_u.item(), loss_b.item(), loss_f.item())
)
self.iter += 1
return loss
def train(self):
self.dnn.train()
self.optimizer.step(self.loss_pinn)
def predict(self, X):
x = torch.tensor(X[:, 0:1], requires_grad=True).float().to(device)
t = torch.tensor(X[:, 1:2], requires_grad=True).float().to(device)
self.dnn.eval()
u = self.net_u(x, t)
u = u.detach().cpu().numpy()
return u
I tried different methods for calculating gradients of u with respect to x and t. but they all return the same error.