I built a network with 2 inputs (namely x1, x2) and 2 outputs (namely y1, y2). I use a DataLoader to feed the data to my network, and the batch size is 100. How can I get the partial derivative of y1 with respect to x1 and the partial derivative of y2 with respect to x2 for every sample in the batch?
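To make the target concrete: for a single sample, what I want is the diagonal of the 2x2 Jacobian of the network. A minimal single-sample sketch (assuming the NeuralNetwork model defined below, and using torch.autograd.functional.jacobian, which handles one sample at a time rather than the whole batch) would be:

import torch
from torch.autograd.functional import jacobian

# hypothetical single-sample reference: jac is 2x2,
# with jac[0, 0] = dy1/dx1 and jac[1, 1] = dy2/dx2
x = torch.tensor([0.3, -0.7])
jac = jacobian(model, x)
dy1_dx1, dy2_dx2 = jac[0, 0], jac[1, 1]

But I want these two values for all 100 samples in a batch at once.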
Here is some of my code:
import torch
import torch.nn as nn
from torch import autograd
from torch.utils.data import DataLoader

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.input_layer = nn.Linear(2, 5)
        self.hidden_layers = nn.ModuleList([nn.Linear(5, 5) for _ in range(3)])
        self.output_layer = nn.Linear(5, 2)
        self.activation = nn.Tanh()

    def forward(self, x):
        x = self.activation(self.input_layer(x))
        for hidden_layer in self.hidden_layers:
            x = self.activation(hidden_layer(x))
        x = self.output_layer(x)
        return x
model = NeuralNetwork()
train_loader = DataLoader(data, batch_size=100, shuffle=True)
for epoch in range(iterations):
    for i, (x_train, y_label) in enumerate(train_loader, 0):
        # x_train is 100x2, y_label is 100x2, y_net is 100x2
        x_train = autograd.Variable(x_train, requires_grad=True)
        y_net = model(x_train.float())

        a10 = torch.tensor([1.0, 0.0], requires_grad=True)  # currently unused below
        a01 = torch.tensor([0.0, 1.0], requires_grad=True)  # currently unused below

        # zero out the y2 column so only y1 is propagated back to x_train
        grad_output1 = torch.ones_like(y_net)
        grad_output1[:, 1] = 0
        dY1dX = autograd.grad(y_net, x_train, grad_outputs=grad_output1,
                              create_graph=True, retain_graph=True)[0]

        # zero out the y1 column so only y2 is propagated back to x_train
        grad_output2 = torch.ones_like(y_net)
        grad_output2[:, 0] = 0
        dY2dX = autograd.grad(y_net, x_train, grad_outputs=grad_output2,
                              create_graph=True, retain_graph=True)[0]
I am not sure whether this is the right way and whether I will get the correct partial derivatives.
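What I currently assume (and want to confirm) is that the two gradients already hold the per-sample partial derivatives column by column, so that I could read them out as below. The names dy1_dx1, dy2_dx2, fd_dy1_dx1 and eps are just placeholders I made up for this question, and the finite-difference part is only a sanity check on the first sample:

# my assumption: each row of the batch is mapped independently, so
#   dY1dX[:, 0] -> dy1/dx1 per sample, dY1dX[:, 1] -> dy1/dx2 per sample
#   dY2dX[:, 0] -> dy2/dx1 per sample, dY2dX[:, 1] -> dy2/dx2 per sample
dy1_dx1 = dY1dX[:, 0]   # shape (100,)
dy2_dx2 = dY2dX[:, 1]   # shape (100,)

# optional finite-difference sanity check on the first sample of the batch
eps = 1e-4
x0 = x_train[0].detach().clone().float()
x_plus, x_minus = x0.clone(), x0.clone()
x_plus[0] += eps
x_minus[0] -= eps
fd_dy1_dx1 = (model(x_plus)[0] - model(x_minus)[0]) / (2 * eps)
print(dy1_dx1[0].item(), fd_dy1_dx1.item())

Is this indexing of dY1dX and dY2dX the correct way to get the per-sample derivatives?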