I made this neural network, but something is wrong with my backward derivatives: the weights turn into 'nan' and the predictions end up very poor.
I am using softmax with cross-entropy loss.
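For reference, these are the formulas I think the backward pass should be implementing, written as a quick sketch in plain torch (this is not my actual code; the function name reference_grads and the num_classes=10 default are just for illustration):

import torch
import torch.nn.functional as F

# Sketch of the gradients I believe softmax + cross-entropy and tanh should give.
def reference_grads(z3, h2, y, num_classes=10):
    batch_size = y.size(0)
    probs = F.softmax(z3, dim=1)                        # predicted class probabilities
    y1 = F.one_hot(y, num_classes=num_classes).float()  # targets as one-hot vectors
    dl_dz3 = (probs - y1) / batch_size                  # dL/dz3 for cross-entropy over softmax
    dtanh2 = 1 - h2 ** 2                                # tanh'(z2), using h2 = tanh(z2)
    return dl_dz3, dtanh2

My actual implementation is below.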
class Neural_Network:
    def __init__(self, input_size, output_size, hidden_size):
        # parameters
        self.inputSize = input_size
        self.outputSize = output_size
        self.hiddenSize = hidden_size
        # weights
        self.W1 = torch.randn(self.inputSize, self.hiddenSize)
        self.b1 = torch.zeros(self.hiddenSize)
        self.W2 = torch.randn(self.hiddenSize, int(self.hiddenSize / 2))
        self.b2 = torch.zeros(int(self.hiddenSize / 2))
        self.W3 = torch.randn(int(self.hiddenSize / 2), self.outputSize)
        self.b3 = torch.zeros(self.outputSize)

    def forward(self, X):
        self.z1 = torch.matmul(X, self.W1) + self.b1
        self.h1 = tanh(self.z1)
        self.z2 = torch.matmul(self.h1, self.W2) + self.b2
        self.h2 = tanh(self.z2)
        self.z3 = torch.matmul(self.h2, self.W3) + self.b3
        return softmax(self.z3)

    def backward(self, X, y, y_hat, lr=0.1):
        batch_size = y.size(0)
        y_hat1 = tnnfunc.one_hot(y_hat)
        y1 = tnnfunc.one_hot(y)
        # pad the one-hot tensors to 10 columns if the batch does not contain every class
        if y1.shape[1] < 10:
            print('enter y1 reshape')
            diff = 10 - y1.shape[1]
            add = torch.zeros(y1.shape[0], diff)
            y1 = torch.cat((y1, add), 1)
        if y_hat1.shape[1] < 10:
            print('enter y1_hat reshape')
            diff = 10 - y_hat1.shape[1]
            add = torch.zeros(y_hat1.shape[0], diff)
            y_hat1 = torch.cat((y_hat1, add), 1)
        # gradients
        dl_dz3 = (1 / batch_size) * (y_hat1 - y1)
        dl_dh2 = torch.matmul(dl_dz3, torch.t(self.W3))
        dl_dz2 = dl_dh2 * (self.h2)
        dl_dh1 = torch.matmul(dl_dz2, torch.t(self.W2))
        dl_dz1 = dl_dh1 * self.h1
        # SGD weight updates
        self.W1 -= lr * torch.matmul(torch.t(X), dl_dz1)
        self.b1 -= lr * torch.matmul(torch.t(dl_dz1), torch.ones(batch_size))
        self.W2 -= lr * torch.matmul(torch.t(self.h1), dl_dz2)
        self.b2 -= lr * torch.matmul(torch.t(dl_dz2), torch.ones(batch_size))
        self.W3 -= lr * torch.matmul(torch.t(self.h2), dl_dz3)
        self.b3 -= lr * torch.matmul(torch.t(dl_dz3), torch.ones(batch_size))

    def train(self, X, y):
        print('enter training')
        # forward + backward pass for training
        o = self.forward(X)
        _, predicted = torch.max(o.data, 1)
        self.backward(X, y, predicted)
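This is roughly how I drive it; the sizes and the random batch below are placeholders, not my real data, and it assumes the imports my class already uses (torch, tanh, softmax, tnnfunc):

import torch

# Placeholder batch just to show how I call the class.
X = torch.randn(64, 784)             # batch of 64 flattened 28x28 inputs (made up here)
y = torch.randint(0, 10, (64,))      # integer class labels 0..9

net = Neural_Network(input_size=784, output_size=10, hidden_size=128)
for step in range(10):
    net.train(X, y)                  # forward + manual backward/update
print(net.W1)                        # after a few steps this is full of nan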