Torch self implementation of Neural Network

81 Views Asked by At

I made this Neural Network but something is wrong with my backward derivatives which cause the wights to be 'nan' and the prediction to be very low.

I used Softmax and Cross entropy loss.

class Neural_Network:
    def __init__(self, input_size, output_size, hidden_size):
        # parameters
        self.inputSize = input_size
        self.outputSize = output_size
        self.hiddenSize = hidden_size
        
        # weights
        self.W1 = torch.randn(self.inputSize, self.hiddenSize)
        self.b1 = torch.zeros(self.hiddenSize) 
        
        self.W2 = torch.randn(self.hiddenSize, int(self.hiddenSize/2))
        self.b2 = torch.zeros(int(self.hiddenSize/2))
        
        self.W3 = torch.randn(int(self.hiddenSize/2), self.outputSize)
        self.b3 = torch.zeros(self.outputSize)

    def forward(self, X):
        self.z1 = torch.matmul(X, self.W1) + self.b1
        self.h1 = tanh(self.z1) 
        self.z2 = torch.matmul(self.h1, self.W2) + self.b2
        self.h2 = tanh(self.z2)
        self.z3 = torch.matmul(self.h2, self.W3) + self.b3
        
        return softmax(self.z3)
    
    def backward(self, X, y, y_hat, lr=.1):
        batch_size=y.size(0)

       
        y_hat1 = tnnfunc.one_hot(y_hat)
        y1 = tnnfunc.one_hot(y)

        if y1.shape[1]<10:
          print('enter y1 reshape')
          diff = 10-y1.shape[1]
          add = torch.zeros(y1.shape[0],diff)
          y1 = torch.cat((y1,add),1)
          #y1=y1_T.T

        if y_hat1.shape[1]<10:
          print('enter y1_hat reshape')
          diff = 10-y_hat1.shape[1]
          add = torch.zeros(y_hat1.shape[0],diff)
          y_hat1 = torch.cat((y_hat1,add),1)
          
        dl_dz3=(1/batch_size)*(y_hat1-y1)
        dl_dh2=torch.matmul(dl_dz3,torch.t(self.W3))
        dl_dz2=dl_dh2*(self.h2)
        dl_dh1=torch.matmul(dl_dz2,torch.t(self.W2))
        dl_dz1=dl_dh1*self.h1
                  
        self.W1 -= lr*torch.matmul(torch.t(X), dl_dz1)
        self.b1 -= lr*torch.matmul(torch.t(dl_dz1), torch.ones(batch_size))
        self.W2 -= lr*torch.matmul(torch.t(self.h1), dl_dz2)
        self.b2 -= lr*torch.matmul(torch.t(dl_dz2), torch.ones(batch_size))
        self.W3 -= lr*torch.matmul(torch.t(self.h2), dl_dz3)
        self.b3 -= lr*torch.matmul(torch.t(dl_dz3), torch.ones(batch_size))
        
    def train(self, X, y):
        print('enter training')
        # forward + backward pass for training
        o = self.forward(X)
        _, predicted = torch.max(o.data, 1)
        self.backward(X, y, predicted)
0

There are 0 best solutions below