I need to implement a neural network using only NumPy. It takes two inputs, has one hidden layer that uses ReLU as its activation function, and one output layer that uses sigmoid as its activation. The loss I need to use is binary cross-entropy. When I train my NN, the output for every input is around 0.5. I think the problem is in my backpropagation, but I'm not sure whether I implemented it correctly or whether the error is somewhere else in my code. Maybe someone can help me.
```python
import numpy as np
from tqdm import tqdm

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1],
              [0]])

def ReLU(x):
    return np.maximum(0, x)

def d_ReLU(x):
    return np.where(x > 0, 1, 0)

def sigmoid(x):
    return 1/(1 + np.exp(-x))

def d_sigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))

def binary_cross_entropy(y, y_pred):
    loss = np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)))
    return loss

def d_binary_cross_entropy(y, y_pred):
    loss = np.where(y == 1, -1/y_pred, 1/(1 - y_pred))
    return loss

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.1):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.learning_rate = learning_rate
        self.weights_hidden = np.random.uniform(size=(input_size, hidden_size))
        self.weights_output = np.random.uniform(size=(hidden_size, output_size))

    def forward_pass(self, X):
        self.output_hidden = ReLU(np.dot(X, self.weights_hidden))
        output = sigmoid(np.dot(self.output_hidden, self.weights_output))
        return output

    def backward_pass(self, X, y, y_pred):
        output_delta = d_binary_cross_entropy(y, y_pred) * d_sigmoid(y_pred)
        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_ReLU(self.output_hidden)
        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")

        print(self.forward_pass(X))
```
Some corrections are needed in your code:
The first is that if your ReLU returns exactly 0 for negative inputs, those units lose their gradient and can stop learning (the "dying ReLU" problem). To avoid this, use a leaky variant: for negative inputs, return the input multiplied by a relatively small number, e.g. 0.0001, instead of 0, so a small gradient always flows through.
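For example, the activation and its derivative could be changed along these lines (the 0.01 slope used here is a common choice; the exact value is up to you):

```python
def leaky_ReLU(x, alpha=0.01):
    # small slope instead of a hard 0 for negative inputs,
    # so the gradient never vanishes completely
    return np.where(x > 0, x, alpha * x)

def d_leaky_ReLU(x, alpha=0.01):
    return np.where(x > 0, 1.0, alpha)
```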
Second, you should use a smaller learning rate and train for more epochs to get proper results.
The corrected, working code is as follows:
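Here is a minimal sketch applying the two points above. Beyond the leaky ReLU and the smaller learning rate, it also makes a few additional changes that are assumptions on my part: it adds bias terms (without a bias the output for the input [0, 0] is stuck at 0.5), initializes the weights symmetrically around zero, and folds the sigmoid derivative into the output gradient as `y_pred - y` instead of applying `d_sigmoid` to the already-activated output. The hidden size, learning rate, and epoch count are just example values.

```python
import numpy as np

X = np.array([[0, 0],
              [0, 1],
              [1, 0],
              [1, 1]])

y = np.array([[0],
              [1],
              [1],
              [0]])

def leaky_ReLU(x, alpha=0.01):
    # small slope for negative inputs so gradients never die completely
    return np.where(x > 0, x, alpha * x)

def d_leaky_ReLU(x, alpha=0.01):
    return np.where(x > 0, 1.0, alpha)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def binary_cross_entropy(y, y_pred):
    return np.mean(-(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)))

class NeuralNetwork():
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.05):
        self.learning_rate = learning_rate
        # weights centred around zero, plus bias terms for both layers
        self.weights_hidden = np.random.uniform(-1, 1, size=(input_size, hidden_size))
        self.bias_hidden = np.zeros((1, hidden_size))
        self.weights_output = np.random.uniform(-1, 1, size=(hidden_size, output_size))
        self.bias_output = np.zeros((1, output_size))

    def forward_pass(self, X):
        # keep the pre-activation so the ReLU derivative is taken at the right place
        self.hidden_input = np.dot(X, self.weights_hidden) + self.bias_hidden
        self.output_hidden = leaky_ReLU(self.hidden_input)
        output = sigmoid(np.dot(self.output_hidden, self.weights_output) + self.bias_output)
        return output

    def backward_pass(self, X, y, y_pred):
        # gradient of binary cross-entropy combined with sigmoid simplifies to (y_pred - y)
        output_delta = y_pred - y
        hidden_error = output_delta.dot(self.weights_output.T)
        hidden_delta = hidden_error * d_leaky_ReLU(self.hidden_input)
        self.weights_output -= self.learning_rate * self.output_hidden.T.dot(output_delta)
        self.bias_output -= self.learning_rate * output_delta.sum(axis=0, keepdims=True)
        self.weights_hidden -= self.learning_rate * X.T.dot(hidden_delta)
        self.bias_hidden -= self.learning_rate * hidden_delta.sum(axis=0, keepdims=True)

    def train(self, X, y, epochs):
        for epoch in range(epochs):
            output = self.forward_pass(X)
            loss = binary_cross_entropy(y, output)
            self.backward_pass(X, y, output)
            if (epoch + 1) % 1000 == 0:
                print(f"Epoch {epoch + 1}/{epochs} - Loss: {loss:.4f}")
        print(self.forward_pass(X))

nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1, learning_rate=0.05)
nn.train(X, y, epochs=10000)
```

With a run like this, the four predictions should move toward [0, 1, 1, 0] instead of all hovering around 0.5.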