I have been working on my first neural net, building it completely from scratch. However, when I print the cost function to track the model's progress, it only rises. The data I am using is just 1s and 0s; I wanted something simple for my first model. The network has one hidden layer of two tanh nodes, which feeds into a single sigmoid output unit.
The code is below, copied from the markdown version of my Jupyter notebook:
import numpy as np
import matplotlib.pyplot as plt
#creating our data
x = np.array([[0, 1, 0, 1], [0, 1, 0, 1], [1, 0, 1, 0], [1, 0, 1, 0], [0, 1, 0, 1]])
y = np.array([0, 1, 0, 1])
y = y.reshape(1, 4)
print(x)
[[0 1 0 1]
[0 1 0 1]
[1 0 1 0]
[1 0 1 0]
[0 1 0 1]]
print(y)
[[0 1 0 1]]
print(x.shape)
(5, 4)
print(y.shape)
(1, 4)
#initialize parameters
def rand_params():
    W1 = np.random.randn(2, 5)
    b1 = np.zeros([2, 1])
    W2 = np.random.randn(1, 2)
    b2 = np.zeros([1, 1])
    return W1, b1, W2, b2
W1, b1, W2, b2 = rand_params()
print(f"W1: {W1}, b1: {b1}")
print(W1.shape, b1.shape)
W1: [[ 0.60366603 -0.12225707 -0.44483219 -1.40200651 -3.02768333]
[-0.98659326 -0.91009808 0.72461745 0.20677563 0.17493105]], b1: [[0.]
[0.]]
(2, 5) (2, 1)
print(f"W2: {W2}, b2: {b2}")
print(W2.shape, b2.shape)
W2: [[0.05478931 0.99102802]], b2: [[0.]]
(1, 2) (1, 1)
#forward propagation
def tanh(z):
    a = (np.exp(z) - np.exp(-z)) / (np.exp(z) + np.exp(-z))
    return a
def sigmoid(z):
    a = 1 / (1 + np.exp(z))
    return a
def der_tanh(z):
    a = 1 - (tanh(z))**2
    return a
def der_sigmoid(z):
    a = sigmoid(z) * (1 - sigmoid(z))
    return a
#forward computation
def forward_prop(x, W1, b1, W2, b2):
    Z1 = np.dot(W1, x) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    y_hat = sigmoid(Z2)
    return Z1, A1, Z2, y_hat
Z1, A1, Z2, y_hat = forward_prop(x, W1, b1, W2, b2)
def cost_function(y, y_hat, x):
    m = x.shape[1]
    J = -1 / m * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    return J, m
J, m = cost_function(y, y_hat, x)
#back propagation
def back_prop():
    dZ2 = y_hat - y
    dW2 = 1 / m * np.dot(dZ2, A1.T)
    db2 = 1 / m * np.sum(dZ2, axis=1, keepdims=True)
    dZ1 = np.dot(W2.T, dZ2) * der_tanh(Z1)
    dW1 = 1 / m * np.dot(dZ1, x.T)
    db1 = 1 / m * np.sum(dZ1, axis=1, keepdims=True)
    return dW2, db2, dW1, db1
dW2, db2, dW1, db1 = back_prop()
#optimizing weights + biases
def update(W1, b1, W2, b2):
    lr = 0.01
    W1 = W1 - lr * dW1
    b1 = b1 - lr * db1
    W2 = W2 - lr * dW2
    b2 = b2 - lr * db2
    return W1, b1, W2, b2
W1, b1, W2, b2 = update(W1, b1, W2, b2)
# model
costs = []
W1, b1, W2, b2 = rand_params()
for epoch in range(1500):
    Z1, A1, Z2, y_hat = forward_prop(x, W1, b1, W2, b2)
    J, m = cost_function(y, y_hat, x)
    if epoch % 100 == 0:
        print(J)
        costs.append(J)
    dW2, db2, dW1, db1 = back_prop()
    W1, b1, W2, b2 = update(W1, b1, W2, b2)
plt.plot(costs)
0.8188282199860928
1.1665507761146539
1.6868025884074527
2.3940967534280753
3.2473658397522387
4.183790888527539
5.158135855432985
6.147978715339146
7.143956636487831
8.142392777023431
9.141860280152706
10.141802197682296
11.142002210070622
12.142384342966537
13.142939005842882
Apart from any other possible bugs, sigmoid(z) should be defined as:
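def sigmoid(z):
    a = 1 / (1 + np.exp(-z))  # note the minus sign on z
    return a

As written, 1 / (1 + np.exp(z)) actually computes sigmoid(-z), so the forward pass outputs the flipped probability while back_prop still uses dZ2 = y_hat - y, which is derived for the standard sigmoid. That gradient therefore has the wrong sign, every "descent" step moves uphill, and the cost rises steadily, which matches the numbers you printed. With the corrected definition the cost should decrease over the epochs.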