Neural network works with Leaky ReLU but not with sigmoid

I am coding my own neural network with backpropagation and tried a simple pattern to test it: there are 3 input neurons and 2 output neurons, where the first output value is the XOR of the first two inputs and the second output simply copies the third input (for instance, the input [1, 0, 1] should produce [1, 1]).

I wrote the code below, and it works fine with a Leaky ReLU activation function, which makes me think the backpropagation is correct, but the same code can't handle a sigmoid. Is there something I am not seeing?
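
For reference, the only difference between the two runs is which pair of functions gets vectorized in Net.__init__ (a sketch of the Leaky ReLU variant; the code below, as posted, uses the sigmoid pair):

# The Leaky ReLU variant of the two assignments in Net.__init__
self.activation = np.vectorize(leaky_ReLu)
self.der_activation = np.vectorize(der_leaky_ReLu)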

import numpy as np
import random
import time
# import keyboard  # only needed for the commented-out hotkey in train2
import jsonpickle

start_time = time.time()

def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def der_sigmoid(x):
    # Derivative of the sigmoid; algebraically equal to sigmoid(x) * (1 - sigmoid(x)).
    ex = np.exp(-x)
    return ex / ((1 + ex) ** 2)

def leaky_ReLu(x):
    # Scalar definitions; both are wrapped with np.vectorize in Net.__init__.
    return x * 0.01 if x < 0 else x


def der_leaky_ReLu(x):
    return 0.01 if x < 0 else 1


class Net:
    def __init__(self, topology, learning_rate=0.1):
        self.topology = topology
        self.learning_rate = learning_rate
        # Uniform init in [-0.25, 0.25); the biases list is padded with None so
        # that biases[i + 1] belongs to the layer that weights[i] feeds into.
        self.weights = [np.random.rand(topology[i + 1], topology[i]) * 0.5 - 0.25 for i in range(len(topology) - 1)]
        self.biases = [None] + [np.random.rand(topology[i + 1]) * 0.5 - 0.25 for i in range(len(topology) - 1)]

        # Swap in leaky_ReLu / der_leaky_ReLu here for the Leaky ReLU run.
        self.activation = np.vectorize(sigmoid)
        self.der_activation = np.vectorize(der_sigmoid)

        # Gradient accumulators for one mini-batch.
        self.weight_derivative = [np.zeros((topology[i + 1], topology[i])) for i in range(len(topology) - 1)]
        self.bias_derivative = [None] + [np.zeros(topology[i + 1]) for i in range(len(topology) - 1)]

    def get_output(self, input: np.ndarray):
        # Forward pass only; returns the activations of the output layer.
        if len(input) != self.topology[0]:
            raise ValueError("Expected input size {0} but got {1}".format(self.topology[0], len(input)))

        values = input
        for i in range(len(self.weights)):
            values = self.activation(np.matmul(self.weights[i], values) + self.biases[i + 1])

        return values

    def with_backpropagation(self, input: np.ndarray, expected_output: np.ndarray):
        # Runs a forward pass, then accumulates this sample's gradients
        # into weight_derivative / bias_derivative.
        if len(input) != self.topology[0]:
            raise ValueError("Expected input size {0} but got {1}".format(self.topology[0], len(input)))
        if len(expected_output) != self.topology[-1]:
            raise ValueError("Expected output size {0} but got {1}".format(self.topology[-1], len(expected_output)))

        # Forward pass, keeping the pre-activations (z) and activations of every layer.
        layer_values = [input]
        z_values = [0]  # dummy entry so z_values[i] matches layer i
        values = input
        for i in range(len(self.weights)):
            z = np.clip(np.matmul(self.weights[i], values) + self.biases[i + 1], -300, 300)
            values = self.activation(z)
            z_values.append(z)
            layer_values.append(values)

        # Backward pass: values_vs_cost[i] holds dC/da for layer i, starting
        # from the derivative of the squared-error cost at the output.
        values_vs_cost = [None] * (len(self.topology) - 1) + [2 * (layer_values[-1] - expected_output)]
        for i in range(len(self.topology) - 1, 0, -1):
            der_z = self.der_activation(z_values[i])
            delta = der_z * values_vs_cost[i]  # dC/dz for layer i

            self.bias_derivative[i] += delta
            values_vs_cost[i - 1] = np.matmul(self.weights[i - 1].T, delta)
            self.weight_derivative[i - 1] += np.outer(delta, layer_values[i - 1])

    def reset_derivatives(self):
        self.weight_derivative = [np.zeros((self.topology[i + 1], self.topology[i])) for i in
                                  range(len(self.topology) - 1)]
        self.bias_derivative = [None] + [np.zeros(self.topology[i + 1]) for i in range(len(self.topology) - 1)]

    def train(self, training_data, epochs=500, batch_size=20):
        for epoch in range(epochs):
            print("epoch=", epoch)
            # Sample a mini-batch with replacement and accumulate its gradients.
            for _ in range(batch_size):
                input, output = training_data[random.randint(0, len(training_data) - 1)]
                self.with_backpropagation(np.array(input), np.array(output))

            # Apply the averaged gradients: weights[a] exists for every index
            # but the last, biases[a] for every index but the first.
            for a in range(len(self.topology)):
                if a != len(self.topology) - 1:
                    self.weights[a] -= self.weight_derivative[a] / batch_size * self.learning_rate
                if a != 0:
                    self.biases[a] -= self.bias_derivative[a] / batch_size * self.learning_rate

            self.reset_derivatives()

    def train2(self, training_data, test_data, epochs=500, batch_size=20):
        # Variant of train() that shuffles the data each epoch and reports a test error.
        #keyboard.on_press_key("g", self.save_data)

        for epoch in range(epochs):
            print("epoch=", epoch)
            # random.sample shuffles without np.random.permutation, which rejects
            # these ragged [inputs, outputs] pairs on recent NumPy versions.
            permutation = random.sample(training_data, len(training_data))

            for i in range(len(training_data)):
                print("|", end="")

                input, output = permutation[i]

                self.with_backpropagation(np.array(input), np.array(output))

                if (i != 0 and i % batch_size == 0) or i == len(training_data) - 1:
                    print()
                    for a in range(len(self.topology)):
                        if a != len(self.topology) - 1:
                            self.weights[a] -= self.weight_derivative[a] / batch_size * self.learning_rate
                        if a != 0:
                            self.biases[a] -= self.bias_derivative[a] / batch_size * self.learning_rate

                    self.reset_derivatives()

            test_sample = test_data[random.randint(0, len(test_data) - 1)]
            expectedoutput_test = test_sample[1]
            actualoutput = self.get_output(test_sample[0])

            error = sum([(expectedoutput_test[i] - actualoutput[i]) ** 2 for i in range(len(expectedoutput_test))])
            print()
            print("Epoch {0}: Error {1}      Expected {2} and Got {3}".format(epoch, error, expectedoutput_test,
                                                                              actualoutput))

            self.save_data()

        self.save_data()

    def save_data(self, evt=None):
        print("Initialising saving...")
        json_net = jsonpickle.encode(self)
        # Opening with 'w' already truncates the file, so no separate clearing pass is needed.
        with open("./save_net.json", 'w') as f:
            f.write(json_net)
        print('Net successfully saved')


nums = [0] * 10
def format_data(old_version):
    # Converts a [label, features...] row into [inputs, one-hot label]; not used in the XOR test below.
    lr = np.arange(10)

    desired_number = old_version[0]
    nums[int(desired_number)] += 1
    inputs = old_version[1:].tolist()

    return [inputs, (lr == desired_number).astype(int).tolist()]

if __name__ == "__main__":
    test = Net([3, 2, 2])
    
    # 6 of the 8 possible input patterns; [1, 1, 1] and [0, 1, 0] are held back for testing.
    examples = [[[0, 0, 1], [0, 1]], [[1, 0, 1], [1, 1]], [[0, 1, 1], [1, 1]], [[0, 0, 0], [0, 0]], [[1, 0, 0], [1, 0]], [[1, 1, 0], [0, 0]]]

    test.train(examples, 10000)
    
    print("After")
    print(test.get_output(np.array([1, 1, 1])))
    print(test.get_output(np.array([1, 1, 0])))
    print(test.get_output(np.array([0, 1, 0])))
    print(test.get_output(np.array([0, 1, 1])))
    print(test.get_output(np.array([1, 0, 0])))


    print("Took {} seconds".format(time.time()-start_time))
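
In case it helps, here is a minimal finite-difference check (check_gradients and eps are my own illustration, not part of the network code above); it compares the analytic gradient accumulated by with_backpropagation against a numerical estimate of the squared-error cost for a single weight:

def check_gradients(net, x, y, eps=1e-5):
    # Analytic gradient of one sample for weight [0][0, 0].
    net.reset_derivatives()
    net.with_backpropagation(x, y)
    analytic = net.weight_derivative[0][0, 0]

    # Central-difference estimate of the same partial derivative.
    def cost():
        return float(np.sum((net.get_output(x) - y) ** 2))

    net.weights[0][0, 0] += eps
    c_plus = cost()
    net.weights[0][0, 0] -= 2 * eps
    c_minus = cost()
    net.weights[0][0, 0] += eps  # restore the original weight

    numeric = (c_plus - c_minus) / (2 * eps)
    print("analytic:", analytic, "numeric:", numeric)

check_gradients(Net([3, 2, 2]), np.array([1, 0, 1]), np.array([1, 1]))

If the backpropagation is correct, the two printed numbers should agree to several decimal places regardless of which activation is selected.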