Over the weekend I tried building a Neural Network, which improves using an evolutionary algorithm. I ran it for 5000 generations in the Cartpole environment of openai (https://www.openai.com/) but it doesn't improve really well. The Neural Network has 4 Inputs, 1 Hidden Layer with 3 Units, 1 Output and the Network uses tanH as activation function. Every generation has 100 individuals, 5 of them are chosen to form the next generation, with a 20% chance of mutation. Here's the Code for better understanding:
import operator
import gym
import math
import random
import numpy
import matplotlib.pyplot as plt
env = gym.make('CartPole-v0')
generations = 100
input_units = 4
Hidden_units = 3
output_units = 1
individuals = 100
fitest1 = []
fitest2 = []
def Neural_Network(x, weights1, weights2):
global output
output = list(map(operator.mul, x, weights1))
output = numpy.tanh(output)
output = list(map(operator.mul, output, weights2))
output = sum(output)
return(output)
weights1 = [[random.random() for i in range(input_units*Hidden_units)] for j in range(individuals)]
weights2 = [[random.random() for i in range(Hidden_units*output_units)] for j in range(individuals)]
fit_plot = []
for g in range(generations):
print('generation:',g+1)
fitness=[0 for f in range(individuals)]
prev_obs = []
observation = env.reset()
for w in weights1:
print(' individual ',weights1.index(w)+1, ' of ', len(weights1))
env.reset()
for t in range(500):
#env.render()
Neural_Network(observation, weights1[weights1.index(w)], weights2[weights1.index(w)])
action = output < 0.5
observation, reward, done, info = env.step(action)
fitness[weights1.index(w)]+=reward
if done:
break
print(' individual fitness:', fitness[weights1.index(w)])
print('min fitness:', min(fitness))
print('max fitness:', max(fitness))
print('average fitness:', sum(fitness)/len(fitness))
fit_plot.append(sum(fitness)/len(fitness))
for f in range(10):
fitest1.append(weights1[fitness.index(max(fitness))])
fitest2.append(weights2[fitness.index(max(fitness))])
fitness[fitness.index(max(fitness))] = -1000000000
for x in range(len(weights1)):
for y in range(len(weights1[x])):
weights1[x][y]=random.choice(fitest1)[y]
if random.randint(1,5) == 1:
weights1[random.randint(0, len(weights1)-1)][random.randint(0, len(weights1[0])-1)] += random.choice([0.1, -0.1])
for x in range(len(weights2)):
for y in range(len(weights2[x])):
weights2[x][y]=random.choice(fitest2)[y]
if random.randint(1,5) == 1:
weights1[random.randint(0, len(weights1)-1)][random.randint(0, len(weights1[0])-1)] += random.choice([0.1, -0.1])
plt.axis([0,generations,0,100])
plt.ylabel('fitness')
plt.xlabel('generations')
plt.plot(range(0,generations), fit_plot)
plt.show()
env.reset()
for t in range(100):
env.render()
Neural_Network(observation, fitest1[0], fitest2[0])
action = output < 0.5
observation, reward, done, info = env.step(action)
if done:
break
And if anyone wonders, a graph of the average fitness over the generations (I only ran it for 100 generations this time)
If there are still any Questions, just ask.
My opinion is that in evolutionary algorithm you are not choosing correct individuals at the end of EA. Ensure that you are choosing the best 2 individuals(could work with only one but we want to be better than that :) ) for new generation. This should improve a desired result :)