I am working with Cython to speed up some python code and I am running into the following error:
Traceback (most recent call last):
File "d:\ReinforcementLearning\BaseLines\A\Cythonver\testing.py", line 1, in <module>
import RL_Cython
File "RL_Cython.pyx", line 524, in init RL_Cython
agent.fit(iterations = 3, checkpoint = 1)
File "RL_Cython.pyx", line 430, in RL_Cython.Agent.fit
self.es.train(iterations, print_every = checkpoint)
File "RL_Cython.pyx", line 105, in RL_Cython.Deep_Evolution_Strategy.train
cpdef train(self, int epoch = 100, int print_every = 1):
File "RL_Cython.pyx", line 120, in RL_Cython.Deep_Evolution_Strategy.train
weights_population = self._get_weight_from_population(self.weights, population[k])
TypeError: an integer is required
The code looks like the following:
cdef class Deep_Evolution_Strategy:
cdef list weights
cdef double sigma, learning_rate
cdef int population_size
cdef public reward_function
inputs = None
def __init__(self, weights, reward_function, population_size, sigma, learning_rate):
self.weights = weights
self.reward_function = reward_function
self.population_size = population_size
self.sigma = sigma
self.learning_rate = learning_rate
cpdef _get_weight_from_population(self, list weights, int population):
cdef list weights_population = []
for index, i in enumerate(population):
jittered = self.sigma * i
weights_population.append(weights[index] + jittered)
print(type(weights_population))
return weights_population
cdef public list get_weights(self):
return self.weights
cpdef train(self, int epoch = 100, int print_every = 1):
lasttime = time.time()
cdef list population
cdef int i
cdef rewards
cdef int k
for i in range(epoch):
population = []
rewards = np.zeros(self.population_size)
for k in range(self.population_size):
x = []
for w in self.weights:
x.append(np.random.randn(*w.shape))
population.append(x)
for k in range(self.population_size):
weights_population = self._get_weight_from_population(self.weights, population[k])
rewards[k] = self.reward_function(weights_population)
rewards = (rewards - np.mean(rewards)) / (np.std(rewards) + 1e-7)
for index, w in enumerate(self.weights):
A = np.array([p[index] for p in population])
self.weights[index] = (
w
+ self.learning_rate
/ (self.population_size * self.sigma)
* np.dot(A.T, rewards).T
)
if (i + 1) % print_every == 0:
print('iter %d. reward: %f' % (i + 1, self.reward_function(self.weights)))
I've already done print(type(k, self.weights, population)). Self.weights and population are lists as it should be and k is an integer. So I don't know why this error is popping up in the first place.
The problem lies in this part of your code:
Apparently,
population[k]
is not an integer.Now, look at this part of your code:
There you are defining a
list
, and appending numbers to it. You are then appending thelist
into thepopulation
list, which you later iterate through to use for those classes:As you can see, you cannot pass a
list
as thepopulation
parameter, as you told python it should be anint
:int population