I made an env with Gym for Sudoku puzzle and I want to train an AI on it using KerasRL (I've removed the step reset and render method of the environment to not have too much code for StackOverflow). I use a Flatten and 3 dense layers for my model and compile it with Adam. I get a error when I try to compile it because the shape isn't the right one and i don't get why :
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam
from numpy.linalg.linalg import double
from numpy.core.numeric import ones
class Sudoku(Env):
#Fonction d'initialisaion
def __init__(self):
#Soit on met une case fixé et il y a 9 actions possibles (mettre un num de 1 à 9), sur la grille générale il y a 81 cases disponibles et 9 numéros possibles par case ->729
self.action_space = Discrete(729)
#81 cases qui prennent des valeurs entres 0 (case vide pour 0) et 9, le produit cartésien des deux donnent bien le nombre de grilles possibles au sudoku je pense
#chaque case est représenté par une case de tableau, le premier array correspond a la borne inferieur prise par chaque case (0) et le deuxieme par la borne sup 9, ça fait enormément d'état possibles
self.observation_space = Box(low= 0,high= 9, shape=(9,9),dtype=int)
#definition de l'état de départ que des zeros dans les 81 cases (vides)
self.state = np.zeros((9,9),dtype=int)
#fonction application d'action
def step(self,action):
return self.state , reward , done , info
def render(self, mode):
def reset(self):
return self.state
def build_model(states,actions):
model=Sequential()
model.add(Input(shape=states))
model.add(Flatten())
model.add(Dense(100,activation='relu'))
model.add(Dense(100,activation='relu'))
model.add(Dense(actions,activation='linear'))
return model
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_agent(model,actions):
policy= BoltzmannQPolicy()
memory= SequentialMemory(limit=50000,window_length=1)
agent= DQNAgent(model=model,memory=memory,policy=policy,nb_actions=actions,nb_steps_warmup=10,target_model_update=1e-2)
return agent
env=Sudoku()
states=env.observation_space.shape
print(states)
actions=env.action_space.n
model = build_model(states,actions)
model.summary()
agent = build_agent(model,actions)
agent.compile(Adam(lr=1e-3),metrics=['mae'])
agent.fit(env,nb_steps=20000,visualize=False,verbose=1)
I get this error :
Training for 20000 steps ... Interval 1 (0 steps performed) --------------------------------------------------------------------------- ValueError Traceback (most recent call last) in 1 agent.compile(Adam(lr=1e-3),metrics=['mae']) ----> 2 agent.fit(env,nb_steps=20000,visualize=False,verbose=1)
7 frames /usr/local/lib/python3.8/dist-packages/tensorflow/python/keras/engine/training_utils.py in standardize_input_data(data, names, shapes, check_batch_axis, exception_prefix) 569 shape = shapes[i] 570 if len(data_shape) != len(shape): --> 571 raise ValueError('Error when checking ' + exception_prefix + 572 ': expected ' + names[i] + ' to have ' + 573 str(len(shape)) + ' dimensions, but got array '
ValueError: Error when checking input: expected input_3 to have 3 dimensions, but got array with shape (1, 1, 9, 9)