I am trying to write a deep Q-learning agent that learns to play Tetris.
I have finished all the game logic and am now writing the environment using TensorFlow's TF-Agents (`tf_agents`).
Here is my code:
import numpy as np
from tf_agents.trajectories import time_step as ts
from src.controllers import GameController as gc
from src.controllers import TetriminoController as tc
import tf_agents as tfa
import pygame
class TetrisEnvironment(tfa.environments.PyEnvironment):
def __init__(self,screen):
self._action_spec = tfa.specs.array_spec.BoundedArraySpec(
shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
self._observation_spec = tfa.specs.array_spec.BoundedArraySpec(
shape=(18,10), dtype=np.int32, minimum=0, name='observation')
self._episode_ended = False
self.screen = screen
self.tetrimino_controller = tc.TetriminoController(screen)
self.dt = 0
self.clock = pygame.time.Clock()
self.score = 0
self._state = self.tetrimino_controller.block_grid_arr
def action_spec(self):
return self._action_spec
def observation_spec(self):
return self._observation_spec
def _reset(self):
self.tetrimino_controller = tc.TetriminoController(self.screen)
self.dt = 0
self.score = 0
self._state = self.tetrimino_controller.block_grid_arr
self._episode_ended = False
return ts.restart(np.array([self._state], dtype=np.int32))
def getState(self):
return self.tetrimino_controller.block_grid_arr
def _step(self, action):
done = False
reward = 0
for event in pygame.event.get():
if event.type == tc.etriminoController.END_GAME_EVENT:
done = True
if event.type == pygame.QUIT:
return 0,True
if event.type == tc.TetriminoController.STOP_MOVE_EVENT:
self.tetrimino_controller.new_tetrimino()
reward += self.getPunishment()
if event.type == tc.TetriminoController.TETRIS_EVENT:
reward += 100
if event.type == tc.TetriminoController.TETRIS_COMBO_DOUBLE_EVENT:
reward += 250
if event.type == tc.TetriminoController.TETRIS_COMBO_TRIPLE_EVENT:
reward += 400
if event.type == tc.TetriminoController.TETRIS_COMBO_QUADRUPLE_EVENT:
reward += 600
match action:
case 0:
self.tetrimino_controller.move_left()
case 1:
self.tetrimino_controller.move_right()
case 2:
self.tetrimino_controller.rotate()
case 3:
self.tetrimino_controller.move_down()
case _:
print("Invalid action")
if done:
# The last action ended the episode. Ignore the current action and start
# a new episode.
return self.reset()
if self._episode_ended:
return ts.termination(self.getState(), reward)
else:
return ts.transition(self.getState(), reward=0.0, discount=1.0)
def render(self):
self.screen.fill("purple")
self.tetrimino_controller.draw()
pygame.display.flip()
def getPunishment(self):
holes = self.tetrimino_controller.get_holes()
towers = self.tetrimino_controller.get_towers()
bumpiness = self.tetrimino_controller.get_bumpiness()
punishment = -0.05 * holes - 0.07 * bumpiness - 0.03 * towers
return punishment
I don't understand why I am getting the error below. It seems to occur because the observation does not match the expected shape of `self._observation_spec`:
ValueError: Given
time_step
: TimeStep( {'discount': array(1., dtype=float32), 'observation': array([[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]], dtype=int32), 'reward': array(0., dtype=float32), 'step_type': array(0, dtype=int32)}) does not match expectedtime_step_spec
: TimeStep( {'discount': BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0), 'observation': BoundedArraySpec(shape=(18,10), dtype=dtype('int32'), name='observation', minimum=0, maximum=2147483647), 'reward': ArraySpec(shape=(), dtype=dtype('float32'), name='reward'), 'step_type': ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')})
Neither shape `(18, 10)` nor `(10, 18)` works. I defined the array as:
self.block_grid_arr = np.array([[0 for i in range (10)] for j in range (18)], dtype=np.int32),
I also don't understand why there are four levels of square brackets around the observation in the error message. Any help would be appreciated.