tfa.specs.array_spec.BoundedArraySpec shape error

43 Views Asked by At

I am trying to write a deep Q-learning agent that learns to play Tetris.

I have finished all the game logic and am now writing the environment using TensorFlow's tf_agents library.

Here is my code :

import numpy as np
from tf_agents.trajectories import time_step as ts
from src.controllers import GameController as gc
from src.controllers import TetriminoController as tc
import tf_agents as tfa
import pygame

class TetrisEnvironment(tfa.environments.PyEnvironment):

    def __init__(self,screen):
        self._action_spec = tfa.specs.array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=3, name='action')
        self._observation_spec = tfa.specs.array_spec.BoundedArraySpec(
            shape=(18,10), dtype=np.int32, minimum=0, name='observation')
        self._episode_ended = False
        self.screen = screen
        self.tetrimino_controller = tc.TetriminoController(screen)
        self.dt = 0
        self.clock = pygame.time.Clock()
        self.score = 0
        self._state = self.tetrimino_controller.block_grid_arr

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec

    def _reset(self):
        self.tetrimino_controller = tc.TetriminoController(self.screen)
        self.dt = 0
        self.score = 0
        self._state = self.tetrimino_controller.block_grid_arr
        self._episode_ended = False
        return ts.restart(np.array([self._state], dtype=np.int32))

    def getState(self):
        return self.tetrimino_controller.block_grid_arr
        
    def _step(self, action):
        done = False
        reward = 0
        for event in pygame.event.get():
            if event.type == tc.etriminoController.END_GAME_EVENT:
                done = True
            if event.type == pygame.QUIT:
                return 0,True
            if event.type == tc.TetriminoController.STOP_MOVE_EVENT:
                self.tetrimino_controller.new_tetrimino()
                reward += self.getPunishment()
            if event.type == tc.TetriminoController.TETRIS_EVENT:
                reward += 100
            if event.type == tc.TetriminoController.TETRIS_COMBO_DOUBLE_EVENT:
                reward += 250
            if event.type == tc.TetriminoController.TETRIS_COMBO_TRIPLE_EVENT:
                reward += 400
            if event.type == tc.TetriminoController.TETRIS_COMBO_QUADRUPLE_EVENT:
                reward += 600
        match action:
            case 0:
                self.tetrimino_controller.move_left()
            case 1:
                self.tetrimino_controller.move_right()
            case 2:
                self.tetrimino_controller.rotate()
            case 3:
                self.tetrimino_controller.move_down()
            case _:
                print("Invalid action")

        if done:
        # The last action ended the episode. Ignore the current action and start
        # a new episode.
            return self.reset()

        if self._episode_ended:
            return ts.termination(self.getState(), reward)
        else:
            return ts.transition(self.getState(), reward=0.0, discount=1.0)

    def render(self):
        self.screen.fill("purple")
        self.tetrimino_controller.draw()
        pygame.display.flip()

    def getPunishment(self):
        holes = self.tetrimino_controller.get_holes()
        towers = self.tetrimino_controller.get_towers()
        bumpiness = self.tetrimino_controller.get_bumpiness()
        punishment = -0.05 * holes - 0.07 * bumpiness - 0.03 * towers
        return punishment

I don't understand why I am getting the error below; it seems to be because the observation does not match the shape expected by self._observation_spec.

ValueError: Given time_step: TimeStep( {'discount': array(1., dtype=float32), 'observation': array([[[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]]], dtype=int32), 'reward': array(0., dtype=float32), 'step_type': array(0, dtype=int32)}) does not match expected time_step_spec: TimeStep( {'discount': BoundedArraySpec(shape=(), dtype=dtype('float32'), name='discount', minimum=0.0, maximum=1.0), 'observation': BoundedArraySpec(shape=(18,10), dtype=dtype('int32'), name='observation', minimum=0, maximum=2147483647), 'reward': ArraySpec(shape=(), dtype=dtype('float32'), name='reward'), 'step_type': ArraySpec(shape=(), dtype=dtype('int32'), name='step_type')})

Neither shape (18, 10) nor (10, 18) works. I defined the array as:

self.block_grid_arr = np.array([[0 for i in range (10)] for j in range (18)], dtype=np.int32),

I also don't understand why there are four levels of square brackets in the error message. Any help would be appreciated.

0

There are 0 best solutions below