I'm trying to train an agent in the MineRL environment using Keras (keras-rl). This is my code so far:

import gym
import random
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from gym import spaces

import minerl
from collections import OrderedDict

class EnvHandler():
    # Maps a discrete action index (0-4) onto the MineRL dict action space
    ACTION_MAP = {
        0: {'forward': 1},             # walk forward
        1: {'forward': 1, 'jump': 1},  # forward + jump
        2: {'left': 1},                # strafe left
        3: {'right': 1},               # strafe right
        4: {'attack': 1},              # attack (chop)
    }

    def __init__(self):
        self.env = gym.make('MineRLTreechop-v0')
        self.env.observation_space = gym.spaces.Dict({
            "pov": gym.spaces.Box(low=0, high=255, shape=(64, 64, 3))
        })
        self.action_space = self.env.action_space
        self.observation_space = self.env.observation_space

    def step(self, action):
        # Start from a no-op action and only switch on the keys for this
        # action; noop() already zeroes every other key.
        action_holder = self.env.action_space.noop()
        for key, value in self.ACTION_MAP[action].items():
            action_holder[key] = value
        n_state, reward, done, info = self.env.step(action_holder)
        return n_state, reward, done, info

    def render(self):
        self.env.render()

    def reset(self):
        return self.env.reset()
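
A stand-alone illustration of what the mapping above produces for action index 1 (the noop dict here is a hand-written stand-in for env.action_space.noop(); the real MineRL no-op has more entries such as camera, sneak, and sprint):

ACTION_MAP = {0: {'forward': 1}, 1: {'forward': 1, 'jump': 1},
              2: {'left': 1}, 3: {'right': 1}, 4: {'attack': 1}}
noop = {'forward': 0, 'jump': 0, 'left': 0, 'right': 0, 'attack': 0}

action_holder = dict(noop)
action_holder.update(ACTION_MAP[1])  # action 1 = forward + jump
print(action_holder)  # {'forward': 1, 'jump': 1, 'left': 0, 'right': 0, 'attack': 0}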

def build_model(height, width, channels, actions):
    print(height, width, channels)  # debug: confirm the pov dimensions
    model = Sequential()
    # The leading 3 is meant to match window_length=3 in SequentialMemory below
    model.add(Convolution2D(32, (8, 8), strides=(4, 4), activation='relu',
                            input_shape=(3, height, width, channels)))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Convolution2D(64, (3, 3), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(actions, activation='linear'))  # one Q-value per action
    return model
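
For my own bookkeeping (this is my reading of Keras, not verified in detail): input_shape never includes the batch axis, so with input_shape=(3, height, width, channels) the first layer ends up expecting 5-dimensional batches, which is where the "5 dimensions" in the error below comes from:

input_shape = (3, 64, 64, 3)         # (window, height, width, channels)
batch_shape = (None,) + input_shape  # Keras prepends the batch axis
print(len(batch_shape))              # 5 -> "expected ... to have 5 dimensions"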



def build_agent(model, actions):
    # Epsilon-greedy exploration, annealed from 1.0 down to 0.1 over 10k steps
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.,
                                  value_min=.1, value_test=.2, nb_steps=10000)
    # window_length=3: each state fed to the model is the last 3 observations
    memory = SequentialMemory(limit=1000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                   enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn
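
My understanding of window_length (an assumption based on the keras-rl docs): the memory hands the model the last 3 observations stacked along a new leading axis, which is why build_model declares the leading 3 in its input_shape. With plain arrays that would look like:

import numpy as np

# Toy version of how I think SequentialMemory(window_length=3) stacks frames:
window_length = 3
frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(window_length)]
state = np.array(frames)
print(state.shape)  # (3, 64, 64, 3) -- matches input_shape in build_model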

def main():
    env = EnvHandler()

    height, width, channels = env.observation_space['pov'].shape

    actions = 5  # size of the discrete action set defined in EnvHandler

    model = build_model(height, width, channels, actions)
    dqn = build_agent(model, actions)
    dqn.compile(Adam(learning_rate=1e-4))
    model.summary()
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)


if __name__ == '__main__':
    main()

I'm getting the following error when it starts training:

Training for 10000 steps ...
Traceback (most recent call last):
  File "F:/WORKING/Minecraft KI/Project/SmartTreeKeras.py", line 172, in <module>
    main()
  File "F:/WORKING/Minecraft KI/Project/SmartTreeKeras.py", line 159, in main
    dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\core.py", line 169, in fit
    action = self.forward(observation)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 225, in forward
    q_values = self.compute_q_values(state)
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 69, in compute_q_values
    q_values = self.compute_batch_q_values([state]).flatten()
  File "C:\Users\Marius\anaconda3\lib\site-packages\rl\agents\dqn.py", line 64, in compute_batch_q_values
    q_values = self.model.predict_on_batch(batch)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 1201, in predict_on_batch
    x, extract_tensors_from_dataset=True)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2334, in _standardize_user_data
    batch_size=batch_size)
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_v1.py", line 2361, in _standardize_tensors
    exception_prefix='input')
  File "C:\Users\Marius\anaconda3\lib\site-packages\tensorflow\python\keras\engine\training_utils.py", line 573, in standardize_input_data
    'with shape ' + str(data_shape))
ValueError: Error when checking input: expected conv2d_input to have 5 dimensions, but got array with shape (1, 3)

As asked in the comments, env.step() returns:

(OrderedDict([('pov', array([[[120, 153, 216],
        [120, 153, 217],
        [120, 153, 217],
        ...,
        [121, 153, 215],
        [121, 153, 214],
        [121, 153, 214]],

       [[121, 153, 216],
        [120, 153, 216],
        [120, 153, 217],
        ...,
        [121, 153, 214],
        [121, 153, 214],
        [121, 153, 214]],

       [[121, 153, 216],
        [121, 153, 216],
        [120, 153, 216],
        ...,
        [121, 153, 214],
        [121, 153, 213],
        [121, 153, 213]],

       ...,

       [[ 16,  30,   9],
        [ 14,  27,   8],
        [ 13,  25,   7],
        ...,
        [ 17,  33,  10],
        [ 21,  40,  12],
        [ 17,  33,  10]],

       [[ 14,  27,   8],
        [ 14,  27,   8],
        [ 21,  41,  12],
        ...,
        [ 28,  54,  16],
        [ 16,  30,   9],
        [ 14,  27,   8]],

       [[ 12,  24,   7],
        [ 20,  39,  11],
        [ 21,  42,  12],
        ...,
        [ 33,  64,  19],
        [ 30,  58,  17],
        [ 17,  34,  10]]], dtype=uint8))]), 0.0, False, {})
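
So a single observation is an OrderedDict holding one 'pov' image rather than a bare array. A quick check against the wrapper from my script confirms the type and shape (obs is my own variable name):

env = EnvHandler()
obs = env.reset()
print(type(obs))         # <class 'collections.OrderedDict'>
print(obs['pov'].shape)  # (64, 64, 3)
print(obs['pov'].dtype)  # uint8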

Some more data:

  • Python 3.7
  • TensorFlow-GPU 2.3.0

I suspect there is also a structural error, because the number of dimensions the model expects changes with whatever input_shape I declare: if I declare 5 dimensions it expects 6, if I declare 3 it expects 4, and so on, while the incoming data stays at shape (1, 3). A sketch of my understanding of why is below.
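
Here is my reconstruction of what keras-rl does before model.predict_on_batch() (based on reading rl/agents/dqn.py, so treat the details as an assumption): the memory stacks window_length observations into a state, the agent wraps that state in a one-element batch, and because each observation is an OrderedDict rather than an array, NumPy can only build an object array of shape (1, 3):

import numpy as np

window_length = 3

# Each MineRL observation is an OrderedDict, not a bare array:
obs = {'pov': np.zeros((64, 64, 3), dtype=np.uint8)}

# SequentialMemory stacks the last window_length observations into a state,
# and the agent wraps that state in a batch of size 1:
state = [obs] * window_length
batch = np.array([state])

# NumPy can't merge dicts into one tensor, so it builds an object array:
print(batch.shape)  # (1, 3) -- the shape from the ValueError, no matter how
                    # many dimensions input_shape declares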

I'm kinda going crazy over this error. I've already tried numerous fixes I found online, but none of them worked.

I'd be so glad if someone could help!

-Cheers
