I just started to use Ray-RLlib and I try to restructure my VSL algorithm (using SUMO) to be able to work with RLlib. This is a MARL (4 agents) DQN approach. I restructured the environment based on the documentation and created the training algorithm as well, but when I try to run the script I get the error mentioned in the title.

Training algorithm



from myEnvironment import myEnvironment
import ray
from ray.tune.registry import register_env
from ray.rllib.algorithms.dqn import DQN

ray.init()
register_env("myEnv", lambda config: myEnvironment({}))
env = myEnvironment({})

config = {
    'environment': "myEnv",
    'observation_space': env.observation_space,
    'action_space': env.action_space,
    "framework": "torch",
    "create_env_on_driver": True,
}

agent = DQN(config=config)

observation = env.reset()
for iteration in range(100):
    result = agent.train()

    # Print training progress
    print(f"Iteration {iteration}: {result}")

    # Save a checkpoint every 10 iterations
    if iteration % 10 == 0:
        checkpoint = agent.save()
        print(f"Checkpoint saved at iteration {iteration}: {checkpoint}")

Environment (I cleared a little bit and left only the important functions from the RLlib point of view)


import os, sys
import traci
import traci.constants
from ray.rllib.examples.env.mock_env import MockEnv
import time
import numpy as np
import random
from ray.rllib import MultiAgentEnv
import gymnasium as gym
from gymnasium import spaces
from ray.tune.registry import register_env


occupancy_low = 0
occupancy_high = 1
speed_low = 0
speed_high = 37
speed_observation_space = spaces.Box(low=speed_low, high=speed_high, shape=(4,), dtype=np.float32)
occupancy_observation_space = spaces.Box(low=occupancy_low, high=occupancy_high, shape=(4,), dtype=np.float32)
bit_observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
observation_space = spaces.Tuple((speed_observation_space, occupancy_observation_space, bit_observation_space))
action_space = gym.spaces.Discrete(3)


class myEnvironment(MultiAgentEnv):
    def __init__(self, env_config):
        super().__init__()
        self.gui = True
        self._agent_ids = set(range(4))
        self.observation_space = observation_space
        self.action_space = action_space
        self.getLaneID = ["vslZone_0_0", "vslZone_0_1", "vslZone_1_1", "vslZone_1_2", "vslZone_2_0", "vslZone_2_1",
                          "vslZone_3_0"]
        self.getEdgeID = ["vslZone_0", "vslZone_1", "vslZone_2", "vslZone_3"]


    def sumo_step(self):
        self.connection.simulationStep()
        obs = self.create_observation(self.getEdgeID)
        return obs

    def create_observation(self, edgeIDArray):
        observation = np.zeros((len(edgeIDArray), 12))
        agent = 0
        meanSpeed = []
        occupancy = []
        agents = {index: {} for index, _ in enumerate(edgeIDArray)}

        for edge in edgeIDArray:
            meanSpeed.append((self.connection.edge.getLastStepMeanSpeed(edge) * 3.6) / 130)
            occupancy.append(self.connection.edge.getLastStepVehicleNumber(edge) / 58)

        for agentNumber in range(len(edgeIDArray)):
            observation[agent][0:4] = meanSpeed
            observation[agent][4:8] = occupancy
            observation[agent][8 + agent] = 1
            agents[agentNumber] = {"meanSpeed": observation[agent][0:4],
                                   "occupancy": observation[agent][4:8],
                                   "agentBit": observation[agent][8:12]}
            agent += 1

        observation = agents

        return observation

    def reset(self, *, seed=None, options=None):
        super().reset(seed=seed)

        self.no_veh = False
        b = random.randint(1, 5)
        for i in range(len(self.getEdgeID)):
            self.info[i] = None

        with open(f"{os.path.join(self.sim_dir, f'sampleRoutes.rou.xml')}", "r+") as file:
            config = file.read()
        with open(f"{os.path.join(self.sim_dir, 'sampleRoutes.rou.xml')}", "w") as f:  
            f.write(config)

        if self.connection is None:
            self.sumo_init()

        self.connection.load(self.sumoCmd[1:])
        self.warmup()
        observation = self.sumo_step()
        self.terminateds = set()
        self.truncateds = set()
        reset_results = [a.reset() for a in self.agents]

        return (
            {i: oi[0] for i, oi in enumerate(reset_results)},
            {i: oi[1] for i, oi in enumerate(reset_results)},
        )

    def warmup(self):

     #Basic warmup phase

    def step(self, action_dict):
        obs, rew, terminated, truncated, info = {}, {}, {}, {}, {}

        for i,action in action_dict.items():
            self.set_max_speed(action=action, edgeID=self.getEdgeID[i])

        for _ in range(60):
            state = self.sumo_step()
        done, reward = self.rewardStd(self.getEdgeID)

        for i, action in action_dict.items():
            obs[i], rew[i], terminated[i], truncated[i], info[i] = state, reward, done, done, info

        return obs, reward, done, truncated, info


    def close(self):
        pass

    def render(self):
        pass

    def rewardStd(self):

        done, rew = {}, {}
        return done, rew

So I tried to follow the documentation and I think everything is in good format (step, reset, observation_space, action_space), however I still get the abovementioned error.

0

There are 0 best solutions below