I just started to use Ray-RLlib and I try to restructure my VSL algorithm (using SUMO) to be able to work with RLlib. This is a MARL (4 agents) DQN approach. I restructured the environment based on the documentation and created the training algorithm as well, but when I try to run the script I get the error mentioned in the title.
Training algorithm
from myEnvironment import myEnvironment
import ray
from ray.tune.registry import register_env
from ray.rllib.algorithms.dqn import DQN
ray.init()
register_env("myEnv", lambda config: myEnvironment({}))
env = myEnvironment({})
config = {
'environment': "myEnv",
'observation_space': env.observation_space,
'action_space': env.action_space,
"framework": "torch",
"create_env_on_driver": True,
}
agent = DQN(config=config)
observation = env.reset()
for iteration in range(100):
result = agent.train()
# Print training progress
print(f"Iteration {iteration}: {result}")
# Save a checkpoint every 10 iterations
if iteration % 10 == 0:
checkpoint = agent.save()
print(f"Checkpoint saved at iteration {iteration}: {checkpoint}")
Environment (I cleared a little bit and left only the important functions from the RLlib point of view)
import os, sys
import traci
import traci.constants
from ray.rllib.examples.env.mock_env import MockEnv
import time
import numpy as np
import random
from ray.rllib import MultiAgentEnv
import gymnasium as gym
from gymnasium import spaces
from ray.tune.registry import register_env
occupancy_low = 0
occupancy_high = 1
speed_low = 0
speed_high = 37
speed_observation_space = spaces.Box(low=speed_low, high=speed_high, shape=(4,), dtype=np.float32)
occupancy_observation_space = spaces.Box(low=occupancy_low, high=occupancy_high, shape=(4,), dtype=np.float32)
bit_observation_space = spaces.Box(low=0, high=1, shape=(4,), dtype=np.float32)
observation_space = spaces.Tuple((speed_observation_space, occupancy_observation_space, bit_observation_space))
action_space = gym.spaces.Discrete(3)
class myEnvironment(MultiAgentEnv):
def __init__(self, env_config):
super().__init__()
self.gui = True
self._agent_ids = set(range(4))
self.observation_space = observation_space
self.action_space = action_space
self.getLaneID = ["vslZone_0_0", "vslZone_0_1", "vslZone_1_1", "vslZone_1_2", "vslZone_2_0", "vslZone_2_1",
"vslZone_3_0"]
self.getEdgeID = ["vslZone_0", "vslZone_1", "vslZone_2", "vslZone_3"]
def sumo_step(self):
self.connection.simulationStep()
obs = self.create_observation(self.getEdgeID)
return obs
def create_observation(self, edgeIDArray):
observation = np.zeros((len(edgeIDArray), 12))
agent = 0
meanSpeed = []
occupancy = []
agents = {index: {} for index, _ in enumerate(edgeIDArray)}
for edge in edgeIDArray:
meanSpeed.append((self.connection.edge.getLastStepMeanSpeed(edge) * 3.6) / 130)
occupancy.append(self.connection.edge.getLastStepVehicleNumber(edge) / 58)
for agentNumber in range(len(edgeIDArray)):
observation[agent][0:4] = meanSpeed
observation[agent][4:8] = occupancy
observation[agent][8 + agent] = 1
agents[agentNumber] = {"meanSpeed": observation[agent][0:4],
"occupancy": observation[agent][4:8],
"agentBit": observation[agent][8:12]}
agent += 1
observation = agents
return observation
def reset(self, *, seed=None, options=None):
super().reset(seed=seed)
self.no_veh = False
b = random.randint(1, 5)
for i in range(len(self.getEdgeID)):
self.info[i] = None
with open(f"{os.path.join(self.sim_dir, f'sampleRoutes.rou.xml')}", "r+") as file:
config = file.read()
with open(f"{os.path.join(self.sim_dir, 'sampleRoutes.rou.xml')}", "w") as f:
f.write(config)
if self.connection is None:
self.sumo_init()
self.connection.load(self.sumoCmd[1:])
self.warmup()
observation = self.sumo_step()
self.terminateds = set()
self.truncateds = set()
reset_results = [a.reset() for a in self.agents]
return (
{i: oi[0] for i, oi in enumerate(reset_results)},
{i: oi[1] for i, oi in enumerate(reset_results)},
)
def warmup(self):
#Basic warmup phase
def step(self, action_dict):
obs, rew, terminated, truncated, info = {}, {}, {}, {}, {}
for i,action in action_dict.items():
self.set_max_speed(action=action, edgeID=self.getEdgeID[i])
for _ in range(60):
state = self.sumo_step()
done, reward = self.rewardStd(self.getEdgeID)
for i, action in action_dict.items():
obs[i], rew[i], terminated[i], truncated[i], info[i] = state, reward, done, done, info
return obs, reward, done, truncated, info
def close(self):
pass
def render(self):
pass
def rewardStd(self):
done, rew = {}, {}
return done, rew
So I tried to follow the documentation and I think everything is in good format (step, reset, observation_space, action_space), however I still get the abovementioned error.