I work with an RL algorithm. I'm using tensorflow and tf-agents and training a DQN. My problem is that only one core of the CPU is used when calculating the 10 episodes in the environment for data collection.
My training function looks like this:
def train_step(self, n_steps):
env_steps = tf_metrics.EnvironmentSteps()
#num_episodes = tf_metrics.NumberOfEpisodes()
rew = TFSumOfRewards()
action_hist = tf_metrics.ChosenActionHistogram(
name='ChosenActionHistogram', dtype=tf.int32, buffer_size=1000
)
#add reply buffer and metrict to the observer
replay_observer = [self.replay_buffer.add_batch]
train_metrics = [env_steps, rew]
self.replay_buffer.clear()
driver = dynamic_episode_driver.DynamicEpisodeDriver(
self.train_env, self.collect_policy, observers=replay_observer + train_metrics, num_episodes=self.collect_episodes)
final_time_step, policy_state = driver.run()
print('Number of Steps: ', env_steps.result().numpy())
for train_metric in train_metrics:
train_metric.tf_summaries(train_step=self.global_step, step_metrics=train_metrics)
# Convert the replay buffer to a tf.data.Dataset
# Dataset generates trajectories with shape [Bx2x...]
AUTOTUNE = tf.data.experimental.AUTOTUNE
dataset = self.replay_buffer.as_dataset(
num_parallel_calls=AUTOTUNE,
sample_batch_size=self.batch_size,
num_steps=(self.train_sequence_length + 1)).prefetch(AUTOTUNE)
iterator = iter(dataset)
train_loss = None
for _ in range(n_steps):
# Sample a batch of data from the buffer and update the agent's network.
experience, unused_info = next(iterator)
train_loss = self.agent.train(experience)
def train_agent(self, n_epoch):
for i in range(n_epoch):
self.train_step(int(self.replay_buffer.num_frames().numpy()/self.batch_size))
if(self.IsAutoStoreCheckpoint == True):
self.store_check_point()
pass
As already written above, num_episodes = 10. So it would make sense to calculate 10 episodes in parallel before the network is trained. If I set the value num_parallel_calls to e.g. 10 nothing changes. What do I have to do to use all cores of my CPU (Ryzen 9 5950x with 16 cores)?
Thanks!
masterkey