AgileRL curriculum learning and self-play tutorial #1124

Merged: 29 commits (Nov 14, 2023)
Commits
56ef21f  Create self-play tutorial (nicku-a, Sep 19, 2023)
7e2680e  Remove wandb log files (nicku-a, Sep 19, 2023)
a04ef25  Formatting changes (nicku-a, Sep 19, 2023)
aa10c96  Alternating player turns changes (nicku-a, Oct 3, 2023)
a0c2ddf  DQN curriculum and self play tut (nicku-a, Oct 20, 2023)
df31a9e  Docstrings (nicku-a, Oct 20, 2023)
c2c05f3  Tutorial update (nicku-a, Oct 23, 2023)
e6e0177  Introduction of extra lesson (nicku-a, Oct 26, 2023)
4b9ad3e  Final DQN tutorial code changes (nicku-a, Oct 27, 2023)
ec97dde  Self-opposition gif (nicku-a, Oct 27, 2023)
9bae20c  Include DQN in index (nicku-a, Oct 27, 2023)
013e3bf  Upload connect four gif (nicku-a, Oct 27, 2023)
63c58e6  Upload trained model weights (nicku-a, Oct 27, 2023)
fd3054d  Add referernce to trained model weights (nicku-a, Oct 27, 2023)
1a21614  Make tutorial code sections collapsible (nicku-a, Oct 27, 2023)
512062f  Update lesson range (nicku-a, Oct 27, 2023)
46c0210  Optional WandB tracking (nicku-a, Oct 27, 2023)
ce8a190  Update AgileRL version and tutorials (nicku-a, Nov 10, 2023)
0712b2b  Save continuous actions to buffer for MADDPG and MATD3 (nicku-a, Nov 10, 2023)
666184f  Fix pytest version as md docs has minor issues with new versions (elliottower, Nov 13, 2023)
44df235  Ignore tutorials in documentation testing (done in tutorial tests) (elliottower, Nov 13, 2023)
0f54f5f  Revert hard coded pytest version (elliottower, Nov 13, 2023)
c29a908  Add docs/tutorials to ignore (elliottower, Nov 13, 2023)
213b5fd  Test updating gh action for tutorials (elliottower, Nov 13, 2023)
a67209c  Merge branch 'master' into master (elliottower, Nov 13, 2023)
8d13bd0  AgileRL multi agent and version updates (nicku-a, Nov 14, 2023)
2786e31  Merge branch 'master' of https://github.com/nicku-a/PettingZoo (nicku-a, Nov 14, 2023)
9b64931  Use latest version of AgileRL (nicku-a, Nov 14, 2023)
7f45fd0  Slight changes to initial print statements to re-trigger tests (nicku-a, Nov 14, 2023)
docs/tutorials/agilerl/DQN.md: 2 additions & 0 deletions

@@ -1244,6 +1244,8 @@ The following code allows you to load your saved DQN agent from the previous tra
 <details>
 <summary>Full training code</summary>
 
+> Please note that on line 612 ``max_episodes`` is set to 10 to allow fast testing of this tutorial code. This line can be deleted, and the line below it uncommented, to use the number of episodes set in the config files.
+
 ```{eval-rst}
 .. literalinclude:: ../../../tutorials/AgileRL/agilerl_dqn_curriculum.py
    :language: python
tutorials/AgileRL/agilerl_dqn_curriculum.py: 6 additions & 1 deletion

@@ -606,7 +606,12 @@ def outcome(self, action, player, return_length=False):
 
 # Define training loop parameters
 episodes_per_epoch = 10
-max_episodes = LESSON["max_train_episodes"]  # Total episodes
+
+# ! NOTE: Uncomment the max_episodes line below to change the number of training episodes. ! #
+# It is deliberately set low to allow testing to ensure this tutorial is sound.
+max_episodes = 10
+# max_episodes = LESSON["max_train_episodes"]  # Total episodes
+
 max_steps = 500  # Maximum steps to take in each episode
 evo_epochs = 20  # Evolution frequency
 evo_loop = 50  # Number of evaluation episodes
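
For readers who want the full training run back, the commented-out line pulls the episode count from the lesson config. Below is a minimal sketch of what restoring it might look like, assuming the lesson is a YAML file with a `max_train_episodes` entry; the file path and YAML layout are illustrative, only `LESSON["max_train_episodes"]` appears in the diff.

```python
# Illustrative sketch only: restore the config-driven episode count after testing.
# The path and YAML structure are assumptions, not taken from this PR.
import yaml

with open("./curriculums/connect_four/lesson1.yaml") as f:
    LESSON = yaml.safe_load(f)

max_episodes = LESSON["max_train_episodes"]  # Total episodes, as in the commented-out line
```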
tutorials/AgileRL/agilerl_maddpg.py: 19 additions & 4 deletions

@@ -154,16 +154,31 @@
 # Training loop
 for idx_epi in trange(max_episodes):
     for agent in pop:  # Loop through population
-        state = env.reset()[0]  # Reset environment at start of episode
+        state, info = env.reset()  # Reset environment at start of episode
         agent_reward = {agent_id: 0 for agent_id in env.agents}
         if INIT_HP["CHANNELS_LAST"]:
             state = {
                 agent_id: np.moveaxis(np.expand_dims(s, 0), [3], [1])
                 for agent_id, s in state.items()
             }
         for _ in range(max_steps):
-            action = agent.getAction(state, epsilon)  # Get next action from agent
-            next_state, reward, termination, truncation, _ = env.step(
+            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+            env_defined_actions = (
+                info["env_defined_actions"]
+                if "env_defined_actions" in info.keys()
+                else None
+            )
+
+            # Get next action from agent
+            cont_actions, discrete_action = agent.getAction(
+                state, epsilon, agent_mask, env_defined_actions
+            )
+            if agent.discrete_actions:
+                action = discrete_action
+            else:
+                action = cont_actions
+
+            next_state, reward, termination, truncation, info = env.step(
                 action
             )  # Act in environment
 
@@ -180,7 +195,7 @@
                 break
 
             # Save experiences to replay buffe
-            memory.save2memory(state, action, reward, next_state, termination)
+            memory.save2memory(state, cont_actions, reward, next_state, termination)
 
             # Collect the reward
             for agent_id, r in reward.items():
tutorials/AgileRL/agilerl_matd3.py: 19 additions & 4 deletions

@@ -136,7 +136,7 @@
 # Training loop
 for idx_epi in trange(max_episodes):
     for agent in pop:  # Loop through population
-        state, _ = env.reset()  # Reset environment at start of episode
+        state, info = env.reset()  # Reset environment at start of episode
         agent_reward = {agent_id: 0 for agent_id in env.agents}
         if INIT_HP["CHANNELS_LAST"]:
             state = {
@@ -145,8 +145,23 @@
             }
 
         for _ in range(max_steps):
-            action = agent.getAction(state, epsilon)  # Get next action from agent
-            next_state, reward, termination, truncation, _ = env.step(
+            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+            env_defined_actions = (
+                info["env_defined_actions"]
+                if "env_defined_actions" in info.keys()
+                else None
+            )
+
+            # Get next action from agent
+            cont_actions, discrete_action = agent.getAction(
+                state, epsilon, agent_mask, env_defined_actions
+            )
+            if agent.discrete_actions:
+                action = discrete_action
+            else:
+                action = cont_actions
+
+            next_state, reward, termination, truncation, info = env.step(
                 action
             )  # Act in environment
 
@@ -163,7 +178,7 @@
                 break
 
            # Save experiences to replay buffer
-            memory.save2memory(state, action, reward, next_state, termination)
+            memory.save2memory(state, cont_actions, reward, next_state, termination)
 
            # Collect the reward
            for agent_id, r in reward.items():
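
Both training-loop diffs above follow the same pattern: pull the optional `agent_mask` and `env_defined_actions` out of the environment's `info` dict, pass them to `getAction`, step the environment with the discrete action when the agent is discrete, and always store the continuous actions in the replay buffer. The following is a minimal sketch of that pattern factored into a helper; the helper name is illustrative and not part of this PR, and it assumes `agent.getAction(state, epsilon, agent_mask, env_defined_actions)` returns `(cont_actions, discrete_action)` and that `agent.discrete_actions` is a bool, as shown in the diffs.

```python
# Illustrative helper capturing the action-selection pattern used in both loops above.
def select_actions(agent, state, info, epsilon):
    # dict.get returns None when the key is absent, equivalent to the
    # `... if "key" in info.keys() else None` checks used in the diffs.
    agent_mask = info.get("agent_mask")
    env_defined_actions = info.get("env_defined_actions")

    cont_actions, discrete_action = agent.getAction(
        state, epsilon, agent_mask, env_defined_actions
    )
    # Step the env with discrete actions for discrete agents, but keep the
    # continuous actions: those are what get saved to the replay buffer.
    action = discrete_action if agent.discrete_actions else cont_actions
    return action, cont_actions
```

In the loops above, this corresponds to stepping the environment with `action` and then calling `memory.save2memory(state, cont_actions, reward, next_state, termination)`.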
tutorials/AgileRL/render_agilerl_maddpg.py: 20 additions & 3 deletions

@@ -97,7 +97,7 @@ def _label_with_episode_number(frame, episode_num):
 
 # Test loop for inference
 for ep in range(episodes):
-    state, _ = env.reset()
+    state, info = env.reset()
     agent_reward = {agent_id: 0 for agent_id in agent_ids}
     score = 0
     for _ in range(max_steps):
@@ -106,8 +106,25 @@
                 agent_id: np.moveaxis(np.expand_dims(s, 0), [3], [1])
                 for agent_id, s in state.items()
             }
-        # Get action
-        action = maddpg.getAction(state, epsilon=0)
+
+        agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+        env_defined_actions = (
+            info["env_defined_actions"]
+            if "env_defined_actions" in info.keys()
+            else None
+        )
+
+        # Get next action from agent
+        cont_actions, discrete_action = maddpg.getAction(
+            state,
+            epsilon=0,
+            agent_mask=agent_mask,
+            env_defined_actions=env_defined_actions,
+        )
+        if maddpg.discrete_actions:
+            action = discrete_action
+        else:
+            action = cont_actions
 
         # Save the frame for this step and append to frames list
         frame = env.render()
tutorials/AgileRL/render_agilerl_matd3.py: 19 additions & 3 deletions

@@ -90,12 +90,28 @@ def _label_with_episode_number(frame, episode_num):
 
 # Test loop for inference
 for ep in range(episodes):
-    state, _ = env.reset()
+    state, info = env.reset()
     agent_reward = {agent_id: 0 for agent_id in agent_ids}
     score = 0
     for _ in range(max_steps):
-        # Get action
-        action = matd3.getAction(state, epsilon=0)
+        agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+        env_defined_actions = (
+            info["env_defined_actions"]
+            if "env_defined_actions" in info.keys()
+            else None
+        )
+
+        # Get next action from agent
+        cont_actions, discrete_action = matd3.getAction(
+            state,
+            epsilon=0,
+            agent_mask=agent_mask,
+            env_defined_actions=env_defined_actions,
+        )
+        if matd3.discrete_actions:
+            action = discrete_action
+        else:
+            action = cont_actions
 
         # Save the frame for this step and append to frames list
         frame = env.render()
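
Both rendering scripts run greedy inference (`epsilon=0`), collect one rendered frame per step, and label frames with `_label_with_episode_number` before appending them to a frames list. A minimal sketch of the final step, writing collected frames out as an animated GIF with imageio, is shown below; the stand-in frames and the output filename are placeholders, not values taken from this PR.

```python
# Illustrative sketch: write frames collected during inference to an animated GIF.
import imageio
import numpy as np

# Placeholder frames; in the scripts above these come from env.render(),
# labelled by _label_with_episode_number.
frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(10)]

imageio.mimwrite("maddpg_demo.gif", frames)  # frame timing can be tuned via plugin kwargs
```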
tutorials/AgileRL/requirements.txt: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-agilerl>=0.1.13
+agilerl>=0.1.14
 pettingzoo[classic,atari,mpe]>=1.23.1
 SuperSuit>=3.9.0
 torch>=2.0.1
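
Since the tutorials now require the newer AgileRL release, a quick sanity check that the installed package meets the bumped minimum can look like the sketch below; it assumes the `packaging` library is available and is purely illustrative, not part of this PR.

```python
# Illustrative check that the installed AgileRL satisfies the new requirements.txt minimum.
from importlib.metadata import version

from packaging.version import Version

assert Version(version("agilerl")) >= Version("0.1.14"), "agilerl>=0.1.14 is required"
```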