AgileRL curriculum learning and self-play tutorial #1124

Merged: 29 commits (Nov 14, 2023)
Commits
56ef21f  Create self-play tutorial (nicku-a, Sep 19, 2023)
7e2680e  Remove wandb log files (nicku-a, Sep 19, 2023)
a04ef25  Formatting changes (nicku-a, Sep 19, 2023)
aa10c96  Alternating player turns changes (nicku-a, Oct 3, 2023)
a0c2ddf  DQN curriculum and self play tut (nicku-a, Oct 20, 2023)
df31a9e  Docstrings (nicku-a, Oct 20, 2023)
c2c05f3  Tutorial update (nicku-a, Oct 23, 2023)
e6e0177  Introduction of extra lesson (nicku-a, Oct 26, 2023)
4b9ad3e  Final DQN tutorial code changes (nicku-a, Oct 27, 2023)
ec97dde  Self-opposition gif (nicku-a, Oct 27, 2023)
9bae20c  Include DQN in index (nicku-a, Oct 27, 2023)
013e3bf  Upload connect four gif (nicku-a, Oct 27, 2023)
63c58e6  Upload trained model weights (nicku-a, Oct 27, 2023)
fd3054d  Add referernce to trained model weights (nicku-a, Oct 27, 2023)
1a21614  Make tutorial code sections collapsible (nicku-a, Oct 27, 2023)
512062f  Update lesson range (nicku-a, Oct 27, 2023)
46c0210  Optional WandB tracking (nicku-a, Oct 27, 2023)
ce8a190  Update AgileRL version and tutorials (nicku-a, Nov 10, 2023)
0712b2b  Save continuous actions to buffer for MADDPG and MATD3 (nicku-a, Nov 10, 2023)
666184f  Fix pytest version as md docs has minor issues with new versions (elliottower, Nov 13, 2023)
44df235  Ignore tutorials in documentation testing (done in tutorial tests) (elliottower, Nov 13, 2023)
0f54f5f  Revert hard coded pytest version (elliottower, Nov 13, 2023)
c29a908  Add docs/tutorials to ignore (elliottower, Nov 13, 2023)
213b5fd  Test updating gh action for tutorials (elliottower, Nov 13, 2023)
a67209c  Merge branch 'master' into master (elliottower, Nov 13, 2023)
8d13bd0  AgileRL multi agent and version updates (nicku-a, Nov 14, 2023)
2786e31  Merge branch 'master' of https://github.com/nicku-a/PettingZoo (nicku-a, Nov 14, 2023)
9b64931  Use latest version of AgileRL (nicku-a, Nov 14, 2023)
7f45fd0  Slight changes to initial print statements to re-trigger tests (nicku-a, Nov 14, 2023)
docs/tutorials/agilerl/DQN.md: 2 additions & 0 deletions

@@ -1244,6 +1244,8 @@ The following code allows you to load your saved DQN agent from the previous tra
 <details>
 <summary>Full training code</summary>
 
+> Please note that on line 612 ``max_episodes`` is set to 10 to allow fast testing of this tutorial code. This line can be deleted, and the line below it uncommented, to use the number of episodes set in the config files.
+
 ```{eval-rst}
 .. literalinclude:: ../../../tutorials/AgileRL/agilerl_dqn_curriculum.py
    :language: python
tutorials/AgileRL/agilerl_dqn_curriculum.py: 6 additions & 1 deletion

@@ -606,7 +606,12 @@ def outcome(self, action, player, return_length=False):
 
 # Define training loop parameters
 episodes_per_epoch = 10
-max_episodes = LESSON["max_train_episodes"]  # Total episodes
+
+# ! NOTE: Uncomment the max_episodes line below to change the number of training episodes. ! #
+# It is deliberately set low to allow testing to ensure this tutorial is sound.
+max_episodes = 10
+# max_episodes = LESSON["max_train_episodes"]  # Total episodes
+
 max_steps = 500  # Maximum steps to take in each episode
 evo_epochs = 20  # Evolution frequency
 evo_loop = 50  # Number of evaluation episodes
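
For readers who want the full training run back, the commented-out line pulls the episode count from the lesson config. Below is a minimal sketch of what restoring it might look like, assuming the lesson is a YAML file with a `max_train_episodes` entry; the file path and YAML layout are illustrative, only `LESSON["max_train_episodes"]` appears in the diff.

```python
# Illustrative sketch only: restore the config-driven episode count after testing.
# The path and YAML structure are assumptions, not taken from this PR.
import yaml

with open("./curriculums/connect_four/lesson1.yaml") as f:
    LESSON = yaml.safe_load(f)

max_episodes = LESSON["max_train_episodes"]  # Total episodes, as in the commented-out line
```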
tutorials/AgileRL/agilerl_maddpg.py: 19 additions & 4 deletions

@@ -154,16 +154,31 @@
 # Training loop
 for idx_epi in trange(max_episodes):
     for agent in pop:  # Loop through population
-        state = env.reset()[0]  # Reset environment at start of episode
+        state, info = env.reset()  # Reset environment at start of episode
         agent_reward = {agent_id: 0 for agent_id in env.agents}
         if INIT_HP["CHANNELS_LAST"]:
             state = {
                 agent_id: np.moveaxis(np.expand_dims(s, 0), [3], [1])
                 for agent_id, s in state.items()
             }
         for _ in range(max_steps):
-            action = agent.getAction(state, epsilon)  # Get next action from agent
-            next_state, reward, termination, truncation, _ = env.step(
+            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+            env_defined_actions = (
+                info["env_defined_actions"]
+                if "env_defined_actions" in info.keys()
+                else None
+            )
+
+            # Get next action from agent
+            cont_actions, discrete_action = agent.getAction(
+                state, epsilon, agent_mask, env_defined_actions
+            )
+            if agent.discrete_actions:
+                action = discrete_action
+            else:
+                action = cont_actions
+
+            next_state, reward, termination, truncation, info = env.step(
                 action
             )  # Act in environment
 
@@ -180,7 +195,7 @@
                 break
 
             # Save experiences to replay buffe
-            memory.save2memory(state, action, reward, next_state, termination)
+            memory.save2memory(state, cont_actions, reward, next_state, termination)
 
             # Collect the reward
             for agent_id, r in reward.items():
tutorials/AgileRL/agilerl_matd3.py: 19 additions & 4 deletions

@@ -136,7 +136,7 @@
 # Training loop
 for idx_epi in trange(max_episodes):
     for agent in pop:  # Loop through population
-        state, _ = env.reset()  # Reset environment at start of episode
+        state, info = env.reset()  # Reset environment at start of episode
         agent_reward = {agent_id: 0 for agent_id in env.agents}
         if INIT_HP["CHANNELS_LAST"]:
             state = {
@@ -145,8 +145,23 @@
             }
 
         for _ in range(max_steps):
-            action = agent.getAction(state, epsilon)  # Get next action from agent
-            next_state, reward, termination, truncation, _ = env.step(
+            agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+            env_defined_actions = (
+                info["env_defined_actions"]
+                if "env_defined_actions" in info.keys()
+                else None
+            )
+
+            # Get next action from agent
+            cont_actions, discrete_action = agent.getAction(
+                state, epsilon, agent_mask, env_defined_actions
+            )
+            if agent.discrete_actions:
+                action = discrete_action
+            else:
+                action = cont_actions
+
+            next_state, reward, termination, truncation, info = env.step(
                 action
             )  # Act in environment
 
@@ -163,7 +178,7 @@
                 break
 
            # Save experiences to replay buffer
-            memory.save2memory(state, action, reward, next_state, termination)
+            memory.save2memory(state, cont_actions, reward, next_state, termination)
 
            # Collect the reward
            for agent_id, r in reward.items():
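
Both training-loop diffs above follow the same pattern: pull the optional `agent_mask` and `env_defined_actions` out of the environment's `info` dict, pass them to `getAction`, step the environment with the discrete action when the agent is discrete, and always store the continuous actions in the replay buffer. The following is a minimal sketch of that pattern factored into a helper; the helper name is illustrative and not part of this PR, and it assumes `agent.getAction(state, epsilon, agent_mask, env_defined_actions)` returns `(cont_actions, discrete_action)` and that `agent.discrete_actions` is a bool, as shown in the diffs.

```python
# Illustrative helper capturing the action-selection pattern used in both loops above.
def select_actions(agent, state, info, epsilon):
    # dict.get returns None when the key is absent, equivalent to the
    # `... if "key" in info.keys() else None` checks used in the diffs.
    agent_mask = info.get("agent_mask")
    env_defined_actions = info.get("env_defined_actions")

    cont_actions, discrete_action = agent.getAction(
        state, epsilon, agent_mask, env_defined_actions
    )
    # Step the env with discrete actions for discrete agents, but keep the
    # continuous actions: those are what get saved to the replay buffer.
    action = discrete_action if agent.discrete_actions else cont_actions
    return action, cont_actions
```

In the loops above, this corresponds to stepping the environment with `action` and then calling `memory.save2memory(state, cont_actions, reward, next_state, termination)`.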
tutorials/AgileRL/render_agilerl_maddpg.py: 20 additions & 3 deletions

@@ -97,7 +97,7 @@ def _label_with_episode_number(frame, episode_num):
 
 # Test loop for inference
 for ep in range(episodes):
-    state, _ = env.reset()
+    state, info = env.reset()
     agent_reward = {agent_id: 0 for agent_id in agent_ids}
     score = 0
     for _ in range(max_steps):
@@ -106,8 +106,25 @@
                 agent_id: np.moveaxis(np.expand_dims(s, 0), [3], [1])
                 for agent_id, s in state.items()
             }
-        # Get action
-        action = maddpg.getAction(state, epsilon=0)
+
+        agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+        env_defined_actions = (
+            info["env_defined_actions"]
+            if "env_defined_actions" in info.keys()
+            else None
+        )
+
+        # Get next action from agent
+        cont_actions, discrete_action = maddpg.getAction(
+            state,
+            epsilon=0,
+            agent_mask=agent_mask,
+            env_defined_actions=env_defined_actions,
+        )
+        if maddpg.discrete_actions:
+            action = discrete_action
+        else:
+            action = cont_actions
 
         # Save the frame for this step and append to frames list
         frame = env.render()
tutorials/AgileRL/render_agilerl_matd3.py: 19 additions & 3 deletions

@@ -90,12 +90,28 @@ def _label_with_episode_number(frame, episode_num):
 
 # Test loop for inference
 for ep in range(episodes):
-    state, _ = env.reset()
+    state, info = env.reset()
     agent_reward = {agent_id: 0 for agent_id in agent_ids}
     score = 0
     for _ in range(max_steps):
-        # Get action
-        action = matd3.getAction(state, epsilon=0)
+        agent_mask = info["agent_mask"] if "agent_mask" in info.keys() else None
+        env_defined_actions = (
+            info["env_defined_actions"]
+            if "env_defined_actions" in info.keys()
+            else None
+        )
+
+        # Get next action from agent
+        cont_actions, discrete_action = matd3.getAction(
+            state,
+            epsilon=0,
+            agent_mask=agent_mask,
+            env_defined_actions=env_defined_actions,
+        )
+        if matd3.discrete_actions:
+            action = discrete_action
+        else:
+            action = cont_actions
 
         # Save the frame for this step and append to frames list
         frame = env.render()
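
Both rendering scripts run greedy inference (`epsilon=0`), collect one rendered frame per step, and label frames with `_label_with_episode_number` before appending them to a frames list. A minimal sketch of the final step, writing collected frames out as an animated GIF with imageio, is shown below; the stand-in frames and the output filename are placeholders, not values taken from this PR.

```python
# Illustrative sketch: write frames collected during inference to an animated GIF.
import imageio
import numpy as np

# Placeholder frames; in the scripts above these come from env.render(),
# labelled by _label_with_episode_number.
frames = [np.zeros((64, 64, 3), dtype=np.uint8) for _ in range(10)]

imageio.mimwrite("maddpg_demo.gif", frames)  # frame timing can be tuned via plugin kwargs
```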
tutorials/AgileRL/requirements.txt: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-agilerl>=0.1.13
+agilerl>=0.1.14
 pettingzoo[classic,atari,mpe]>=1.23.1
 SuperSuit>=3.9.0
 torch>=2.0.1
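
Since the tutorials now require the newer AgileRL release, a quick sanity check that the installed package meets the bumped minimum can look like the sketch below; it assumes the `packaging` library is available and is purely illustrative, not part of this PR.

```python
# Illustrative check that the installed AgileRL satisfies the new requirements.txt minimum.
from importlib.metadata import version

from packaging.version import Version

assert Version(version("agilerl")) >= Version("0.1.14"), "agilerl>=0.1.14 is required"
```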