diff --git a/docs/code_examples/aec_rps.py b/docs/code_examples/aec_rps.py
index 7272f75bd..4c318a40c 100644
--- a/docs/code_examples/aec_rps.py
+++ b/docs/code_examples/aec_rps.py
@@ -1,5 +1,3 @@
-import functools
-
 import gymnasium
 import numpy as np
 from gymnasium.spaces import Discrete
@@ -75,26 +73,17 @@ def __init__(self, render_mode=None):
             zip(self.possible_agents, list(range(len(self.possible_agents))))
         )

-        # optional: we can define the observation and action spaces here as attributes to be used in their corresponding methods
-        self._action_spaces = {agent: Discrete(3) for agent in self.possible_agents}
+        # we want to define the spaces as fixed objects so we can seed them
         self._observation_spaces = {
             agent: Discrete(4) for agent in self.possible_agents
         }
-        self.render_mode = render_mode
+        self._action_spaces = {agent: Discrete(3) for agent in self.possible_agents}

-    # Observation space should be defined here.
-    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def observation_space(self, agent):
-        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
-        return Discrete(4)
-
-    # Action space should be defined here.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def action_space(self, agent):
-        return Discrete(3)
+        # observation and action spaces are defined as functions which take in an agent id
+        # and return the relevant spaces.
+        self.observation_space = lambda agent: self._observation_spaces[agent]
+        self.action_space = lambda agent: self._action_spaces[agent]
+        self.render_mode = render_mode

     def render(self):
         """
diff --git a/docs/code_examples/parallel_rps.py b/docs/code_examples/parallel_rps.py
index bf634af10..7f292fb60 100644
--- a/docs/code_examples/parallel_rps.py
+++ b/docs/code_examples/parallel_rps.py
@@ -1,5 +1,3 @@
-import functools
-
 import gymnasium
 import numpy as np
 from gymnasium.spaces import Discrete
@@ -76,7 +74,6 @@ def __init__(self, render_mode=None):
         self.agent_name_mapping = dict(
             zip(self.possible_agents, list(range(len(self.possible_agents))))
         )
-        self.render_mode = render_mode

     # Observation space should be defined here.
     # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
diff --git a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
index 6096e0ea8..7720b0588 100644
--- a/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
+++ b/tutorials/CustomEnvironment/tutorial2_adding_game_logic.py
@@ -1,4 +1,3 @@
-import functools
 import random
 from copy import copy

@@ -43,6 +42,13 @@ def __init__(self):
         self.timestep = None
         self.possible_agents = ["prisoner", "guard"]

+        self._observation_spaces = {
+            agent: MultiDiscrete([7 * 7] * 3) for agent in self.possible_agents
+        }
+        self.observation_space = lambda agent: self._observation_spaces[agent]
+        self._action_spaces = {agent: Discrete(4) for agent in self.possible_agents}
+        self.action_space = lambda agent: self._action_spaces[agent]
+
     def reset(self, seed=None, options=None):
         """Reset set the environment to a starting point.

@@ -162,17 +168,3 @@ def render(self):
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")
-
-    # Observation space should be defined here.
-    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def observation_space(self, agent):
-        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
-        return MultiDiscrete([7 * 7] * 3)
-
-    # Action space should be defined here.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def action_space(self, agent):
-        return Discrete(4)
diff --git a/tutorials/CustomEnvironment/tutorial3_action_masking.py b/tutorials/CustomEnvironment/tutorial3_action_masking.py
index 24676373f..dc84876d8 100644
--- a/tutorials/CustomEnvironment/tutorial3_action_masking.py
+++ b/tutorials/CustomEnvironment/tutorial3_action_masking.py
@@ -1,4 +1,3 @@
-import functools
 import random
 from copy import copy

@@ -43,6 +42,13 @@ def __init__(self):
         self.timestep = None
         self.possible_agents = ["prisoner", "guard"]

+        self._observation_spaces = {
+            agent: MultiDiscrete([7 * 7 - 1] * 3) for agent in self.possible_agents
+        }
+        self.observation_space = lambda agent: self._observation_spaces[agent]
+        self._action_spaces = {agent: Discrete(4) for agent in self.possible_agents}
+        self.action_space = lambda agent: self._action_spaces[agent]
+
     def reset(self, seed=None, options=None):
         """Reset set the environment to a starting point.

@@ -198,17 +204,3 @@ def render(self):
         grid[self.guard_y, self.guard_x] = "G"
         grid[self.escape_y, self.escape_x] = "E"
         print(f"{grid} \n")
-
-    # Observation space should be defined here.
-    # lru_cache allows observation and action spaces to be memoized, reducing clock cycles required to get each agent's space.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def observation_space(self, agent):
-        # gymnasium spaces are defined and documented here: https://gymnasium.farama.org/api/spaces/
-        return MultiDiscrete([7 * 7 - 1] * 3)
-
-    # Action space should be defined here.
-    # If your spaces change over time, remove this line (disable caching).
-    @functools.lru_cache(maxsize=None)
-    def action_space(self, agent):
-        return Discrete(4)
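
Why the patch stores the spaces as fixed objects: gymnasium's Space.seed() seeds one particular space instance, so seeding only carries over to later sample() calls when observation_space(agent) hands back the same object every time. The removed lru_cache decorator achieved this by memoizing the methods' return values; the dictionaries make the fixed instances explicit without functools. A minimal standalone sketch of the difference (not part of the patch; the agent id "player_0" is illustrative):

    from gymnasium.spaces import Discrete

    # fixed space objects, as in the patched __init__
    _observation_spaces = {"player_0": Discrete(4)}
    observation_space = lambda agent: _observation_spaces[agent]

    # every call returns the same instance, so the seeded RNG is the one
    # later sample() calls draw from
    observation_space("player_0").seed(42)
    first = observation_space("player_0").sample()
    observation_space("player_0").seed(42)
    second = observation_space("player_0").sample()
    assert first == second  # reproducible

    # by contrast, constructing a fresh space on each call discards the
    # seeded generator, so seeding has no effect on later samples
    def fresh_space(agent):
        return Discrete(4)

    fresh_space("player_0").seed(42)
    print(fresh_space("player_0").sample())  # new instance: not reproducible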