diff --git a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
index e5d3d45c8b..83aad09aba 100644
--- a/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
+++ b/colab/Colab_UnityEnvironment_4_SB3VectorEnv.ipynb
@@ -161,8 +161,8 @@
     "from pathlib import Path\n",
     "from typing import Callable, Any\n",
     "\n",
-    "import gym\n",
-    "from gym import Env\n",
+    "import gymnasium as gym\n",
+    "from gymnasium import Env\n",
     "\n",
     "from stable_baselines3 import PPO\n",
     "from stable_baselines3.common.vec_env import VecMonitor, VecEnv, SubprocVecEnv\n",
diff --git a/docs/Installation-Anaconda-Windows.md b/docs/Installation-Anaconda-Windows.md
index 3b80adbdf0..a06bfc2bd1 100644
--- a/docs/Installation-Anaconda-Windows.md
+++ b/docs/Installation-Anaconda-Windows.md
@@ -144,7 +144,7 @@ reinforcement learning trainers to use with Unity environments.
 The `ml-agents-envs` subdirectory contains a Python API to interface with
 Unity, which the `ml-agents` package depends on.
 
-The `gym-unity` subdirectory contains a package to interface with OpenAI Gym.
+The `gym-unity` subdirectory contains a package to interface with Gymnasium.
 
 Keep in mind where the files were downloaded, as you will need the trainer
 config files in this directory when running `mlagents-learn`. Make sure you are
diff --git a/docs/ML-Agents-Overview.md b/docs/ML-Agents-Overview.md
index 0bdee003f2..f379ceaf57 100644
--- a/docs/ML-Agents-Overview.md
+++ b/docs/ML-Agents-Overview.md
@@ -177,10 +177,10 @@ The ML-Agents Toolkit contains five high-level components:
   and options outlined in this document. The Python Trainers interface solely
   with the Python Low-Level API.
 - **Gym Wrapper** (not pictured). A common way in which machine learning
-  researchers interact with simulation environments is via a wrapper provided by
-  OpenAI called [gym](https://github.com/openai/gym). We provide a gym wrapper
-  in the `ml-agents-envs` package and [instructions](Python-Gym-API.md) for using
-  it with existing machine learning algorithms which utilize gym.
+  researchers interact with simulation environments is via a wrapper called
+  [gymnasium](https://github.com/Farama-Foundation/Gymnasium) (formerly known as gym). We provide a gym wrapper in the `ml-agents-envs` package and
+  [instructions](Python-Gym-API.md) for using it with existing machine learning
+  algorithms which utilize gym.
 - **PettingZoo Wrapper** (not pictured) PettingZoo is python API for
   interacting with multi-agent simulation environments that provides a
   gym-like interface. We provide a PettingZoo wrapper for Unity ML-Agents
diff --git a/docs/Python-Gym-API.md b/docs/Python-Gym-API.md
index 97869899ce..b9d500abfc 100644
--- a/docs/Python-Gym-API.md
+++ b/docs/Python-Gym-API.md
@@ -1,8 +1,8 @@
 # Unity ML-Agents Gym Wrapper
 
 A common way in which machine learning researchers interact with simulation
-environments is via a wrapper provided by OpenAI called `gym`. For more
-information on the gym interface, see [here](https://github.com/openai/gym).
+environments is via a wrapper provided by the Farama Foundation called `gymnasium`
+(formerly known as gym). For more information on the gym interface, see [here](https://github.com/Farama-Foundation/Gymnasium).
 
 We provide a gym wrapper and instructions for using it with existing machine
 learning algorithms which utilize gym. Our wrapper provides interfaces on top of
@@ -93,7 +93,7 @@ observation, a single discrete action and a single Agent in the scene.
 Add the following code to the `train_unity.py` file:
 
 ```python
-import gym
+import gymnasium as gym
 
 from baselines import deepq
 from baselines import logger
diff --git a/localized_docs/KR/docs/Installation-Anaconda-Windows.md b/localized_docs/KR/docs/Installation-Anaconda-Windows.md
index ffe801ad77..eb25ae4ab5 100644
--- a/localized_docs/KR/docs/Installation-Anaconda-Windows.md
+++ b/localized_docs/KR/docs/Installation-Anaconda-Windows.md
@@ -112,7 +112,7 @@ git clone https://github.com/Unity-Technologies/ml-agents.git
 `ml-agents-envs` 하위 디렉토리에는 `ml-agents` 패키지에 종속되는 유니티의
 인터페이스를 위한 파이썬 API가 포함되어 있습니다.
 
-`gym-unity` 하위 디렉토리에는 OpenAI Gym의 인터페이스를 위한 패키지가 포함되어 있습니다.
+`gym-unity` 하위 디렉토리에는 Gymnasium의 인터페이스를 위한 패키지가 포함되어 있습니다.
 
 `mlagents-learn`�� ������ �� Ʈ���̳��� ȯ�� ���� ������ �� ���丮 �ȿ� �ʿ��ϹǷ�,
 ������ �ٿ�ε� �� ���丮�� ��ġ�� ����Ͻʽÿ�. ���ͳ��� ����Ǿ����� Ȯ���ϰ�
 Anaconda ������Ʈ���� ���� ��ɾ Ÿ���� �Ͻʽÿ�:
diff --git a/localized_docs/KR/docs/Installation.md b/localized_docs/KR/docs/Installation.md
index dc525b1f1f..525686d81d 100644
--- a/localized_docs/KR/docs/Installation.md
+++ b/localized_docs/KR/docs/Installation.md
@@ -36,7 +36,7 @@ git clone https://github.com/Unity-Technologies/ml-agents.git
 `ml-agents-envs` 하위 디렉토리에는 `ml-agents` 패키지에 종속되는 유니티의
 인터페이스를 위한 파이썬 API가 포함되어 있습니다.
 
-`gym-unity` 하위 디렉토리에는 OpenAI Gym의 인터페이스를 위한 패키지가 포함되어 있습니다.
+`gym-unity` 하위 디렉토리에는 Gymnasium의 인터페이스를 위한 패키지가 포함되어 있습니다.
 
 ### 파이썬과 mlagents 패키지 설치
diff --git "a/localized_docs/RU/docs/\320\243\321\201\321\202\320\260\320\275\320\276\320\262\320\272\320\260.md" "b/localized_docs/RU/docs/\320\243\321\201\321\202\320\260\320\275\320\276\320\262\320\272\320\260.md"
index eaeaa1a7ed..6b2b7948d6 100644
--- "a/localized_docs/RU/docs/\320\243\321\201\321\202\320\260\320\275\320\276\320\262\320\272\320\260.md"
+++ "b/localized_docs/RU/docs/\320\243\321\201\321\202\320\260\320\275\320\276\320\262\320\272\320\260.md"
@@ -12,7 +12,7 @@ ML-Agents Toolkit состоит из нескольких компоненто
   API для взаимодействия с Unity сценой. Этот пакет управляет передачей данных
   между Unity сценой и алгоритмами машинного обучения, реализованных на Python.
   Пакет mlagents зависит от mlagents_envs.
 - ([`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/main/gym-unity)) - позволяет обернуть вашу сцену
-  в Unity в среду OpenAI Gym.
+  в Unity в среду Gymnasium.
 - Unity [Project](https://github.com/Unity-Technologies/ml-agents/tree/main/Project), содержащий
   [примеры сцены](https://github.com/Unity-Technologies/ml-agents/blob/main/docs/Learning-Environment-Examples.md),
   где реализованы различные возможности ML-Agents для наглядности.
diff --git a/localized_docs/TR/docs/Installation.md b/localized_docs/TR/docs/Installation.md
index 1fb8f5660a..675b090ca2 100644
--- a/localized_docs/TR/docs/Installation.md
+++ b/localized_docs/TR/docs/Installation.md
@@ -7,7 +7,7 @@ ML-Agents Araç Seti birkaç bileşen içermektedir:
   - [`mlagents`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/ml-agents) Unity sahnenizdeki davranışları
     eğitmenizi sağlayan makine öğrenimi algoritmalarını içerir. Bu nedenle `mlagents` paketini kurmanız gerekecek.
   - [`mlagents_envs`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/ml-agents-envs) Unity sahnesiyle etkileşime girmek için
     Python API içermektedir. Unity sahnesi ile Python makine öğrenimi algoritmaları arasında veri mesajlaşmasını kolaylaştıran
     temel bir katmandır. Sonuç olarak, `mlagents,` `mlagents_envs` apisine bağımlıdır.
-  - [`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/gym-unity) OpenAI Gym arayüzünü destekleyen Unity sahneniz için bir Python kapsayıcı sağlar.
+  - [`gym_unity`](https://github.com/Unity-Technologies/ml-agents/tree/release_7_docs/gym-unity) Gymnasium arayüzünü destekleyen Unity sahneniz için bir Python kapsayıcı sağlar.
   - Unity [Project](../Project/) klasörü [örnek ortamlar](Learning-Environment-Examples.md) ile başlamanıza yardımcı olacak
     araç setinin çeşitli özelliklerini vurgulayan sahneler içermektedir.
diff --git a/ml-agents-envs/README.md b/ml-agents-envs/README.md
index 4db68723d2..65ca44d24b 100644
--- a/ml-agents-envs/README.md
+++ b/ml-agents-envs/README.md
@@ -4,7 +4,7 @@
 The `mlagents_envs` Python package is part of the
 [ML-Agents Toolkit](https://github.com/Unity-Technologies/ml-agents).
 `mlagents_envs` provides three Python APIs that allows direct interaction with the Unity game engine:
 
-- A single agent API (Gym API)
+- A single agent API (Gymnasium API)
 - A gym-like multi-agent API (PettingZoo API)
 - A low-level API (LLAPI)
@@ -23,7 +23,7 @@ python -m pip install mlagents_envs==1.1.0
 
 ## Usage & More Information
 
 See
-- [Gym API Guide](../docs/Python-Gym-API.md)
+- [Gymnasium API Guide](../docs/Python-Gym-API.md)
 - [PettingZoo API Guide](../docs/Python-PettingZoo-API.md)
 - [Python API Guide](../docs/Python-LLAPI.md)
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
index 4bb6fdf390..bccae65c0f 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_aec_env.py
@@ -1,5 +1,5 @@
 from typing import Any, Optional
-from gym import error
+from gymnasium import error
 
 from mlagents_envs.base_env import BaseEnv
 from pettingzoo import AECEnv
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
index df29a95c9a..3748ec812f 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_gym_env.py
@@ -3,8 +3,8 @@ import numpy as np
 from typing import Any, Dict, List, Optional, Tuple, Union
 
-import gym
-from gym import error, spaces
+import gymnasium as gym
+from gymnasium import error, spaces
 
 from mlagents_envs.base_env import ActionTuple, BaseEnv
 from mlagents_envs.base_env import DecisionSteps, TerminalSteps
@@ -20,7 +20,7 @@ class UnityGymException(error.Error):
 
 logger = logging_util.get_logger(__name__)
 
-GymStepResult = Tuple[np.ndarray, float, bool, Dict]
+GymStepResult = Tuple[np.ndarray, float, bool, bool, Dict]
 
 
 class UnityToGymWrapper(gym.Env):
@@ -151,11 +151,21 @@ def __init__(
         else:
             self._observation_space = list_spaces[0]  # only return the first one
 
-    def reset(self) -> Union[List[np.ndarray], np.ndarray]:
+    def reset(self, seed: Optional[int] = None, options: Optional[dict[str, Any]] = None) -> Union[Tuple[List[np.ndarray], Dict], Tuple[np.ndarray, Dict]]:
         """Resets the state of the environment and returns an initial observation.
-        Returns: observation (object/list): the initial observation of the
+        Args:
+            seed (int, optional): The seed for the environment. Note that this does not set the seed for the Unity Environment.
+            options (dict, optional): Optional dict containing options for the environment. (Currently not implemented)
+        Returns:
+            observation (object/list): the initial observation of the
             space.
+            info (dict): contains auxiliary diagnostic information.
         """
+        if options is not None:
+            logger.warning("Options are currently unsupported.")
+        if seed is not None:
+            super().reset(seed=seed)
+            logger.warning("reset(seed) does not change the seed in the Unity Environment or the action space")
         self._env.reset()
         decision_step, _ = self._env.get_steps(self.name)
         n_agents = len(decision_step)
@@ -163,26 +173,27 @@ def reset(self) -> Union[List[np.ndarray], np.ndarray]:
         self.game_over = False
 
         res: GymStepResult = self._single_step(decision_step)
-        return res[0]
+        return res[0], res[4]
 
     def step(self, action: List[Any]) -> GymStepResult:
         """Run one timestep of the environment's dynamics. When end of
         episode is reached, you are responsible for calling `reset()`
         to reset this environment's state.
-        Accepts an action and returns a tuple (observation, reward, done, info).
+        Accepts an action and returns a tuple (observation, reward, terminated, truncated, info).
         Args:
             action (object/list): an action provided by the environment
         Returns:
             observation (object/list): agent's observation of the current environment
             reward (float/list) : amount of reward returned after previous action
-            done (boolean/list): whether the episode has ended.
+            terminated (boolean/list): whether the episode has ended.
+            truncated (boolean/list): whether the episode was truncated.
             info (dict): contains auxiliary diagnostic information.
         """
         if self.game_over:
             raise UnityGymException(
                 "You are calling 'step()' even though this environment has already "
-                "returned done = True. You must always call 'reset()' once you "
-                "receive 'done = True'."
+                "returned terminated = True. You must always call 'reset()' once you "
+                "receive 'terminated = True'."
             )
         if self._flattener is not None:
             # Translate action into list
@@ -227,9 +238,9 @@ def _single_step(self, info: Union[DecisionSteps, TerminalSteps]) -> GymStepResu
             visual_obs = self._get_vis_obs_list(info)
             self.visual_obs = self._preprocess_single(visual_obs[0][0])
 
-        done = isinstance(info, TerminalSteps)
+        terminated = isinstance(info, TerminalSteps)
 
-        return (default_observation, info.reward[0], done, {"step": info})
+        return (default_observation, info.reward[0], terminated, False, {"step": info})
 
     def _preprocess_single(self, single_visual_obs: np.ndarray) -> np.ndarray:
         if self.uint8_visual:
@@ -290,13 +301,6 @@ def close(self) -> None:
         """
         self._env.close()
 
-    def seed(self, seed: Any = None) -> None:
-        """Sets the seed for this env's random number generator(s).
-        Currently not implemented.
-        """
-        logger.warning("Could not seed environment %s", self.name)
-        return
-
     @staticmethod
     def _check_agents(n_agents: int) -> None:
         if n_agents > 1:
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
index 09398d27fa..2e67ec1276 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_parallel_env.py
@@ -1,5 +1,5 @@
 from typing import Optional, Dict, Any, Tuple
-from gym import error
+from gymnasium import error
 
 from mlagents_envs.base_env import BaseEnv
 from pettingzoo import ParallelEnv
@@ -20,13 +20,13 @@ def __init__(self, env: BaseEnv, seed: Optional[int] = None):
         """
         super().__init__(env, seed)
 
-    def reset(self) -> Dict[str, Any]:
+    def reset(self) -> Tuple[Dict[str, Any], Dict[str, Any]]:
         """
         Resets the environment.
         """
         super().reset()
-        return self._observations
+        return self._observations, self._infos
 
     def step(self, actions: Dict[str, Any]) -> Tuple:
         self._assert_loaded()
@@ -50,4 +50,4 @@ def step(self, actions: Dict[str, Any]) -> Tuple:
             self._cleanup_agents()
         self._live_agents.sort()  # unnecessary, only for passing API test
 
-        return self._observations, self._rewards, self._dones, self._infos
+        return self._observations, self._rewards, self._dones, False, self._infos
diff --git a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
index 3457f18c88..c040050a2b 100644
--- a/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
+++ b/ml-agents-envs/mlagents_envs/envs/unity_pettingzoo_base_env.py
@@ -1,7 +1,7 @@
 import atexit
 from typing import Optional, List, Set, Dict, Any, Tuple
 import numpy as np
-from gym import error, spaces
+from gymnasium import error, spaces
 
 from mlagents_envs.base_env import BaseEnv, ActionTuple
 from mlagents_envs.envs.env_helpers import _agent_id_to_behavior, _unwrap_batch_steps
diff --git a/ml-agents-envs/setup.py b/ml-agents-envs/setup.py
index fcbee96151..71106311a9 100644
--- a/ml-agents-envs/setup.py
+++ b/ml-agents-envs/setup.py
@@ -58,8 +58,8 @@ def run(self):
         "Pillow>=4.2.1",
         "protobuf>=3.6,<3.21",
         "pyyaml>=3.1.0",
-        "gym>=0.21.0",
-        "pettingzoo==1.15.0",
+        "gymnasium",
+        "pettingzoo>=1.22.0",
         "numpy>=1.23.5,<1.24.0",
         "filelock>=3.4.0",
     ],
diff --git a/ml-agents-envs/tests/test_gym.py b/ml-agents-envs/tests/test_gym.py
index 4fc2bf548c..ae9327e8f8 100644
--- a/ml-agents-envs/tests/test_gym.py
+++ b/ml-agents-envs/tests/test_gym.py
@@ -2,7 +2,7 @@
 import pytest
 import numpy as np
 
-from gym import spaces
+from gymnasium import spaces
 
 from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper
 from mlagents_envs.base_env import (
@@ -23,14 +23,17 @@ def test_gym_wrapper():
         mock_env, mock_spec, mock_decision_step, mock_terminal_step
     )
     env = UnityToGymWrapper(mock_env)
-    assert isinstance(env.reset(), np.ndarray)
+    reset_obs, reset_info = env.reset()
+    assert isinstance(reset_obs, np.ndarray)
+    assert isinstance(reset_info, dict)
     actions = env.action_space.sample()
     assert actions.shape[0] == 2
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert env.observation_space.contains(obs)
     assert isinstance(obs, np.ndarray)
     assert isinstance(rew, float)
-    assert isinstance(done, (bool, np.bool_))
+    assert isinstance(term, (bool, np.bool_))
+    assert isinstance(trunc, (bool, np.bool_))
     assert isinstance(info, dict)
 
 
@@ -108,14 +111,17 @@ def test_gym_wrapper_visual(use_uint8):
     env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8)
 
     assert isinstance(env.observation_space, spaces.Box)
-    assert isinstance(env.reset(), np.ndarray)
+    reset_obs, reset_info = env.reset()
+    assert isinstance(reset_obs, np.ndarray)
+    assert isinstance(reset_info, dict)
     actions = env.action_space.sample()
     assert actions.shape[0] == 2
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert env.observation_space.contains(obs)
     assert isinstance(obs, np.ndarray)
     assert isinstance(rew, float)
-    assert isinstance(done, (bool, np.bool_))
+    assert isinstance(term, (bool, np.bool_))
+    assert isinstance(trunc, (bool, np.bool_))
     assert isinstance(info, dict)
 
 
@@ -137,32 +143,35 @@ def test_gym_wrapper_single_visual_and_vector(use_uint8):
     env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=True)
     assert isinstance(env.observation_space, spaces.Tuple)
     assert len(env.observation_space) == 2
-    reset_obs = env.reset()
+    reset_obs, reset_info = env.reset()
     assert isinstance(reset_obs, list)
+    assert isinstance(reset_info, dict)
     assert len(reset_obs) == 2
     assert all(isinstance(ob, np.ndarray) for ob in reset_obs)
     assert reset_obs[-1].shape == (3,)
     assert len(reset_obs[0].shape) == 3
     actions = env.action_space.sample()
     assert actions.shape == (2,)
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert isinstance(obs, list)
     assert len(obs) == 2
     assert all(isinstance(ob, np.ndarray) for ob in obs)
     assert reset_obs[-1].shape == (3,)
     assert isinstance(rew, float)
-    assert isinstance(done, (bool, np.bool_))
+    assert isinstance(term, (bool, np.bool_))
+    assert isinstance(trunc, (bool, np.bool_))
     assert isinstance(info, dict)
 
     # check behavior for allow_multiple_obs = False
     env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=False)
     assert isinstance(env.observation_space, spaces.Box)
-    reset_obs = env.reset()
+    reset_obs, reset_info = env.reset()
     assert isinstance(reset_obs, np.ndarray)
+    assert isinstance(reset_info, dict)
     assert len(reset_obs.shape) == 3
     actions = env.action_space.sample()
     assert actions.shape == (2,)
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert isinstance(obs, np.ndarray)
 
 
@@ -184,28 +193,31 @@ def test_gym_wrapper_multi_visual_and_vector(use_uint8):
     env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=True)
     assert isinstance(env.observation_space, spaces.Tuple)
     assert len(env.observation_space) == 3
-    reset_obs = env.reset()
+    reset_obs, reset_info = env.reset()
     assert isinstance(reset_obs, list)
+    assert isinstance(reset_info, dict)
     assert len(reset_obs) == 3
     assert all(isinstance(ob, np.ndarray) for ob in reset_obs)
     assert reset_obs[-1].shape == (3,)
     actions = env.action_space.sample()
     assert actions.shape == (2,)
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert all(isinstance(ob, np.ndarray) for ob in obs)
     assert isinstance(rew, float)
-    assert isinstance(done, (bool, np.bool_))
+    assert isinstance(term, (bool, np.bool_))
+    assert isinstance(trunc, (bool, np.bool_))
     assert isinstance(info, dict)
 
     # check behavior for allow_multiple_obs = False
     env = UnityToGymWrapper(mock_env, uint8_visual=use_uint8, allow_multiple_obs=False)
     assert isinstance(env.observation_space, spaces.Box)
-    reset_obs = env.reset()
+    reset_obs, reset_info = env.reset()
     assert isinstance(reset_obs, np.ndarray)
+    assert isinstance(reset_info, dict)
     assert len(reset_obs.shape) == 3
     actions = env.action_space.sample()
     assert actions.shape == (2,)
-    obs, rew, done, info = env.step(actions)
+    obs, rew, term, trunc, info = env.step(actions)
     assert isinstance(obs, np.ndarray)
diff --git a/ml-agents/tests/yamato/scripts/run_gym.py b/ml-agents/tests/yamato/scripts/run_gym.py
index 6d698a662b..f9fff52759 100644
--- a/ml-agents/tests/yamato/scripts/run_gym.py
+++ b/ml-agents/tests/yamato/scripts/run_gym.py
@@ -17,12 +17,14 @@ def test_run_environment(env_name):
         print(str(env))
 
         # Reset the environment
-        initial_observations = env.reset()
+        initial_observations, initial_info = env.reset()
 
         if len(env.observation_space.shape) == 1:
             # Examine the initial vector observation
            print(f"Agent observations look like: \n{initial_observations}")
+            print(f"Agent info looks like: \n{initial_info}")
+
         for _episode in range(10):
             env.reset()
             done = False
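
For a quick sanity check of the interface introduced by these changes, the following is a minimal usage sketch of `UnityToGymWrapper` under the Gymnasium-style API. The build path and the `uint8_visual` flag are illustrative placeholders, not part of this change set:

```python
# Minimal sketch: drive a local Unity build through the updated Gymnasium-style wrapper.
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

unity_env = UnityEnvironment("./UnityBuild")  # placeholder path to a local Unity executable
env = UnityToGymWrapper(unity_env, uint8_visual=True)

# reset() now returns (observation, info) instead of a bare observation.
obs, info = env.reset()

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()
    # step() now returns a 5-tuple; this wrapper signals episode ends via
    # `terminated` and always reports `truncated` as False.
    obs, reward, terminated, truncated, info = env.step(action)

env.close()
```

As the wrapper changes above show, truncation is never raised by the wrapper itself, so any time-limit handling still has to come from the Unity environment or an outer Gymnasium wrapper.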