hw_submission(pgp): add hw6_20230429 #73

Open · wants to merge 7 commits into main
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
*wandb
*demo
*.DS_Store
temp.py
6 changes: 6 additions & 0 deletions my_homework/.gitignore
@@ -0,0 +1,6 @@
.idea
DI-engine
ding_study
wandb
output
.data
15 changes: 15 additions & 0 deletions my_homework/README.md
@@ -0,0 +1,15 @@
# PPOxFamily
This repository is for learning the PPO family of algorithms.

More details at: [https://github.com/opendilab/PPOxFamily](https://github.com/opendilab/PPOxFamily)

<div align="center">
<a href="https://github.com/opendilab/PPOxFamily"><img width="500px" height="auto" src="https://github.com/opendilab/PPOxFamily/raw/main/assets/ppof_logo.png"></a>
</div>

## TODO
- [x] finish all the code for classes one to four
- [x] correct the notes
- [x] theoretical derivation processes
- [ ] 🆕 class five

307 changes: 307 additions & 0 deletions my_homework/ch2/homework2.ipynb


56 changes: 56 additions & 0 deletions my_homework/ch2/homework2_3.py
@@ -0,0 +1,56 @@
# Please install the latest DI-engine main branch first.
# DI-engine v0.4.6 will be released with stable, tuned configurations for these demos.
from ding.bonus import PPOF


def lunarlander_discrete():
# Please install lunarlander env first, `pip3 install box2d`
agent = PPOF(env='lunarlander_discrete', exp_name='./lunarlander_discrete_demo')
agent.train(step=int(1e5))
# Classic RL interaction loop and save replay video
agent.deploy(enable_save_replay=True)


def lunarlander_continuous():
# Please install lunarlander env first, `pip3 install box2d`
agent = PPOF(env='lunarlander_continuous', exp_name='./lunarlander_continuous_demo', seed=314)
agent.train(step=int(1e5))
# Batch (Vectorized) evaluation
agent.batch_evaluate(env_num=4, n_evaluator_episode=8)


def rocket_landing():
# Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling`
agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo')
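    # context='spawn' presumably sets the multiprocessing start method for the env subprocesses
    # (an assumption: 'spawn' avoids fork-related issues on platforms such as Windows/macOS)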
agent.train(step=int(5e6), context='spawn')


def rocket_landing_deploy():
# Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling`
agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo')
    agent.deploy(ckpt_path="rocket_landing_demo/ckpt/iteration_38400.pth.tar", enable_save_replay=True)


def drone_fly():
# Please install gym_pybullet_drones env first, `pip3 install git+https://github.com/zjowowen/gym-pybullet-drones@master`
agent = PPOF(env='drone_fly', exp_name='./drone_fly_demo')
agent.train(step=int(5e6))


def hybrid_moving():
# Please install gym_hybrid env first, refer to the doc `https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_hybrid_zh.html`
agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo')
agent.train(step=int(5e6))


def hybrid_moving_deploy():
agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo')
agent.deploy(enable_save_replay=True)


if __name__ == "__main__":
# You can select and run your favorite demo
# lunarlander_discrete()
# lunarlander_continuous()
# rocket_landing()
rocket_landing_deploy()
# drone_fly()
# hybrid_moving()
# hybrid_moving_deploy()
459 changes: 459 additions & 0 deletions my_homework/ch3/homework3.ipynb


68 changes: 68 additions & 0 deletions my_homework/ch3/homework3_2.py
@@ -0,0 +1,68 @@
# Please install the latest DI-engine main branch first.
from ding.bonus import PPOF


def bipedalwalker():
# Please install bipedalwalker env first, `pip3 install box2d`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/bipedalwalker_zh.html) for more details
agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
agent.train(step=int(1e6))


def bipedalwalker_deploy():
    # Classic RL interaction loop and save replay video
    agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
    agent.deploy(ckpt_path="bipedalwalker_demo/ckpt/iteration_155680.pth.tar", enable_save_replay=True)


def evogym_carrier():
# Please install evogym env first, refer to its doc (https://github.com/EvolutionGym/evogym#installation)
# Or you can use our provided docker (opendilab/ding:nightly-evogym)
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/Evogym_zh.html) for more details
agent = PPOF(env='evogym_carrier', exp_name='./evogym_carrier_demo')
agent.train(step=int(1e6))


def mario():
# Please install mario env first, `pip install gym-super-mario-bros`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_super_mario_bros_zh.html) for more details
agent = PPOF(env='mario', exp_name='./mario_demo')
agent.train(step=int(3e6))


def mario_deploy():
agent = PPOF(env='mario', exp_name='./mario_demo')
agent.deploy(enable_save_replay=True)


def di_sheep():
    # Please prepare the di_sheep env and model first; you can copy the env and model files to the current directory.
    # They are located at https://github.com/opendilab/DI-sheep/blob/master/service
from sheep_env import SheepEnv
from sheep_model import SheepModel
env = SheepEnv(level=9)
obs_space = env.observation_space
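    # assumed layout: obs_space['item_obs'] has shape (item_num, item_obs_size); the model dims below are read from it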
model = SheepModel(
item_obs_size=obs_space['item_obs'].shape[1],
item_num=obs_space['item_obs'].shape[0],
item_encoder_type='TF',
bucket_obs_size=obs_space['bucket_obs'].shape[0],
global_obs_size=obs_space['global_obs'].shape[0],
ttorch_return=True,
)
agent = PPOF(env='di_sheep', exp_name='./di_sheep_demo', model=model)
agent.train(step=int(1e6))


def procgen_bigfish():
# Please install procgen env first, `pip install procgen`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/procgen_zh.html) for more details
agent = PPOF(env='procgen_bigfish', exp_name='./procgen_bigfish_demo')
agent.train(step=int(1e7))


if __name__ == "__main__":
# You can select and run your favorite demo
# bipedalwalker()
bipedalwalker_deploy()
# evogym_carrier()
# mario()
# di_sheep()
# procgen_bigfish()
113 changes: 113 additions & 0 deletions my_homework/ch4/home_analyse.ipynb
@@ -0,0 +1,113 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# CH4 "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## homework 1"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"![](demo/image1.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Result"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### small\n",
"![](demo/small.png)\n",
"#### little\n",
"![](demo/little.png)\n",
"#### standard\n",
"![](demo/standard.png)\n",
"#### large\n",
"![](demo/large.png)\n",
"#### very large\n",
"![](demo/verylarge.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyse"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"从上面不同模型大小的输出图像分析模型阐述 RND 网络的过拟合和欠拟合问题\n",
"\n",
"可以看到在模型参数较小的时候,整体的 RND 网络的 reward 是非常大的,说明模型随机性很大,出现欠拟合\n",
"\n",
"随着模型参数量的增加 mse 和 reward 都在往下降,但是当模型逐渐转变到 large 和 very large 的时候 reward_min 无法收敛,发生了过拟合"
]
},
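{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Below is a minimal RND sketch (my own illustration, not the course code): the intrinsic reward is the prediction error between a trainable predictor and a frozen, randomly initialized target network, and `hidden_size` plays the role of the small/large model sizes compared above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"\n",
"class RNDNet(nn.Module):\n",
"    def __init__(self, obs_size, hidden_size):\n",
"        super().__init__()\n",
"        self.net = nn.Sequential(\n",
"            nn.Linear(obs_size, hidden_size), nn.ReLU(),\n",
"            nn.Linear(hidden_size, hidden_size),\n",
"        )\n",
"\n",
"    def forward(self, obs):\n",
"        return self.net(obs)\n",
"\n",
"\n",
"obs_size, hidden_size = 4, 64\n",
"target = RNDNet(obs_size, hidden_size)  # stays random and frozen\n",
"predictor = RNDNet(obs_size, hidden_size)  # trained to imitate the target\n",
"for p in target.parameters():\n",
"    p.requires_grad = False\n",
"\n",
"obs = torch.randn(8, obs_size)\n",
"# the per-state MSE is both the predictor's training loss and the intrinsic reward\n",
"reward = ((predictor(obs) - target(obs)) ** 2).mean(dim=1)\n",
"print(reward.shape)  # torch.Size([8])"
]
},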
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## homework 2 "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"![](demo/image2.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"由于没法制作视频,好像是 metadrive 需要的是 gym=0.19.0 的环境,我安装之后发现运行 deploy 成功但是没法记录视频,因此只能够上传 log"
]
},
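{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A possible workaround sketch (my own assumption, not verified against metadrive): instead of relying on `enable_save_replay`, collect frames manually with `env.render(mode='rgb_array')` under gym 0.19 and write them with `imageio` (requires `imageio` and `imageio-ffmpeg`). The env name below is only a stand-in."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import imageio  # pip install imageio imageio-ffmpeg\n",
"\n",
"env = gym.make('CartPole-v1')  # stand-in; metadrive may need its own offscreen render setup\n",
"frames, obs, done = [], env.reset(), False\n",
"while not done:\n",
"    frames.append(env.render(mode='rgb_array'))\n",
"    obs, reward, done, info = env.step(env.action_space.sample())\n",
"env.close()\n",
"imageio.mimsave('replay.mp4', frames, fps=30)"
]
},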
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"log_path = [run-20230312_134251-g1s73ewz]()"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
55 changes: 55 additions & 0 deletions my_homework/ch4/homework4.py
@@ -0,0 +1,55 @@
# Please install the latest DI-engine main branch first.
from ding.bonus import PPOF


def acrobot():
# Please install acrobot env first, `pip3 install gym`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details
agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo')
agent.train(step=int(1e5))


def acrobot_deploy():
# Please install acrobot env first, `pip3 install gym`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details
agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo')
agent.deploy(enable_save_replay=True)


def metadrive():
# Please install metadrive env first, `pip install metadrive-simulator`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/metadrive_zh.html) for more details
agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo')
agent.train(step=int(1e6), context='spawn')


def metadrive_deploy():
agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo')
agent.deploy(enable_save_replay=True)


def metadrive_install_test():
    from metadrive import MetaDriveEnv
    env = MetaDriveEnv()
    obs = env.reset()
    print(obs.shape)  # prints (259,)


def minigrid_fourroom():
# Please install minigrid env first, `pip install gym-minigrid`
# Note: minigrid env doesn't support Windows platform
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/minigrid_zh.html) for more details
agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom')
agent.train(step=int(3e6))


def minigrid_fourroom_deploy():
agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom')
    agent.deploy(enable_save_replay=True)


if __name__ == "__main__":
# acrobot()
# acrobot_deploy()
# metadrive_install_test()
metadrive()
# metadrive_deploy()
# minigrid_fourroom()