hw_submission(pgp): add hw6_20230429 #73

Open · wants to merge 7 commits into main
1 change: 1 addition & 0 deletions .gitignore
@@ -2,3 +2,4 @@
*wandb
*demo
*.DS_Store
temp.py
6 changes: 6 additions & 0 deletions my_homework/.gitignore
@@ -0,0 +1,6 @@
.idea
DI-engine
ding_study
wandb
output
.data
15 changes: 15 additions & 0 deletions my_homework/README.md
@@ -0,0 +1,15 @@
# PPOxFamily
This repository is for learning the PPO family of algorithms.

More details at: [https://github.com/opendilab/PPOxFamily](https://github.com/opendilab/PPOxFamily)

<div align="center">
<a href="https://github.com/opendilab/PPOxFamily"><img width="500px" height="auto" src="https://github.com/opendilab/PPOxFamily/raw/main/assets/ppof_logo.png"></a>
</div>

## TODO
- [x] finish all the code for classes one to four
- [x] correct the notes
- [x] theoretical derivation processes
- [ ] 🆕 class five

307 changes: 307 additions & 0 deletions my_homework/ch2/homework2.ipynb


56 changes: 56 additions & 0 deletions my_homework/ch2/homework2_3.py
@@ -0,0 +1,56 @@
# Please install the latest DI-engine main branch first.
# DI-engine v0.4.6 will be released with stable, tuned configurations for these demos.
from ding.bonus import PPOF


def lunarlander_discrete():
# Please install lunarlander env first, `pip3 install box2d`
agent = PPOF(env='lunarlander_discrete', exp_name='./lunarlander_discrete_demo')
agent.train(step=int(1e5))
# Classic RL interaction loop and save replay video
agent.deploy(enable_save_replay=True)


def lunarlander_continuous():
# Please install lunarlander env first, `pip3 install box2d`
agent = PPOF(env='lunarlander_continuous', exp_name='./lunarlander_continuous_demo', seed=314)
agent.train(step=int(1e5))
# Batch (Vectorized) evaluation
agent.batch_evaluate(env_num=4, n_evaluator_episode=8)


def rocket_landing():
# Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling`
agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo')
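    # context='spawn' presumably sets the multiprocessing start method for the env subprocesses
    # (an assumption: 'spawn' avoids fork-related issues on platforms such as Windows/macOS)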
agent.train(step=int(5e6), context='spawn')


def rocket_landing_deploy():
# Please install rocket env first, `pip3 install git+https://github.com/nighood/rocket-recycling@master#egg=rocket_recycling`
agent = PPOF(env='rocket_landing', exp_name='./rocket_landing_demo')
    agent.deploy(ckpt_path="rocket_landing_demo/ckpt/iteration_38400.pth.tar", enable_save_replay=True)


def drone_fly():
# Please install gym_pybullet_drones env first, `pip3 install git+https://github.com/zjowowen/gym-pybullet-drones@master`
agent = PPOF(env='drone_fly', exp_name='./drone_fly_demo')
agent.train(step=int(5e6))


def hybrid_moving():
# Please install gym_hybrid env first, refer to the doc `https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_hybrid_zh.html`
agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo')
agent.train(step=int(5e6))


def hybrid_moving_deploy():
agent = PPOF(env='hybrid_moving', exp_name='./hybrid_moving_demo')
agent.deploy(enable_save_replay=True)


if __name__ == "__main__":
# You can select and run your favorite demo
# lunarlander_discrete()
# lunarlander_continuous()
# rocket_landing()
rocket_landing_deploy()
# drone_fly()
# hybrid_moving()
# hybrid_moving_deploy()
459 changes: 459 additions & 0 deletions my_homework/ch3/homework3.ipynb


68 changes: 68 additions & 0 deletions my_homework/ch3/homework3_2.py
@@ -0,0 +1,68 @@
# Please install the latest DI-engine main branch first.
from ding.bonus import PPOF


def bipedalwalker():
# Please install bipedalwalker env first, `pip3 install box2d`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/bipedalwalker_zh.html) for more details
agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
agent.train(step=int(1e6))


def bipedalwalker_deploy():
    # Classic RL interaction loop and save replay video
    agent = PPOF(env='bipedalwalker', exp_name='./bipedalwalker_demo')
    agent.deploy(ckpt_path="bipedalwalker_demo/ckpt/iteration_155680.pth.tar", enable_save_replay=True)


def evogym_carrier():
# Please install evogym env first, refer to its doc (https://github.com/EvolutionGym/evogym#installation)
# Or you can use our provided docker (opendilab/ding:nightly-evogym)
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/Evogym_zh.html) for more details
agent = PPOF(env='evogym_carrier', exp_name='./evogym_carrier_demo')
agent.train(step=int(1e6))


def mario():
# Please install mario env first, `pip install gym-super-mario-bros`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/gym_super_mario_bros_zh.html) for more details
agent = PPOF(env='mario', exp_name='./mario_demo')
agent.train(step=int(3e6))


def mario_deploy():
agent = PPOF(env='mario', exp_name='./mario_demo')
agent.deploy(enable_save_replay=True)


def di_sheep():
    # Please prepare the di_sheep env and model first; you can copy the env and model files to the current directory.
    # They are located at https://github.com/opendilab/DI-sheep/blob/master/service
from sheep_env import SheepEnv
from sheep_model import SheepModel
env = SheepEnv(level=9)
obs_space = env.observation_space
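    # assumed layout: obs_space['item_obs'] has shape (item_num, item_obs_size); the model dims below are read from it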
model = SheepModel(
item_obs_size=obs_space['item_obs'].shape[1],
item_num=obs_space['item_obs'].shape[0],
item_encoder_type='TF',
bucket_obs_size=obs_space['bucket_obs'].shape[0],
global_obs_size=obs_space['global_obs'].shape[0],
ttorch_return=True,
)
agent = PPOF(env='di_sheep', exp_name='./di_sheep_demo', model=model)
agent.train(step=int(1e6))


def procgen_bigfish():
# Please install procgen env first, `pip install procgen`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/procgen_zh.html) for more details
agent = PPOF(env='procgen_bigfish', exp_name='./procgen_bigfish_demo')
agent.train(step=int(1e7))


if __name__ == "__main__":
# You can select and run your favorite demo
# bipedalwalker()
bipedalwalker_deploy()
# evogym_carrier()
# mario()
# di_sheep()
# procgen_bigfish()
113 changes: 113 additions & 0 deletions my_homework/ch4/home_analyse.ipynb
@@ -0,0 +1,113 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# CH4 "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## homework 1"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"![](demo/image1.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Result"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### small\n",
"![](demo/small.png)\n",
"#### little\n",
"![](demo/little.png)\n",
"#### standard\n",
"![](demo/standard.png)\n",
"#### large\n",
"![](demo/large.png)\n",
"#### very large\n",
"![](demo/verylarge.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Analyse"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"从上面不同模型大小的输出图像分析模型阐述 RND 网络的过拟合和欠拟合问题\n",
"\n",
"可以看到在模型参数较小的时候,整体的 RND 网络的 reward 是非常大的,说明模型随机性很大,出现欠拟合\n",
"\n",
"随着模型参数量的增加 mse 和 reward 都在往下降,但是当模型逐渐转变到 large 和 very large 的时候 reward_min 无法收敛,发生了过拟合"
]
},
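{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Below is a minimal RND sketch (my own illustration, not the course code): the intrinsic reward is the prediction error between a trainable predictor and a frozen, randomly initialized target network, and `hidden_size` plays the role of the small/large model sizes compared above."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn as nn\n",
"\n",
"\n",
"class RNDNet(nn.Module):\n",
"    def __init__(self, obs_size, hidden_size):\n",
"        super().__init__()\n",
"        self.net = nn.Sequential(\n",
"            nn.Linear(obs_size, hidden_size), nn.ReLU(),\n",
"            nn.Linear(hidden_size, hidden_size),\n",
"        )\n",
"\n",
"    def forward(self, obs):\n",
"        return self.net(obs)\n",
"\n",
"\n",
"obs_size, hidden_size = 4, 64\n",
"target = RNDNet(obs_size, hidden_size)  # stays random and frozen\n",
"predictor = RNDNet(obs_size, hidden_size)  # trained to imitate the target\n",
"for p in target.parameters():\n",
"    p.requires_grad = False\n",
"\n",
"obs = torch.randn(8, obs_size)\n",
"# the per-state MSE is both the predictor's training loss and the intrinsic reward\n",
"reward = ((predictor(obs) - target(obs)) ** 2).mean(dim=1)\n",
"print(reward.shape)  # torch.Size([8])"
]
},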
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## homework 2 "
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"![](demo/image2.png)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"由于没法制作视频,好像是 metadrive 需要的是 gym=0.19.0 的环境,我安装之后发现运行 deploy 成功但是没法记录视频,因此只能够上传 log"
]
},
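{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A possible workaround sketch (my own assumption, not verified against metadrive): instead of relying on `enable_save_replay`, collect frames manually with `env.render(mode='rgb_array')` under gym 0.19 and write them with `imageio` (requires `imageio` and `imageio-ffmpeg`). The env name below is only a stand-in."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import gym\n",
"import imageio  # pip install imageio imageio-ffmpeg\n",
"\n",
"env = gym.make('CartPole-v1')  # stand-in; metadrive may need its own offscreen render setup\n",
"frames, obs, done = [], env.reset(), False\n",
"while not done:\n",
"    frames.append(env.render(mode='rgb_array'))\n",
"    obs, reward, done, info = env.step(env.action_space.sample())\n",
"env.close()\n",
"imageio.mimsave('replay.mp4', frames, fps=30)"
]
},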
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"log_path = [run-20230312_134251-g1s73ewz]()"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
55 changes: 55 additions & 0 deletions my_homework/ch4/homework4.py
@@ -0,0 +1,55 @@
# Please install the latest DI-engine main branch first.
from ding.bonus import PPOF


def acrobot():
# Please install acrobot env first, `pip3 install gym`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details
agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo')
agent.train(step=int(1e5))


def acrobot_deploy():
# Please install acrobot env first, `pip3 install gym`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/acrobot_zh.html) for more details
agent = PPOF(env='acrobot', exp_name='output/ch4/acrobot_demo')
agent.deploy(enable_save_replay=True)


def metadrive():
# Please install metadrive env first, `pip install metadrive-simulator`
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/metadrive_zh.html) for more details
agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo')
agent.train(step=int(1e6), context='spawn')


def metadrive_deploy():
agent = PPOF(env='metadrive', exp_name='output/ch4/metadrive_demo')
agent.deploy(enable_save_replay=True)


def metadrive_install_test():
    from metadrive import MetaDriveEnv
    env = MetaDriveEnv()
    obs = env.reset()
    print(obs.shape)  # prints (259,)


def minigrid_fourroom():
# Please install minigrid env first, `pip install gym-minigrid`
# Note: minigrid env doesn't support Windows platform
# You can refer to the env doc (https://di-engine-docs.readthedocs.io/zh_CN/latest/13_envs/minigrid_zh.html) for more details
agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom')
agent.train(step=int(3e6))


def minigrid_fourroom_deploy():
agent = PPOF(env='minigrid_fourroom', exp_name='output/ch4/minigrid_fourroom')
    agent.deploy(enable_save_replay=True)


if __name__ == "__main__":
# acrobot()
# acrobot_deploy()
# metadrive_install_test()
metadrive()
# metadrive_deploy()
# minigrid_fourroom()