Tasks remake (haosulab#195)
* work

* sac impl

* update template

* add minimal pick cube task with "task sheet"

* task sheets

* work

* work

* work

* fix templates

* using torch rng

* use torch rng

* batched uniform sampler, stack cube v1 task

* fix bugs with scene masking, merge_actors function

* Update pick_single_ycb.py

* Update pick_single_ycb.py

* bug fixes

* stack cube task bug fix

* refactor: appending to the hidden_objects list allows hiding of e.g. goal sites

* WIP work

* Revert "WIP work"

This reverts commit 9a6710f.

* Revert "Revert "WIP work""

This reverts commit dcb3a5f.

* refactor some code, cache some properties, batch some properties

* fix naming of joints and links to also be prefixed with scene-{i}

* bug fixes. Disable some functionality via a ._merged bool

* disable hand camera on panda (people should use the RealSense panda config)

* fix some setters

* work

* cache property for body data indices, it is now auto computed for user

* work

* change default value for static as resting objects still have a minor amount of angular rotation at times

* code updates

* decorator to raise an error, once the GPU is initialized, for functions the user should not call

* Update actor.py

* bug fixes

* work

* new utility functions for getting desired meshes, fixed goal site place

* performance improvements, swap obs and info

* Update __init__.py

* Update articulation.py

* Update pick_single_ycb.py

* update documentation

* tuned max episode steps for faster PPO

* use new memory-leak-free SAPIEN and new buffer access API, fix controller reconfigure

* bug fixes, new tests

* test

* more bug fixes

* work

* add GPU support for velocity target control, perf improvements for indexing articulation-related things, fix some bugs

* Update base_controller.py

* fix collision setting bugs

* apply vel

* fix self collision bugs

* fix gymnasium vector env wrapper to ignore the timelimit wrapper

* clean up

* fixes

* linkmerging

* bug fix

* work

* better camera angles

* work

* bug fixes

* code clean up with new wrappers
StoneT2000 committed Jan 30, 2024
1 parent 0346890 commit a9ca4a4
Showing 65 changed files with 1,589 additions and 1,576 deletions.
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
@@ -33,6 +33,11 @@ coverage run --source=mani_skill2/ -a -m pytest tests # run tests
coverage html --include=mani_skill2/**/*.py # see the test coverage results
```

To skip generating a coverage report and to make debugging easier, you can just run
```
pytest tests/ --pdb --pdbcls=IPython.terminal.debugger:Pdb
```


## Building

11 changes: 9 additions & 2 deletions examples/baselines/ppo/README.md
@@ -3,6 +3,13 @@
Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)

```bash
python ppo.py --num_envs=512 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=1024 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
python ppo.py --num_envs=2048 --update_epochs=1 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
```
python ppo.py --num_envs=1024 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=128 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="OpenCabinetDrawer-v1" --num-steps=100 --total_timesteps=50000000
```

1024, 100, max100: StackCube-v1__ppo__1__1706348323
1024, 50, max100: StackCube-v1__ppo__1__1706368801
1024, 50, max50: StackCube-v1__ppo__1__1706385843
2048, 25, max25: not a good idea
51 changes: 18 additions & 33 deletions examples/baselines/ppo/ppo.py
@@ -10,12 +10,11 @@
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import tyro
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter

from mani_skill2.utils.visualization.misc import images_to_video, tile_images
# ManiSkill specific imports
import mani_skill2.envs
from mani_skill2.utils.wrappers.record import RecordEpisode
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
@@ -38,7 +37,7 @@ class Args:
"""the entity (team) of wandb's project"""
capture_video: bool = True
"""whether to capture videos of the agent performances (check out `videos` folder)"""
save_model: bool = False
save_model: bool = True
"""whether to save model into the `runs/{run_name}` folder"""
upload_model: bool = False
"""whether to upload the saved model to huggingface"""
@@ -56,7 +55,7 @@ class Args:
"""the number of parallel environments"""
num_eval_envs: int = 8
"""the number of parallel evaluation environments"""
num_steps: int = 100
num_steps: int = 50
"""the number of steps to run in each environment per policy rollout"""
anneal_lr: bool = False
"""Toggle learning rate annealing for policy and value networks"""
@@ -212,6 +211,9 @@ def get_action_and_value(self, x, action=None):
action_space_low, action_space_high = torch.from_numpy(envs.single_action_space.low).to(device), torch.from_numpy(envs.single_action_space.high).to(device)
def clip_action(action: torch.Tensor):
return torch.clamp(action.detach(), action_space_low, action_space_high)

# model_path = "/home/stao/work/research/maniskill/ManiSkill2/examples/baselines/ppo/runs/StackCube-v1__ppo__1__1706294550/ppo_3076.cleanrl_model"
# agent.load_state_dict(torch.load(model_path))
for iteration in range(1, args.num_iterations + 1):
timeout_bonus = torch.zeros((args.num_steps, args.num_envs), device=device)
if iteration % args.eval_freq == 1:
@@ -224,12 +226,17 @@ def clip_action(action: torch.Tensor):
if eval_truncations.any():
eval_done = True
info = eval_infos["final_info"]
# print(info)
episodic_return = info['episode']['r'].mean().cpu().numpy()
print(f"eval_episodic_return={episodic_return}")
writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"], global_step)

writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
# exit()
if args.save_model and iteration % args.eval_freq == 1:
model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
torch.save(agent.state_dict(), model_path)
print(f"model saved to {model_path}")
# Annealing the rate if instructed to do so.
if args.anneal_lr:
frac = 1.0 - (iteration - 1.0) / args.num_iterations
@@ -266,7 +273,7 @@ def clip_action(action: torch.Tensor):
print(f"global_step={global_step}, episodic_return={episodic_return}")
writer.add_scalar("charts/success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/episodic_return", episodic_return, global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"], global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
# bootstrap value if not done
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
@@ -361,32 +368,10 @@ def clip_action(action: torch.Tensor):
writer.add_scalar("losses/explained_variance", explained_var, global_step)
print("SPS:", int(global_step / (time.time() - start_time)))
writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

# if args.save_model:
# model_path = f"runs/{run_name}/{args.exp_name}.cleanrl_model"
# torch.save(agent.state_dict(), model_path)
# print(f"model saved to {model_path}")
# from cleanrl_utils.evals.ppo_eval import evaluate

# episodic_returns = evaluate(
# model_path,
# make_env,
# args.env_id,
# eval_episodes=10,
# run_name=f"{run_name}-eval",
# Model=Agent,
# device=device,
# gamma=args.gamma,
# )
# for idx, episodic_return in enumerate(episodic_returns):
# writer.add_scalar("eval/episodic_return", episodic_return, idx)

# if args.upload_model:
# from cleanrl_utils.huggingface import push_to_hub

# repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
# repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
# push_to_hub(args, episodic_returns, repo_id, "PPO", f"runs/{run_name}", f"videos/{run_name}-eval")
if args.save_model:
model_path = f"runs/{run_name}/{args.exp_name}_final.cleanrl_model"
torch.save(agent.state_dict(), model_path)
print(f"model saved to {model_path}")

envs.close()
writer.close()
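
The checkpoints saved above are plain `state_dict` files, so they can be reloaded for evaluation roughly as in the sketch below. This is a minimal sketch, not part of the diff: the run directory name is illustrative, and `envs`, `device`, and the `Agent` class are assumed to be set up exactly as in `ppo.py`.

```python
import torch

# Rebuild the agent and load a checkpoint produced by the training loop above.
agent = Agent(envs).to(device)
agent.load_state_dict(
    torch.load("runs/PickCube-v1__ppo__1__1700000000/ppo_final.cleanrl_model", map_location=device)
)
agent.eval()

# Run a single batched forward pass for evaluation.
obs, _ = envs.reset(seed=1)
with torch.no_grad():
    action, _, _, value = agent.get_action_and_value(obs)
```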
5 changes: 2 additions & 3 deletions examples/benchmarking/benchmark_gpu_sim.py
@@ -16,11 +16,10 @@
def main(args):
num_envs = args.num_envs

sapien.physx.set_gpu_memory_config(found_lost_pairs_capacity=2**26, max_rigid_patch_count=120000)
sapien.physx.set_gpu_memory_config(found_lost_pairs_capacity=2**26, max_rigid_patch_count=2**19, max_rigid_contact_count=2**20)
env = gym.make(args.env_id, num_envs=num_envs, obs_mode=args.obs_mode, enable_shadow=True, render_mode=args.render_mode, control_mode="pd_joint_delta_pos", sim_freq=100, control_freq=50)
print(f"[INFO]: Gym observation space: {env.observation_space}")
print(f"[INFO]: Gym action space: {env.action_space}")

print(f"[INFO]: Gym action space: {env.unwrapped.single_action_space}")
images = []
video_nrows=int(np.sqrt(num_envs))
with torch.inference_mode():
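
Outside of the benchmark script the same pattern applies: the GPU memory configuration is set first, and the batched environment is created afterwards. A minimal sketch (the environment id, capacities, and env count are illustrative):

```python
import gymnasium as gym
import sapien

import mani_skill2.envs  # registers the ManiSkill environments

# Raise PhysX GPU buffer capacities before any environment is created,
# mirroring benchmark_gpu_sim.py above.
sapien.physx.set_gpu_memory_config(
    found_lost_pairs_capacity=2**26,
    max_rigid_patch_count=2**19,
    max_rigid_contact_count=2**20,
)

env = gym.make(
    "PickCube-v1",
    num_envs=256,
    obs_mode="state",
    control_mode="pd_joint_delta_pos",
)
print(env.unwrapped.single_action_space)  # per-sub-scene action space
print(env.action_space)                   # batched over all 256 environments
```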
4 changes: 3 additions & 1 deletion gpu_notes.md
@@ -53,4 +53,6 @@ all _load functions are "single-thread" mode for the most part if you use the Ma
all _initialize functions however are NOT "single-thread".


Parallel IK?
Parallel IK?

cannot set collision groups in gpu sim after links are built...
18 changes: 15 additions & 3 deletions mani_skill2/agents/base_agent.py
@@ -75,7 +75,7 @@ def __init__(

def initialize(self):
"""
Initialize the agent, which includes running _after_init() and initializing/restting the controller
Initialize the agent, which includes running _after_init() and initializing/resetting the controller
"""
self._after_init()
self.controller.reset()
@@ -168,6 +168,18 @@ def action_space(self):
else:
return self.controller.action_space

@property
def single_action_space(self):
if self._control_mode is None:
return spaces.Dict(
{
uid: controller.single_action_space
for uid, controller in self.controllers.items()
}
)
else:
return self.controller.single_action_space

def set_action(self, action):
"""
Set the agent's action which is to be executed in the next environment timestep
@@ -230,8 +242,8 @@ def reset(self, init_qpos=None):
"""
if init_qpos is not None:
self.robot.set_qpos(init_qpos)
self.robot.set_qvel(np.zeros(self.robot.dof))
self.robot.set_qf(np.zeros(self.robot.dof))
self.robot.set_qvel(np.zeros(self.robot.max_dof))
self.robot.set_qf(np.zeros(self.robot.max_dof))
self.set_control_mode(self._default_control_mode)

# -------------------------------------------------------------------------- #
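
The new `single_action_space` mirrors Gymnasium's vector-env convention: the per-environment space stays unbatched, and the batched `action_space` is derived from it with `batch_space`, which is exactly what the controllers below do. A standalone illustration of that relationship (the shape and env count are made up):

```python
import numpy as np
from gymnasium import spaces
from gymnasium.vector.utils import batch_space

single_action_space = spaces.Box(low=-1.0, high=1.0, shape=(8,), dtype=np.float32)
action_space = batch_space(single_action_space, n=16)

print(single_action_space.shape)  # (8,)    -- one action vector
print(action_space.shape)         # (16, 8) -- one row per parallel environment
```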
65 changes: 42 additions & 23 deletions mani_skill2/agents/controllers/base_controller.py
@@ -5,18 +5,20 @@
import numpy as np
import sapien
import sapien.physx as physx
import torch
from gymnasium import spaces
from gymnasium.vector.utils import batch_space

from mani_skill2.agents.utils import (
flatten_action_spaces,
get_active_joint_indices,
get_active_joints,
get_joints_by_names,
)
from mani_skill2.envs.scene import ManiSkillScene
from mani_skill2.utils.common import clip_and_scale_action, normalize_action_space
from mani_skill2.utils.sapien_utils import to_tensor
from mani_skill2.utils.structs.articulation import Articulation
from mani_skill2.utils.structs.joint import Joint
from mani_skill2.utils.structs.types import Array


@@ -25,9 +27,14 @@ class BaseController:
The controller is an interface for the robot to interact with the environment.
"""

joints: List[physx.PhysxArticulationJoint] # active joints controlled
joint_indices: List[int] # indices of active joints controlled
joints: List[Joint] # active joints controlled
joint_indices: torch.Tensor # indices of active joints controlled
action_space: spaces.Space
"""the action space. If the number of parallel environments is > 1, this action space is also batched"""
single_action_space: spaces.Space
"""The unbatched version of the action space which is also typically already normalized by this class"""
_original_single_action_space: spaces.Space
"""The unbatched, original action space without any additional processing like normalization"""

def __init__(
self,
@@ -51,16 +58,26 @@ def __init__(

self._initialize_joints()
self._initialize_action_space()

# NOTE(jigu): It is intended not to be a required field in config.
self._normalize_action = getattr(self.config, "normalize_action", False)
if self._normalize_action:
self._clip_and_scale_action_space()

self.action_space = self.single_action_space
if self.scene.num_envs > 1:
self.action_space = batch_space(
self.single_action_space, n=self.scene.num_envs
)

@property
def device(self):
return self.articulation.device

def _initialize_joints(self):
joint_names = self.config.joint_names
try:
self.joints = get_active_joints(self.articulation, joint_names)
# We only track the joints we can control, the active ones.
self.joints = get_joints_by_names(self.articulation, joint_names)
self.joint_indices = get_active_joint_indices(
self.articulation, joint_names
)
@@ -74,9 +91,6 @@ def _initialize_action_space(self):
def _initialize_action_space(self):
raise NotImplementedError

def _batch_action_space(self):
self.action_space = batch_space(self.action_space, n=self.scene.num_envs)

@property
def control_freq(self):
return self._control_freq
@@ -99,7 +113,7 @@ def set_drive_property(self):
raise NotImplementedError

def reset(self):
"""Called after switching the controller."""
"""Resets the controller by setting drive properties if needed"""
self.set_drive_property()

def _preprocess_action(self, action: Array):
@@ -137,9 +151,14 @@ def set_state(self, state: dict):
# Normalize action
# -------------------------------------------------------------------------- #
def _clip_and_scale_action_space(self):
self._action_space = self.action_space
self.action_space = normalize_action_space(self._action_space)
low, high = self._action_space.low, self._action_space.high
self._original_single_action_space = self.single_action_space
self.single_action_space = normalize_action_space(
self._original_single_action_space
)
low, high = (
self._original_single_action_space.low,
self._original_single_action_space.high,
)
self.action_space_low = to_tensor(low)
self.action_space_high = to_tensor(high)

Expand Down Expand Up @@ -181,19 +200,24 @@ def __init__(
self.controllers[uid] = config.controller_cls(
config, articulation, control_freq, sim_freq=sim_freq, scene=scene
)

self._initialize_action_space()
self._initialize_joints()
self._assert_fully_actuated()

self.action_space = self.single_action_space
if self.scene.num_envs > 1:
self.action_space = batch_space(
self.single_action_space, n=self.scene.num_envs
)

def _initialize_action_space(self):
# Explicitly create a list of key-value tuples
# Otherwise, spaces.Dict will sort keys if a dict is provided
named_spaces = [
(uid, controller.action_space)
(uid, controller.single_action_space)
for uid, controller in self.controllers.items()
]
self.action_space = spaces.Dict(named_spaces)
self.single_action_space = spaces.Dict(named_spaces)

def _initialize_joints(self):
self.joints = []
@@ -233,7 +257,7 @@ def before_simulation_step(self):
gravity=True, coriolis_and_centrifugal=True
)
else:
qf = np.zeros(self.articulation.dof)
qf = np.zeros(self.articulation.max_dof)
for controller in self.controllers.values():
ret = controller.before_simulation_step()
if ret is not None and "qf" in ret:
@@ -258,15 +282,10 @@ class CombinedController(DictController):

def _initialize_action_space(self):
super()._initialize_action_space()
self.action_space, self.action_mapping = flatten_action_spaces(
self.action_space.spaces
self.single_action_space, self.action_mapping = flatten_action_spaces(
self.single_action_space.spaces
)

def _batch_action_space(self):
for controller in self.controllers.values():
controller._batch_action_space()
super()._batch_action_space()

def set_action(self, action: np.ndarray):
# Sanity check
# TODO (stao): optimization, do we really need this sanity check? Does gymnasium already do this for us
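
`CombinedController` flattens the per-controller `Dict` space into one flat `Box` so the agent emits a single action vector. Conceptually this is the same as Gymnasium's `flatten_space`, sketched below with made-up arm/gripper sub-spaces; ManiSkill's own `flatten_action_spaces` additionally returns the `action_mapping` used to route slices of that vector back to each controller.

```python
import numpy as np
from gymnasium import spaces
from gymnasium.spaces.utils import flatten_space

named_spaces = spaces.Dict(
    {
        "arm": spaces.Box(-1.0, 1.0, shape=(7,), dtype=np.float32),
        "gripper": spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32),
    }
)
flat = flatten_space(named_spaces)
print(flat.shape)  # (8,) -- arm and gripper actions concatenated
```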
5 changes: 3 additions & 2 deletions mani_skill2/agents/controllers/passive_controller.py
@@ -25,8 +25,9 @@ def set_drive_property(self):
joint.set_friction(friction[i])

def _initialize_action_space(self):
# Can not be printed
self.action_space = spaces.Box(np.empty(0), np.empty(0), dtype=np.float32)
self.single_action_space = spaces.Box(
np.empty(0), np.empty(0), dtype=np.float32
)

def set_action(self, action: np.ndarray):
pass
4 changes: 3 additions & 1 deletion mani_skill2/agents/controllers/pd_base_vel.py
@@ -29,7 +29,9 @@ def set_action(self, action: Array):
rot_mat[:, 1, 1] = torch.cos(ori)
vel = (rot_mat @ action[:, :2].float().unsqueeze(-1)).squeeze(-1)
new_action = torch.hstack([vel, action[:, 2:]])
self.articulation.set_joint_drive_velocity_targets(new_action, self.joints)
self.articulation.set_joint_drive_velocity_targets(
new_action, self.joints, self.joint_indices
)


class PDBaseVelControllerConfig(PDJointVelControllerConfig):
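
The rotation matrix assembled in `set_action` above simply re-expresses the commanded planar velocity in the world frame before it is handed to the joint velocity drives. The math in isolation, with a made-up batch of commands, looks like this (a standard 2D rotation; this is an illustration of the math rather than the controller's code verbatim):

```python
import torch

def base_to_world_planar(vel_cmd: torch.Tensor, ori: torch.Tensor) -> torch.Tensor:
    """Rotate batched (vx, vy) base-frame commands into the world frame.

    vel_cmd: (N, 2) planar velocity commands; ori: (N,) base yaw angles in radians.
    """
    rot_mat = torch.zeros((ori.shape[0], 2, 2), device=ori.device)
    rot_mat[:, 0, 0] = torch.cos(ori)
    rot_mat[:, 0, 1] = -torch.sin(ori)
    rot_mat[:, 1, 0] = torch.sin(ori)
    rot_mat[:, 1, 1] = torch.cos(ori)
    return (rot_mat @ vel_cmd.unsqueeze(-1)).squeeze(-1)

# A base facing +y (yaw = pi/2) commanded to move "forward" ends up moving along +y.
print(base_to_world_planar(torch.tensor([[0.1, 0.0]]), torch.tensor([torch.pi / 2])))
# approximately tensor([[0.0, 0.1]])
```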
