Tasks remake (haosulab#195)
* work

* sac impl

* update template

* add minimal pick cube task with "task sheet"

* task sheets

* work

* work

* work

* fix templates

* using torch rng

* use torch rng

* batched uniform sampler, stack cube v1 task

* fix bugs with scene masking, merge_actors function

* Update pick_single_ycb.py

* Update pick_single_ycb.py

* bug fixes

* stack cube task bug fix

* refactor: appending to the hidden_objects list allows hiding of e.g. goal sites

* WIP work

* Revert "WIP work"

This reverts commit 9a6710f.

* Revert "Revert "WIP work""

This reverts commit dcb3a5f.

* refactor some code, cache some properties, batch some properties

* fix naming of joints and links to also be prefixed with scene-{i}

* bug fixes. Disable some functionality via a ._merged bool

* disable hand camera on panda (people should use the RealSense panda config)

* fix some setters

* work

* cache property for body data indices, it is now auto computed for user

* work

* change default value for static as resting objects still have a minor amount of angular rotation at times

* code updates

* decorator to raise an error, once the GPU is initialized, for functions the user should not call

* Update actor.py

* bug fixes

* work

* new utility functions for getting desired meshes, fixed goal site place

* performance improvements, swap obs and info

* Update __init__.py

* Update articulation.py

* Update pick_single_ycb.py

* update documentation

* tuned max episode steps for faster PPO

* use new memory-leak-free SAPIEN and new buffer access API, fix controller reconfigure

* bug fixes, new tests

* test

* more bug fixes

* work

* add GPU support for velocity target control, perf improvements for indexing articulation-related things, fix some bugs

* Update base_controller.py

* fix collision setting bugs

* apply vel

* fix self collision bugs

* fix gymnasium vector env wrapper to ignore the timelimit wrapper

* clean up

* fixes

* linkmerging

* bug fix

* work

* better camera angles

* work

* bug fixes

* code clean up with new wrappers
StoneT2000 committed Jan 30, 2024
1 parent 0346890 commit a9ca4a4
Showing 65 changed files with 1,589 additions and 1,576 deletions.
5 changes: 5 additions & 0 deletions CONTRIBUTING.md
@@ -33,6 +33,11 @@ coverage run --source=mani_skill2/ -a -m pytest tests # run tests
coverage html --include=mani_skill2/**/*.py # see the test coverage results
```

To skip generating a coverage report and to make debugging easier, you can just run
```
pytest tests/ --pdb --pdbcls=IPython.terminal.debugger:Pdb
```


## Building

11 changes: 9 additions & 2 deletions examples/baselines/ppo/README.md
@@ -3,6 +3,13 @@
Code adapted from [CleanRL](https://github.com/vwxyzjn/cleanrl/)

```bash
python ppo.py --num_envs=512 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=1024 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="PickCube-v1" --total_timesteps=50000000
python ppo.py --num_envs=2048 --update_epochs=1 --num_minibatches=32 --env_id="PushCube-v1" --total_timesteps=100000000 --num-steps=12
```
python ppo.py --num_envs=1024 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="StackCube-v1" --total_timesteps=100000000
python ppo.py --num_envs=128 --update_epochs=8 --target_kl=0.1 --num_minibatches=32 --env_id="OpenCabinetDrawer-v1" --num-steps=100 --total_timesteps=50000000
```

1024, 100, max100: StackCube-v1__ppo__1__1706348323
1024, 50, max100: StackCube-v1__ppo__1__1706368801
1024, 50, max50: StackCube-v1__ppo__1__1706385843
2048, 25, max25: not a good idea
51 changes: 18 additions & 33 deletions examples/baselines/ppo/ppo.py
@@ -10,12 +10,11 @@
import torch
import torch.nn as nn
import torch.optim as optim
import tqdm
import tyro
from torch.distributions.normal import Normal
from torch.utils.tensorboard import SummaryWriter

from mani_skill2.utils.visualization.misc import images_to_video, tile_images
# ManiSkill specific imports
import mani_skill2.envs
from mani_skill2.utils.wrappers.record import RecordEpisode
from mani_skill2.vector.wrappers.gymnasium import ManiSkillVectorEnv
@@ -38,7 +37,7 @@ class Args:
"""the entity (team) of wandb's project"""
capture_video: bool = True
"""whether to capture videos of the agent performances (check out `videos` folder)"""
save_model: bool = False
save_model: bool = True
"""whether to save model into the `runs/{run_name}` folder"""
upload_model: bool = False
"""whether to upload the saved model to huggingface"""
@@ -56,7 +55,7 @@ class Args:
"""the number of parallel environments"""
num_eval_envs: int = 8
"""the number of parallel evaluation environments"""
num_steps: int = 100
num_steps: int = 50
"""the number of steps to run in each environment per policy rollout"""
anneal_lr: bool = False
"""Toggle learning rate annealing for policy and value networks"""
@@ -212,6 +211,9 @@ def get_action_and_value(self, x, action=None):
action_space_low, action_space_high = torch.from_numpy(envs.single_action_space.low).to(device), torch.from_numpy(envs.single_action_space.high).to(device)
def clip_action(action: torch.Tensor):
return torch.clamp(action.detach(), action_space_low, action_space_high)

# model_path = "/home/stao/work/research/maniskill/ManiSkill2/examples/baselines/ppo/runs/StackCube-v1__ppo__1__1706294550/ppo_3076.cleanrl_model"
# agent.load_state_dict(torch.load(model_path))
for iteration in range(1, args.num_iterations + 1):
timeout_bonus = torch.zeros((args.num_steps, args.num_envs), device=device)
if iteration % args.eval_freq == 1:
@@ -224,12 +226,17 @@ def clip_action(action: torch.Tensor):
if eval_truncations.any():
eval_done = True
info = eval_infos["final_info"]
# print(info)
episodic_return = info['episode']['r'].mean().cpu().numpy()
print(f"eval_episodic_return={episodic_return}")
writer.add_scalar("charts/eval_success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/eval_episodic_return", episodic_return, global_step)
writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"], global_step)

writer.add_scalar("charts/eval_episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
# exit()
if args.save_model and iteration % args.eval_freq == 1:
model_path = f"runs/{run_name}/{args.exp_name}_{iteration}.cleanrl_model"
torch.save(agent.state_dict(), model_path)
print(f"model saved to {model_path}")
# Annealing the rate if instructed to do so.
if args.anneal_lr:
frac = 1.0 - (iteration - 1.0) / args.num_iterations
@@ -266,7 +273,7 @@ def clip_action(action: torch.Tensor):
print(f"global_step={global_step}, episodic_return={episodic_return}")
writer.add_scalar("charts/success_rate", info["success"].float().mean().cpu().numpy(), global_step)
writer.add_scalar("charts/episodic_return", episodic_return, global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"], global_step)
writer.add_scalar("charts/episodic_length", info["elapsed_steps"].float().mean().cpu().numpy(), global_step)
# bootstrap value if not done
with torch.no_grad():
next_value = agent.get_value(next_obs).reshape(1, -1)
@@ -361,32 +368,10 @@ def clip_action(action: torch.Tensor):
writer.add_scalar("losses/explained_variance", explained_var, global_step)
print("SPS:", int(global_step / (time.time() - start_time)))
writer.add_scalar("charts/SPS", int(global_step / (time.time() - start_time)), global_step)

# if args.save_model:
# model_path = f"runs/{run_name}/{args.exp_name}.cleanrl_model"
# torch.save(agent.state_dict(), model_path)
# print(f"model saved to {model_path}")
# from cleanrl_utils.evals.ppo_eval import evaluate

# episodic_returns = evaluate(
# model_path,
# make_env,
# args.env_id,
# eval_episodes=10,
# run_name=f"{run_name}-eval",
# Model=Agent,
# device=device,
# gamma=args.gamma,
# )
# for idx, episodic_return in enumerate(episodic_returns):
# writer.add_scalar("eval/episodic_return", episodic_return, idx)

# if args.upload_model:
# from cleanrl_utils.huggingface import push_to_hub

# repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
# repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
# push_to_hub(args, episodic_returns, repo_id, "PPO", f"runs/{run_name}", f"videos/{run_name}-eval")
if args.save_model:
model_path = f"runs/{run_name}/{args.exp_name}_final.cleanrl_model"
torch.save(agent.state_dict(), model_path)
print(f"model saved to {model_path}")

envs.close()
writer.close()
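
The checkpoints saved above are plain `state_dict` files, so they can be reloaded for evaluation roughly as in the sketch below. This is a minimal sketch, not part of the diff: the run directory name is illustrative, and `envs`, `device`, and the `Agent` class are assumed to be set up exactly as in `ppo.py`.

```python
import torch

# Rebuild the agent and load a checkpoint produced by the training loop above.
agent = Agent(envs).to(device)
agent.load_state_dict(
    torch.load("runs/PickCube-v1__ppo__1__1700000000/ppo_final.cleanrl_model", map_location=device)
)
agent.eval()

# Run a single batched forward pass for evaluation.
obs, _ = envs.reset(seed=1)
with torch.no_grad():
    action, _, _, value = agent.get_action_and_value(obs)
```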
5 changes: 2 additions & 3 deletions examples/benchmarking/benchmark_gpu_sim.py
@@ -16,11 +16,10 @@
def main(args):
num_envs = args.num_envs

sapien.physx.set_gpu_memory_config(found_lost_pairs_capacity=2**26, max_rigid_patch_count=120000)
sapien.physx.set_gpu_memory_config(found_lost_pairs_capacity=2**26, max_rigid_patch_count=2**19, max_rigid_contact_count=2**20)
env = gym.make(args.env_id, num_envs=num_envs, obs_mode=args.obs_mode, enable_shadow=True, render_mode=args.render_mode, control_mode="pd_joint_delta_pos", sim_freq=100, control_freq=50)
print(f"[INFO]: Gym observation space: {env.observation_space}")
print(f"[INFO]: Gym action space: {env.action_space}")

print(f"[INFO]: Gym action space: {env.unwrapped.single_action_space}")
images = []
video_nrows=int(np.sqrt(num_envs))
with torch.inference_mode():
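
Outside of the benchmark script the same pattern applies: the GPU memory configuration is set first, and the batched environment is created afterwards. A minimal sketch (the environment id, capacities, and env count are illustrative):

```python
import gymnasium as gym
import sapien

import mani_skill2.envs  # registers the ManiSkill environments

# Raise PhysX GPU buffer capacities before any environment is created,
# mirroring benchmark_gpu_sim.py above.
sapien.physx.set_gpu_memory_config(
    found_lost_pairs_capacity=2**26,
    max_rigid_patch_count=2**19,
    max_rigid_contact_count=2**20,
)

env = gym.make(
    "PickCube-v1",
    num_envs=256,
    obs_mode="state",
    control_mode="pd_joint_delta_pos",
)
print(env.unwrapped.single_action_space)  # per-sub-scene action space
print(env.action_space)                   # batched over all 256 environments
```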
4 changes: 3 additions & 1 deletion gpu_notes.md
@@ -53,4 +53,6 @@ all _load functions are "single-thread" mode for the most part if you use the Ma
all _initialize functions however are NOT "single-thread".


Parallel IK?
Parallel IK?

cannot set collision groups in gpu sim after links are built...
18 changes: 15 additions & 3 deletions mani_skill2/agents/base_agent.py
@@ -75,7 +75,7 @@ def __init__(

def initialize(self):
"""
Initialize the agent, which includes running _after_init() and initializing/restting the controller
Initialize the agent, which includes running _after_init() and initializing/resetting the controller
"""
self._after_init()
self.controller.reset()
@@ -168,6 +168,18 @@ def action_space(self):
else:
return self.controller.action_space

@property
def single_action_space(self):
if self._control_mode is None:
return spaces.Dict(
{
uid: controller.single_action_space
for uid, controller in self.controllers.items()
}
)
else:
return self.controller.single_action_space

def set_action(self, action):
"""
Set the agent's action which is to be executed in the next environment timestep
@@ -230,8 +242,8 @@ def reset(self, init_qpos=None):
"""
if init_qpos is not None:
self.robot.set_qpos(init_qpos)
self.robot.set_qvel(np.zeros(self.robot.dof))
self.robot.set_qf(np.zeros(self.robot.dof))
self.robot.set_qvel(np.zeros(self.robot.max_dof))
self.robot.set_qf(np.zeros(self.robot.max_dof))
self.set_control_mode(self._default_control_mode)

# -------------------------------------------------------------------------- #
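
The new `single_action_space` mirrors Gymnasium's vector-env convention: the per-environment space stays unbatched, and the batched `action_space` is derived from it with `batch_space`, which is exactly what the controllers below do. A standalone illustration of that relationship (the shape and env count are made up):

```python
import numpy as np
from gymnasium import spaces
from gymnasium.vector.utils import batch_space

single_action_space = spaces.Box(low=-1.0, high=1.0, shape=(8,), dtype=np.float32)
action_space = batch_space(single_action_space, n=16)

print(single_action_space.shape)  # (8,)    -- one action vector
print(action_space.shape)         # (16, 8) -- one row per parallel environment
```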
65 changes: 42 additions & 23 deletions mani_skill2/agents/controllers/base_controller.py
@@ -5,18 +5,20 @@
import numpy as np
import sapien
import sapien.physx as physx
import torch
from gymnasium import spaces
from gymnasium.vector.utils import batch_space

from mani_skill2.agents.utils import (
flatten_action_spaces,
get_active_joint_indices,
get_active_joints,
get_joints_by_names,
)
from mani_skill2.envs.scene import ManiSkillScene
from mani_skill2.utils.common import clip_and_scale_action, normalize_action_space
from mani_skill2.utils.sapien_utils import to_tensor
from mani_skill2.utils.structs.articulation import Articulation
from mani_skill2.utils.structs.joint import Joint
from mani_skill2.utils.structs.types import Array


@@ -25,9 +27,14 @@ class BaseController:
The controller is an interface for the robot to interact with the environment.
"""

joints: List[physx.PhysxArticulationJoint] # active joints controlled
joint_indices: List[int] # indices of active joints controlled
joints: List[Joint] # active joints controlled
joint_indices: torch.Tensor # indices of active joints controlled
action_space: spaces.Space
"""the action space. If the number of parallel environments is > 1, this action space is also batched"""
single_action_space: spaces.Space
"""The unbatched version of the action space which is also typically already normalized by this class"""
_original_single_action_space: spaces.Space
"""The unbatched, original action space without any additional processing like normalization"""

def __init__(
self,
@@ -51,16 +58,26 @@ def __init__(

self._initialize_joints()
self._initialize_action_space()

# NOTE(jigu): It is intended not to be a required field in config.
self._normalize_action = getattr(self.config, "normalize_action", False)
if self._normalize_action:
self._clip_and_scale_action_space()

self.action_space = self.single_action_space
if self.scene.num_envs > 1:
self.action_space = batch_space(
self.single_action_space, n=self.scene.num_envs
)

@property
def device(self):
return self.articulation.device

def _initialize_joints(self):
joint_names = self.config.joint_names
try:
self.joints = get_active_joints(self.articulation, joint_names)
# We only track the joints we can control, the active ones.
self.joints = get_joints_by_names(self.articulation, joint_names)
self.joint_indices = get_active_joint_indices(
self.articulation, joint_names
)
@@ -74,9 +91,6 @@ def _initialize_action_space(self):
def _initialize_action_space(self):
raise NotImplementedError

def _batch_action_space(self):
self.action_space = batch_space(self.action_space, n=self.scene.num_envs)

@property
def control_freq(self):
return self._control_freq
@@ -99,7 +113,7 @@ def set_drive_property(self):
raise NotImplementedError

def reset(self):
"""Called after switching the controller."""
"""Resets the controller by setting drive properties if needed"""
self.set_drive_property()

def _preprocess_action(self, action: Array):
@@ -137,9 +151,14 @@ def set_state(self, state: dict):
# Normalize action
# -------------------------------------------------------------------------- #
def _clip_and_scale_action_space(self):
self._action_space = self.action_space
self.action_space = normalize_action_space(self._action_space)
low, high = self._action_space.low, self._action_space.high
self._original_single_action_space = self.single_action_space
self.single_action_space = normalize_action_space(
self._original_single_action_space
)
low, high = (
self._original_single_action_space.low,
self._original_single_action_space.high,
)
self.action_space_low = to_tensor(low)
self.action_space_high = to_tensor(high)

Expand Down Expand Up @@ -181,19 +200,24 @@ def __init__(
self.controllers[uid] = config.controller_cls(
config, articulation, control_freq, sim_freq=sim_freq, scene=scene
)

self._initialize_action_space()
self._initialize_joints()
self._assert_fully_actuated()

self.action_space = self.single_action_space
if self.scene.num_envs > 1:
self.action_space = batch_space(
self.single_action_space, n=self.scene.num_envs
)

def _initialize_action_space(self):
# Explicitly create a list of key-value tuples
# Otherwise, spaces.Dict will sort keys if a dict is provided
named_spaces = [
(uid, controller.action_space)
(uid, controller.single_action_space)
for uid, controller in self.controllers.items()
]
self.action_space = spaces.Dict(named_spaces)
self.single_action_space = spaces.Dict(named_spaces)

def _initialize_joints(self):
self.joints = []
@@ -233,7 +257,7 @@ def before_simulation_step(self):
gravity=True, coriolis_and_centrifugal=True
)
else:
qf = np.zeros(self.articulation.dof)
qf = np.zeros(self.articulation.max_dof)
for controller in self.controllers.values():
ret = controller.before_simulation_step()
if ret is not None and "qf" in ret:
@@ -258,15 +282,10 @@ class CombinedController(DictController):

def _initialize_action_space(self):
super()._initialize_action_space()
self.action_space, self.action_mapping = flatten_action_spaces(
self.action_space.spaces
self.single_action_space, self.action_mapping = flatten_action_spaces(
self.single_action_space.spaces
)

def _batch_action_space(self):
for controller in self.controllers.values():
controller._batch_action_space()
super()._batch_action_space()

def set_action(self, action: np.ndarray):
# Sanity check
# TODO (stao): optimization, do we really need this sanity check? Does gymnasium already do this for us
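
`CombinedController` flattens the per-controller `Dict` space into one flat `Box` so the agent emits a single action vector. Conceptually this is the same as Gymnasium's `flatten_space`, sketched below with made-up arm/gripper sub-spaces; ManiSkill's own `flatten_action_spaces` additionally returns the `action_mapping` used to route slices of that vector back to each controller.

```python
import numpy as np
from gymnasium import spaces
from gymnasium.spaces.utils import flatten_space

named_spaces = spaces.Dict(
    {
        "arm": spaces.Box(-1.0, 1.0, shape=(7,), dtype=np.float32),
        "gripper": spaces.Box(-1.0, 1.0, shape=(1,), dtype=np.float32),
    }
)
flat = flatten_space(named_spaces)
print(flat.shape)  # (8,) -- arm and gripper actions concatenated
```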
5 changes: 3 additions & 2 deletions mani_skill2/agents/controllers/passive_controller.py
@@ -25,8 +25,9 @@ def set_drive_property(self):
joint.set_friction(friction[i])

def _initialize_action_space(self):
# Can not be printed
self.action_space = spaces.Box(np.empty(0), np.empty(0), dtype=np.float32)
self.single_action_space = spaces.Box(
np.empty(0), np.empty(0), dtype=np.float32
)

def set_action(self, action: np.ndarray):
pass
4 changes: 3 additions & 1 deletion mani_skill2/agents/controllers/pd_base_vel.py
@@ -29,7 +29,9 @@ def set_action(self, action: Array):
rot_mat[:, 1, 1] = torch.cos(ori)
vel = (rot_mat @ action[:, :2].float().unsqueeze(-1)).squeeze(-1)
new_action = torch.hstack([vel, action[:, 2:]])
self.articulation.set_joint_drive_velocity_targets(new_action, self.joints)
self.articulation.set_joint_drive_velocity_targets(
new_action, self.joints, self.joint_indices
)


class PDBaseVelControllerConfig(PDJointVelControllerConfig):
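
The rotation matrix assembled in `set_action` above simply re-expresses the commanded planar velocity in the world frame before it is handed to the joint velocity drives. The math in isolation, with a made-up batch of commands, looks like this (a standard 2D rotation; this is an illustration of the math rather than the controller's code verbatim):

```python
import torch

def base_to_world_planar(vel_cmd: torch.Tensor, ori: torch.Tensor) -> torch.Tensor:
    """Rotate batched (vx, vy) base-frame commands into the world frame.

    vel_cmd: (N, 2) planar velocity commands; ori: (N,) base yaw angles in radians.
    """
    rot_mat = torch.zeros((ori.shape[0], 2, 2), device=ori.device)
    rot_mat[:, 0, 0] = torch.cos(ori)
    rot_mat[:, 0, 1] = -torch.sin(ori)
    rot_mat[:, 1, 0] = torch.sin(ori)
    rot_mat[:, 1, 1] = torch.cos(ori)
    return (rot_mat @ vel_cmd.unsqueeze(-1)).squeeze(-1)

# A base facing +y (yaw = pi/2) commanded to move "forward" ends up moving along +y.
print(base_to_world_planar(torch.tensor([[0.1, 0.0]]), torch.tensor([torch.pi / 2])))
# approximately tensor([[0.0, 0.1]])
```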
