Merge branch 'dev' into ms3-docs
StoneT2000 committed Jan 24, 2024
2 parents 7f6702a + e9717ca commit 55106f8
Showing 194 changed files with 2,299 additions and 2,051 deletions.
36 changes: 36 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,36 @@
```yaml
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.3.0
    hooks:
      - id: check-ast
      - id: check-merge-conflict
      - id: check-toml
      # - id: check-yaml
      - id: end-of-file-fixer
        files: \.py$
      - id: trailing-whitespace
        files: \.py$
  - repo: https://github.com/psf/black
    rev: 22.10.0
    hooks:
      - id: black
        exclude: 'warp_maniskill/.*|docs/.*|examples/.*'
        args:
          - --line-length=88
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
        exclude: 'warp_maniskill/.*|docs/.*|examples/.*'
        args:
          - --profile=black
  - repo: https://github.com/myint/autoflake
    rev: v1.4
    hooks:
      - id: autoflake
        exclude: 'warp_maniskill/.*|docs/.*|examples/.*'
        args:
          - -r
          - --in-place
          - --remove-unused-variables
          # - --remove-all-unused-imports
```
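Once this file is committed, running `pre-commit install` registers the hooks with git, and `pre-commit run --all-files` applies them across the repository. Much of the mechanical churn in the diffs below (re-added final lines, stripped whitespace, deleted unused variables and redundant `pass` statements) is consistent with these hooks (end-of-file-fixer, trailing-whitespace, autoflake) plus black and isort having been run once over the codebase.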
2 changes: 1 addition & 1 deletion examples/benchmarking/benchmark_cpu_sim.py
@@ -37,4 +37,4 @@
```diff
 dtime = time.time() - stime
 FPS = num_envs * N / dtime
 print(f"{FPS=:0.3f}. {N=} frames in {dtime:0.3f}s with {num_envs} parallel envs with step+reset")
-env.close()
\ No newline at end of file
+env.close()
```
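For context, the `FPS` figure printed above counts environment frames aggregated over all parallel environments. A minimal sketch of the timing pattern, assuming a ManiSkill-style batched environment (the env id, kwargs, and registration import are illustrative, not taken from this diff):

```python
import time

import gymnasium as gym
import mani_skill2.envs  # noqa: F401  (assumption: registers the ManiSkill environments)

num_envs, N = 256, 1000
env = gym.make("PickCube-v1", num_envs=num_envs)  # assumed batched-env API, as in the script above
env.reset(seed=0)
stime = time.time()
for _ in range(N):
    env.step(env.action_space.sample())  # one call steps all num_envs environments
dtime = time.time() - stime
FPS = num_envs * N / dtime  # frames across all parallel envs
print(f"{FPS=:0.3f}")
env.close()
```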
4 changes: 2 additions & 2 deletions examples/benchmarking/benchmark_gpu_sim.py
@@ -20,7 +20,7 @@ def main(args):
```diff
     env = gym.make(args.env_id, num_envs=num_envs, obs_mode=args.obs_mode, enable_shadow=True, render_mode=args.render_mode, control_mode="pd_joint_delta_pos", sim_freq=100, control_freq=50)
     print(f"[INFO]: Gym observation space: {env.observation_space}")
     print(f"[INFO]: Gym action space: {env.action_space}")

     images = []
     video_nrows=int(np.sqrt(num_envs))
     with torch.inference_mode():
```
@@ -59,7 +59,7 @@ def main(args):
```diff
     env.close()
 def parse_args():
     parser = argparse.ArgumentParser()
-    parser.add_argument("-e", "--env-id", type=str, default="PickCube-v0")
+    parser.add_argument("-e", "--env-id", type=str, default="PickCube-v1")
     parser.add_argument("-o", "--obs-mode", type=str, default="none")
     parser.add_argument("-n", "--num-envs", type=int, default=256)
     parser.add_argument(
```
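`video_nrows = int(np.sqrt(num_envs))` suggests the per-env renders are tiled into a roughly square grid before being written out as one video. A hedged sketch of that tiling (the actual helper is not shown in this diff; the function and frame layout are assumptions):

```python
import numpy as np

def tile_frames(frames: np.ndarray, nrows: int) -> np.ndarray:
    """Tile (num_envs, H, W, C) frames into a single (nrows*H, ncols*W, C) image."""
    num_envs, H, W, C = frames.shape
    ncols = int(np.ceil(num_envs / nrows))
    grid = np.zeros((nrows * H, ncols * W, C), dtype=frames.dtype)
    for i in range(num_envs):
        r, c = divmod(i, ncols)  # row-major placement of env i
        grid[r * H : (r + 1) * H, c * W : (c + 1) * W] = frames[i]
    return grid
```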
2 changes: 1 addition & 1 deletion examples/benchmarking/benchmark_orbit_sim.py
@@ -107,4 +107,4 @@ def main():
```diff
         raise
     finally:
         # close sim app
-        simulation_app.close()
\ No newline at end of file
+        simulation_app.close()
```
@@ -159,7 +159,7 @@ def get_action_and_value(self, x, action=None):
```diff
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
     args.num_iterations = args.total_timesteps // args.batch_size
     run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"

     if args.track:
         import wandb

```
@@ -191,12 +191,12 @@ def get_action_and_value(self, x, action=None):
```diff
     envs = gym.vector.AsyncVectorEnv(
         [make_env(args.env_id, i, args.capture_video, run_name, args.gamma) for i in range(args.num_envs)]
     )

     # sapien.physx.set_gpu_memory_config(found_lost_pairs_capacity=2**26, max_rigid_patch_count=200000)
     # sim_freq, control_freq = 100, 20
     # envs = gym.make(args.env_id, num_envs=args.num_envs, render_mode="rgb_array", obs_mode="state", control_mode="pd_joint_delta_pos", sim_freq=sim_freq, control_freq=control_freq)
     # eval_envs = gym.make(args.env_id, num_envs=8, render_mode="rgb_array", obs_mode="state", control_mode="pd_joint_delta_pos", sim_freq=sim_freq, control_freq=control_freq)

     assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

     agent = Agent(envs).to(device)
```
@@ -403,4 +403,4 @@ def get_action_and_value(self, x, action=None):
```diff
         push_to_hub(args, episodic_returns, repo_id, "PPO", f"runs/{run_name}", f"videos/{run_name}-eval")

     envs.close()
-    writer.close()
\ No newline at end of file
+    writer.close()
```
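The `make_env` factory passed to `AsyncVectorEnv` is not shown in this diff; below is a standard CleanRL-style sketch consistent with the call signature above. The wrapper choices are assumptions, not taken from the commit:

```python
import gymnasium as gym

def make_env(env_id, idx, capture_video, run_name, gamma):
    def thunk():
        # only env 0 renders, so a single video is recorded per run
        if capture_video and idx == 0:
            env = gym.make(env_id, render_mode="rgb_array")
            env = gym.wrappers.RecordVideo(env, f"videos/{run_name}")
        else:
            env = gym.make(env_id)
        env = gym.wrappers.RecordEpisodeStatistics(env)
        env = gym.wrappers.ClipAction(env)
        env = gym.wrappers.NormalizeReward(env, gamma=gamma)
        return env
    return thunk
```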
@@ -166,7 +166,7 @@ def get_action_and_value(self, x, action=None):
```diff
     args.minibatch_size = int(args.batch_size // args.num_minibatches)
     args.num_iterations = args.total_timesteps // args.batch_size
     run_name = f"{args.env_id}__{args.exp_name}__{args.seed}__{int(time.time())}"

     if args.track:
         import wandb

```
@@ -202,7 +202,7 @@ def get_action_and_value(self, x, action=None):
```diff
     sim_freq, control_freq = 100, 20
     envs = gym.make(args.env_id, num_envs=args.num_envs, render_mode="rgb_array", obs_mode="state", control_mode="pd_joint_delta_pos", sim_freq=sim_freq, control_freq=control_freq)
     eval_envs = gym.make(args.env_id, num_envs=8, render_mode="rgb_array", obs_mode="state", control_mode="pd_joint_delta_pos", sim_freq=sim_freq, control_freq=control_freq)

     assert isinstance(envs.single_action_space, gym.spaces.Box), "only continuous action space is supported"

     agent = Agent(envs).to(device)
```
@@ -273,9 +273,9 @@ def clip_action(action: torch.Tensor):
```diff
             frac = 1.0 - (iteration - 1.0) / args.num_iterations
             lrnow = frac * args.learning_rate
             optimizer.param_groups[0]["lr"] = lrnow

         # next_obs, _ = envs.reset() # TODO: remove and make an auto reset function later

         for step in range(0, args.num_steps):
             global_step += args.num_envs
             obs[step] = next_obs
```
@@ -438,4 +438,4 @@ def clip_action(action: torch.Tensor):
```diff
     # push_to_hub(args, episodic_returns, repo_id, "PPO", f"runs/{run_name}", f"videos/{run_name}-eval")

     envs.close()
-    writer.close()
\ No newline at end of file
+    writer.close()
```
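Only the signature of `clip_action` appears in the hunk headers above. A plausible body, assuming it clips sampled actions to the env's `Box` bounds on the training device (`envs` and `device` come from the surrounding script; the bounds tensors are assumptions):

```python
import torch

# assumed to be built once from the env's Box action space
action_low = torch.tensor(envs.single_action_space.low, device=device)
action_high = torch.tensor(envs.single_action_space.high, device=device)

def clip_action(action: torch.Tensor):
    # elementwise clip of the batched action to the per-dimension bounds
    return torch.clamp(action, min=action_low, max=action_high)
```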
@@ -309,4 +309,4 @@ def get_action(self, x):
```diff
             writer.add_scalar("losses/alpha_loss", alpha_loss.item(), global_step)

     envs.close()
-    writer.close()
\ No newline at end of file
+    writer.close()
```
2 changes: 1 addition & 1 deletion mani_skill2/__init__.py
@@ -46,7 +46,7 @@ def get_commit_info(show_modified_files=False, show_untracked_files=False):
```diff
         commit_info["modified"] = modified_files

     if show_untracked_files:
-        untracked_files = repo.untracked_files
+        repo.untracked_files
         commit_info["untracked"] = modified_files

     # https://github.com/gitpython-developers/GitPython/issues/718#issuecomment-360267779
```
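Note that this hunk only drops the unused `untracked_files` variable (matching the new autoflake `--remove-unused-variables` hook); `commit_info["untracked"]` is still assigned from `modified_files`. If the intent is to record untracked files, a corrected sketch would be:

```python
if show_untracked_files:
    commit_info["untracked"] = repo.untracked_files
```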
2 changes: 0 additions & 2 deletions mani_skill2/agents/base_agent.py
@@ -105,11 +105,9 @@ def _after_loading_articulation(self):
```diff
         """After loading articulation and before setting up controller. Not recommended, but is useful for when creating
         robot classes that inherit controllers from another and only change which joints are controlled
         """
-        pass

     def _after_init(self):
         """After initialization. E.g., caching the end-effector link."""
-        pass

     # -------------------------------------------------------------------------- #
     # Controllers
```
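With the redundant `pass` statements removed (the docstrings already serve as the method bodies), subclasses override these no-op hooks as before. An illustrative sketch, with a hypothetical robot class and link name (the `get_links` accessor is an assumption about the agent API):

```python
class MyRobot(BaseAgent):  # hypothetical subclass
    def _after_init(self):
        # cache the end-effector link by name ("ee_link" is a made-up name)
        self.ee_link = next(
            link for link in self.robot.get_links() if link.name == "ee_link"
        )
```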
29 changes: 11 additions & 18 deletions mani_skill2/agents/controllers/base_controller.py
@@ -106,13 +106,12 @@ def _preprocess_action(self, action: Array):
```diff
         # TODO(jigu): support discrete action
         if self.scene.num_envs > 1:
             action_dim = self.action_space.shape[1]
-            assert action.shape == (self.scene.num_envs, action_dim), (
-                action.shape,
-                action_dim,
-            )
         else:
             action_dim = self.action_space.shape[0]
-            assert action.shape == (action_dim,), (action.shape, action_dim)
+        assert action.shape == (self.scene.num_envs, action_dim), (
+            action.shape,
+            action_dim,
+        )

         if self._normalize_action:
             action = self._clip_and_scale_action(action)
```
@@ -126,7 +125,6 @@ def set_action(self, action: Array):
```diff

     def before_simulation_step(self):
         """Called before each simulation step in one control step."""
-        pass

     def get_state(self) -> dict:
         """Get the controller state."""
```
@@ -206,11 +204,8 @@ def _initialize_joints(self):
```diff

     def _assert_fully_actuated(self):
         active_joints = self.articulation.get_active_joints()
-        if len(active_joints) != len(self.joints) or not np.all(
-            [
-                active_joint == joint
-                for active_joint, joint in zip(active_joints, self.joints)
-            ]
+        if len(active_joints) != len(self.joints) or set(active_joints) != set(
+            self.joints
         ):
             print("active_joints:", [x.name for x in active_joints])
             print("controlled_joints:", [x.name for x in self.joints])
```
@@ -277,17 +272,15 @@ def set_action(self, action: np.ndarray):
```diff
         # TODO (stao): optimization, do we really need this sanity check? Does gymnasium already do this for us
         if self.scene.num_envs > 1:
             action_dim = self.action_space.shape[1]
-            assert action.shape == (self.scene.num_envs, action_dim), (
-                action.shape,
-                action_dim,
-            )
         else:
             action_dim = self.action_space.shape[0]
-            assert action.shape == (action_dim,), (action.shape, action_dim)
-
+        assert action.shape == (self.scene.num_envs, action_dim), (
+            action.shape,
+            action_dim,
+        )
         for uid, controller in self.controllers.items():
             start, end = self.action_mapping[uid]
-            controller.set_action(to_tensor(action[..., start:end]))
+            controller.set_action(action[:, start:end])

     def to_action_dict(self, action: np.ndarray):
         """Convert a flat action to a dict of actions."""
```
22 changes: 12 additions & 10 deletions mani_skill2/agents/controllers/pd_base_vel.py
@@ -1,6 +1,8 @@
```diff
 import numpy as np
+import torch

 from mani_skill2.utils.geometry import rotate_2d_vec_by_angle
+from mani_skill2.utils.structs.types import Array

 from .pd_joint_vel import PDJointVelController, PDJointVelControllerConfig

```
@@ -15,18 +17,18 @@ def _initialize_action_space(self):
```diff
         assert len(self.joints) >= 3, len(self.joints)
         super()._initialize_action_space()

-    def set_action(self, action: np.ndarray):
+    def set_action(self, action: Array):
         action = self._preprocess_action(action)

         # Convert to ego-centric action
         # Assume the 3rd DoF stands for orientation
-        ori = self.qpos[2]
-        vel = rotate_2d_vec_by_angle(action[:2], ori)
-        new_action = np.hstack([vel, action[2:]])
-
-        for i, joint in enumerate(self.joints):
-            joint.set_drive_velocity_target(new_action[i])
-
+        ori = self.qpos[:, 2]
+        rot_mat = torch.zeros(ori.shape[0], 2, 2, device=action.device)
+        rot_mat[:, 0, 0] = torch.cos(ori)
+        rot_mat[:, 0, 1] = -torch.sin(ori)
+        rot_mat[:, 1, 0] = torch.sin(ori)
+        rot_mat[:, 1, 1] = torch.cos(ori)
+        vel = (rot_mat @ action[:, :2].unsqueeze(-1)).squeeze(-1)
+        new_action = torch.hstack([vel, action[:, 2:]])
+        self.articulation.set_joint_drive_velocity_targets(new_action, self.joints)


 class PDBaseVelControllerConfig(PDJointVelControllerConfig):
     controller_cls = PDBaseVelController
```
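The batched rotation matrices above reproduce, per environment, what the scalar `rotate_2d_vec_by_angle` did for a single vector. A quick illustrative check that rotating (1, 0) by pi/2 yields (0, 1):

```python
import math

import torch

ori = torch.tensor([math.pi / 2])  # one env, heading of 90 degrees
rot_mat = torch.zeros(ori.shape[0], 2, 2)
rot_mat[:, 0, 0] = torch.cos(ori)
rot_mat[:, 0, 1] = -torch.sin(ori)
rot_mat[:, 1, 0] = torch.sin(ori)
rot_mat[:, 1, 1] = torch.cos(ori)

vec = torch.tensor([[1.0, 0.0]])  # (num_envs, 2)
print((rot_mat @ vec.unsqueeze(-1)).squeeze(-1))  # ~tensor([[0., 1.]])
```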