From 586f929659ea71108ea5362fe4925eb4e6109d30 Mon Sep 17 00:00:00 2001 From: StoneT2000 Date: Tue, 5 Mar 2024 00:34:42 -0800 Subject: [PATCH] fix bug where controller reset did not handle partial reset correctly and add new tests --- mani_skill2/agents/controllers/pd_ee_pose.py | 4 +- .../agents/controllers/pd_joint_pos.py | 4 +- .../agents/controllers/pd_joint_pos_vel.py | 5 +- mani_skill2/envs/scene.py | 3 +- tests/test_wrappers.py | 68 ++++++++++++++++++- 5 files changed, 77 insertions(+), 7 deletions(-) diff --git a/mani_skill2/agents/controllers/pd_ee_pose.py b/mani_skill2/agents/controllers/pd_ee_pose.py index 80369f33d..18c4652db 100644 --- a/mani_skill2/agents/controllers/pd_ee_pose.py +++ b/mani_skill2/agents/controllers/pd_ee_pose.py @@ -87,7 +87,9 @@ def ee_pose_at_base(self): def reset(self): super().reset() - self._target_pose = self.ee_pose_at_base + self._target_pose[self.scene._reset_mask] = self.ee_pose_at_base[ + self.scene._reset_mask + ] def compute_ik(self, target_pose: Pose, action: Array, max_iterations=100): # Assume the target pose is defined in the base frame diff --git a/mani_skill2/agents/controllers/pd_joint_pos.py b/mani_skill2/agents/controllers/pd_joint_pos.py index 3d7c2514c..9f3affa68 100644 --- a/mani_skill2/agents/controllers/pd_joint_pos.py +++ b/mani_skill2/agents/controllers/pd_joint_pos.py @@ -44,8 +44,8 @@ def set_drive_property(self): def reset(self): super().reset() self._step = 0 # counter of simulation steps after action is set - self._start_qpos = self.qpos - self._target_qpos = self.qpos + self._start_qpos[self.scene._reset_mask] = self.qpos[self.scene._reset_mask] + self._target_qpos[self.scene._reset_mask] = self.qpos[self.scene._reset_mask] def set_drive_targets(self, targets): self.articulation.set_joint_drive_targets( diff --git a/mani_skill2/agents/controllers/pd_joint_pos_vel.py b/mani_skill2/agents/controllers/pd_joint_pos_vel.py index 9bbeeb64e..048e8c52f 100644 --- 
a/mani_skill2/agents/controllers/pd_joint_pos_vel.py +++ b/mani_skill2/agents/controllers/pd_joint_pos_vel.py @@ -5,7 +5,6 @@ import torch from gymnasium import spaces -from .base_controller import BaseController, ControllerConfig from .pd_joint_pos import PDJointPosController, PDJointPosControllerConfig @@ -23,7 +22,9 @@ def _initialize_action_space(self): def reset(self): super().reset() - self._target_qvel = np.zeros_like(self._target_qpos) + self._target_qvel[self.scene._reset_mask] = torch.zeros_like( + self._target_qpos[self.scene._reset_mask], device=self.device + ) def set_drive_velocity_targets(self, targets): self.articulation.set_joint_drive_velocity_targets( diff --git a/mani_skill2/envs/scene.py b/mani_skill2/envs/scene.py index e694b3bd8..aec619fe6 100644 --- a/mani_skill2/envs/scene.py +++ b/mani_skill2/envs/scene.py @@ -49,7 +49,8 @@ def __init__( self.human_render_cameras: Dict[str, Camera] = OrderedDict() self._reset_mask = torch.ones(len(sub_scenes), dtype=bool, device=self.device) - """Used internally by various wrapped objects like Actor and Link to auto mask out sub-scenes so they do not get modified during partial env resets""" + """Used internally by various objects like Actor, Link, and Controllers to auto mask out sub-scenes so they do not get modified during + partial env resets""" @property def timestep(self): diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py index bff9713f1..70caacbca 100644 --- a/tests/test_wrappers.py +++ b/tests/test_wrappers.py @@ -29,6 +29,7 @@ def test_recordepisode_wrapper_gpu(env_id, obs_mode): output_dir=f"videos/pytest/{env_id}-gpu", trajectory_name=f"test_traj_{obs_mode}", info_on_video=False, + max_steps_per_video=50, save_trajectory=False, ) env = ManiSkillVectorEnv( @@ -81,7 +82,8 @@ def test_recordepisode_wrapper_gpu_render_sensor(env_id, obs_mode): env, output_dir=f"videos/pytest/{env_id}-gpu-render-sensor", trajectory_name=f"test_traj_{obs_mode}", - save_trajectory=False, + 
save_trajectory=True, + max_steps_per_video=50, info_on_video=False, ) env = ManiSkillVectorEnv( @@ -120,6 +122,70 @@ def test_recordepisode_wrapper_render_sensor(env_id, obs_mode): del env +@pytest.mark.gpu_sim +@pytest.mark.parametrize("env_id", ENV_IDS[:1]) +@pytest.mark.parametrize("obs_mode", OBS_MODES[:1]) +def test_recordepisode_wrapper_partial_reset_gpu(env_id, obs_mode): + env = gym.make( + env_id, + obs_mode=obs_mode, + render_mode="rgb_array", + num_envs=16, + sim_cfg=LOW_MEM_SIM_CFG, + ) + env = RecordEpisode( + env, + output_dir=f"videos/pytest/{env_id}-gpu-partial-resets", + trajectory_name=f"test_traj_{obs_mode}", + save_trajectory=True, + max_steps_per_video=50, + info_on_video=False, + ) + env = ManiSkillVectorEnv( + env, + max_episode_steps=10, + ) # this is used purely to just fix the timelimit wrapper problems + env.reset() + action_space = env.action_space + for i in range(20): + obs, rew, terminated, truncated, info = env.step(action_space.sample()) + if i == 13: + env.reset(options=dict(env_idx=[0, 1, 14, 15])) + env.close() + del env + + +@pytest.mark.parametrize("env_id", ENV_IDS[:1]) +@pytest.mark.parametrize("obs_mode", OBS_MODES[:1]) +def test_recordepisode_wrapper_partial_reset(env_id, obs_mode): + env = gym.make( + env_id, + obs_mode=obs_mode, + num_envs=1, + sim_cfg=LOW_MEM_SIM_CFG, + ) + env = RecordEpisode( + env, + output_dir=f"videos/pytest/{env_id}-gpu-partial-resets", + trajectory_name=f"test_traj_{obs_mode}", + save_trajectory=True, + max_steps_per_video=50, + info_on_video=False, + ) + env = ManiSkillVectorEnv( + env, + max_episode_steps=10, + ) # this is used purely to just fix the timelimit wrapper problems + env.reset() + action_space = env.action_space + for i in range(20): + obs, rew, terminated, truncated, info = env.step(action_space.sample()) + if i == 13: + env.reset() + env.close() + del env + + @pytest.mark.gpu_sim @pytest.mark.parametrize("env_id", [ENV_IDS[0]]) def test_visualencoders_gpu(env_id):