bug fix to handle both single and multiple envs
StoneT2000 committed Mar 5, 2024
1 parent 1b3d1c9 commit ae0df3b
Showing 3 changed files with 51 additions and 56 deletions.
15 changes: 12 additions & 3 deletions mani_skill2/utils/wrappers/record.py
@@ -35,6 +35,10 @@ def append_dict_array(
     Assumes both `x1, x2` have the same dictionary structure if they are dictionaries.
     They may also both be lists/sequences in which case this is just appending like normal"""
     if isinstance(x1, np.ndarray):
+        if len(x1.shape) > len(x2.shape):
+            # if different dims, check if extra dim is just a 1 due to single env in batch mode and if so, add it to x2.
+            if x1.shape[1] == 1:
+                x2 = x2[:, None, :]
         return np.concatenate([x1, x2])
     elif isinstance(x1, list):
         return x1 + x2
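
The gist of the fix: when a single environment is recorded in batch mode, the buffer carries a singleton env axis that freshly batched step data lacks, so the helper now inserts that axis before concatenating. Below is a minimal runnable sketch of the helper as patched; the dict branch is an assumption about the part of the function this hunk does not show.

import numpy as np

def append_dict_array(x1, x2):
    # Append step data x2 onto buffer x1, recursing through dicts.
    if isinstance(x1, np.ndarray):
        if len(x1.shape) > len(x2.shape):
            # Buffer has a singleton env axis that x2 lacks; insert it into x2.
            if x1.shape[1] == 1:
                x2 = x2[:, None, :]
        return np.concatenate([x1, x2])
    elif isinstance(x1, list):
        return x1 + x2
    elif isinstance(x1, dict):
        # Assumed structure of the dict branch, which this hunk elides.
        return {k: append_dict_array(x1[k], x2[k]) for k in x1}

buffer = np.zeros((4, 1, 7))  # (steps, num_envs=1, action_dim)
step = np.ones((1, 7))        # un-batched step data from the single env
print(append_dict_array(buffer, step).shape)  # (5, 1, 7)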
@@ -141,7 +145,8 @@ class Step:

 class RecordEpisode(gym.Wrapper):
     """Record trajectories or videos for episodes. You should generally apply this wrapper last, particularly if you include
-    observation wrappers which modify the returned observations.
+    observation wrappers which modify the returned observations. The only wrappers that may go after this one are the vector env
+    interface wrappers that map the ManiSkill env to e.g. a gym vector env interface.
 
     Trajectory data is saved with two files, the actual data in a .h5 file via H5py and metadata in a JSON file of the same basename.
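
Concretely, the recommended ordering looks like the sketch below, assembled from the updated manualtest/record_test.py later in this commit; the ManiSkillVectorEnv import path is an assumption and may differ in your install.

import gymnasium as gym
from mani_skill2.utils.wrappers.record import RecordEpisode
# Import path below is an assumption; adjust to wherever ManiSkillVectorEnv lives.
from mani_skill2.vector import ManiSkillVectorEnv

env = gym.make("PickCube-v1", num_envs=1, obs_mode="state", render_mode="rgb_array")
# Observation wrappers, if any, would be applied here, before recording.
env = RecordEpisode(
    env,
    output_dir="videos/demo",
    trajectory_name="demo",
    save_trajectory=True,
    max_steps_per_video=50,
)
env = ManiSkillVectorEnv(env)  # vector interface wrappers may come after RecordEpisode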
Expand Down Expand Up @@ -329,11 +334,12 @@ def reset(
         #     control_mode=getattr(self.unwrapped, "control_mode", None),
         #     elapsed_steps=0,
         # )
+        action = batch(self.action_space.sample())
         first_step = Step(
             state=to_numpy(batch(state_dict)),
             observation=to_numpy(batch(obs)),
             # note first reward/action etc. are ignored when saving trajectories to disk
-            action=batch(self.action_space.sample()),
+            action=action,
             reward=np.zeros(
                 (
                     1,
@@ -350,6 +356,9 @@
             fail=np.zeros((1, self.num_envs), dtype=bool),
             env_episode_ptr=np.zeros((self.num_envs,), dtype=int),
         )
+        if self.num_envs == 1:
+            first_step.observation = batch(first_step.observation)
+            first_step.action = batch(first_step.action)
         env_idx = np.arange(self.num_envs)
         if "env_idx" in options:
             env_idx = to_numpy(options["env_idx"])
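
The new single-env branch re-batches the first step so its arrays match the (steps, num_envs, ...) layout of the trajectory buffer. A hedged sketch of what a batch helper of this kind does (an assumption; the real helper is defined elsewhere in mani_skill2):

import numpy as np

def batch(x):
    # Add a leading batch axis, recursing through dicts (sketch/assumption).
    if isinstance(x, dict):
        return {k: batch(v) for k, v in x.items()}
    return np.asarray(x)[None, ...]

obs = {"qpos": np.zeros(9)}
print(batch(obs)["qpos"].shape)         # (1, 9)
print(batch(batch(obs))["qpos"].shape)  # (1, 1, 9), the single-env buffer layout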
@@ -367,7 +376,6 @@ def recursive_replace(x, y):
             for k in x.keys():
                 recursive_replace(x[k], y[k])
 
-        # import ipdb;ipdb.set_trace()
         recursive_replace(self._trajectory_buffer.state, first_step.state)
         recursive_replace(
             self._trajectory_buffer.observation, first_step.observation
@@ -412,6 +420,7 @@ def step(self, action):
             self._trajectory_buffer.observation = append_dict_array(
                 self._trajectory_buffer.observation, to_numpy(batch(obs))
             )
+
             self._trajectory_buffer.action = append_dict_array(
                 self._trajectory_buffer.action, to_numpy(batch(action))
             )
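
Each step() call batches the incoming transition and appends it, so the buffer grows by one batched row per step. A shape-only illustration under hypothetical sizes (num_envs=4, action_dim=8):

import numpy as np

buf = np.zeros((1, 4, 8))  # buffer right after reset holds the first entry
for _ in range(3):
    action = np.random.rand(4, 8)              # one action per env
    buf = np.concatenate([buf, action[None]])  # what batch() + append amount to
print(buf.shape)  # (4, 4, 8): the reset entry plus three steps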
89 changes: 37 additions & 52 deletions manualtest/record_test.py
@@ -10,55 +10,40 @@
 if __name__ == "__main__":
     # sapien.set_log_level("info")
     # , "StackCube-v1", "PickCube-v1", "PushCube-v1", "PickSingleYCB-v1", "OpenCabinet-v1"
-    num_envs = 16
-    for env_id in ["PickCube-v1"]:
-        env = gym.make(
-            env_id,
-            num_envs=num_envs,
-            enable_shadow=True,
-            # robot_uids="fetch",
-            obs_mode="state_dict",
-            reward_mode="normalized_dense",
-            render_mode="rgb_array",
-            control_mode="pd_joint_delta_pos",
-            # sim_cfg=dict(sim_freq=100),
-            # control_mode="pd_ee_delta_pos",
-            # sim_freq=100,
-            # control_freq=20,
-            # force_use_gpu_sim=True,
-            # reconfiguration_freq=1,
-        )
-        env = RecordEpisode(
-            env,
-            output_dir="videos/manual_test",
-            trajectory_name=f"{env_id}",
-            info_on_video=False,
-            video_fps=30,
-            save_trajectory=True,
-            max_steps_per_video=50,
-        )
-        # env = ManiSkillVectorEnv(env)
-
-        # env.reset(seed=52, options=dict(reconfigure=True))
-        env.reset()
-        env.reset()
-        env.reset()
-        env.reset()
-        # for i in range(180):
-        #     env.step(env.action_space.sample())
-        env.step(env.action_space.sample())
-        env.step(env.action_space.sample())
-        # print("partial reset")
-        env.reset(options=dict(env_idx=[0, 1, 2]))
-        for i in range(50):
-            env.step(env.action_space.sample())
-        # for i in range(60):
-        #     env.step(env.action_space.sample())
-        print("prep close")
-        env.close()
-        # import h5py
-
-        # data = h5py.File("videos/manual_test/PickCube-v1.h5")
-        # import ipdb
-
-        # ipdb.set_trace()
+    env_id = "PickCube-v1"
+    obs_mode = "state"
+    num_envs = 1
+    env = gym.make(
+        env_id,
+        obs_mode=obs_mode,
+        num_envs=1,
+        render_mode="rgb_array",
+    )
+    # env = RecordEpisode(
+    #     env,
+    #     output_dir=f"videos/manual_test/{env_id}-partial-resets",
+    #     trajectory_name=f"test_traj_{obs_mode}",
+    #     save_trajectory=True,
+    #     max_steps_per_video=50,
+    #     info_on_video=False,
+    # )
+    env = RecordEpisode(
+        env,
+        output_dir="videos/manual_test",
+        trajectory_name=f"{env_id}",
+        info_on_video=False,
+        video_fps=30,
+        save_trajectory=True,
+        max_steps_per_video=50,
+    )
+    env = ManiSkillVectorEnv(
+        env,
+    )  # used purely to fix the TimeLimit wrapper problems
+    env.reset()
+    action_space = env.action_space
+    for i in range(20):
+        obs, rew, terminated, truncated, info = env.step(action_space.sample())
+        if i == 13:
+            env.reset()
+    env.close()
+    del env
3 changes: 2 additions & 1 deletion tests/test_wrappers.py
@@ -163,11 +163,12 @@ def test_recordepisode_wrapper_partial_reset(env_id, obs_mode):
         env_id,
         obs_mode=obs_mode,
         num_envs=1,
+        render_mode="rgb_array",
         sim_cfg=LOW_MEM_SIM_CFG,
     )
     env = RecordEpisode(
         env,
-        output_dir=f"videos/pytest/{env_id}-gpu-partial-resets",
+        output_dir=f"videos/pytest/{env_id}-partial-resets",
         trajectory_name=f"test_traj_{obs_mode}",
         save_trajectory=True,
         max_steps_per_video=50,
