bug fix to handle both single and multiple envs
StoneT2000 committed Mar 5, 2024
1 parent 1b3d1c9 commit ae0df3b
Showing 3 changed files with 51 additions and 56 deletions.
15 changes: 12 additions & 3 deletions mani_skill2/utils/wrappers/record.py
@@ -35,6 +35,10 @@ def append_dict_array(
     Assumes both `x1, x2` have the same dictionary structure if they are dictionaries.
     They may also both be lists/sequences in which case this is just appending like normal"""
     if isinstance(x1, np.ndarray):
+        if len(x1.shape) > len(x2.shape):
+            # if different dims, check if extra dim is just a 1 due to single env in batch mode and if so, add it to x2.
+            if x1.shape[1] == 1:
+                x2 = x2[:, None, :]
         return np.concatenate([x1, x2])
     elif isinstance(x1, list):
         return x1 + x2
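
The gist of the fix: when a single environment is recorded in batch mode, the buffer carries a singleton env axis that freshly batched step data lacks, so the helper now inserts that axis before concatenating. Below is a minimal runnable sketch of the helper as patched; the dict branch is an assumption about the part of the function this hunk does not show.

import numpy as np

def append_dict_array(x1, x2):
    # Append step data x2 onto buffer x1, recursing through dicts.
    if isinstance(x1, np.ndarray):
        if len(x1.shape) > len(x2.shape):
            # Buffer has a singleton env axis that x2 lacks; insert it into x2.
            if x1.shape[1] == 1:
                x2 = x2[:, None, :]
        return np.concatenate([x1, x2])
    elif isinstance(x1, list):
        return x1 + x2
    elif isinstance(x1, dict):
        # Assumed structure of the dict branch, which this hunk elides.
        return {k: append_dict_array(x1[k], x2[k]) for k in x1}

buffer = np.zeros((4, 1, 7))  # (steps, num_envs=1, action_dim)
step = np.ones((1, 7))        # un-batched step data from the single env
print(append_dict_array(buffer, step).shape)  # (5, 1, 7)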
@@ -141,7 +145,8 @@ class Step:

 class RecordEpisode(gym.Wrapper):
     """Record trajectories or videos for episodes. You should generally apply this wrapper last, particularly if you include
-    observation wrappers which modify the returned observations.
+    observation wrappers which modify the returned observations. The only wrappers that may go after this one are the vector env
+    interface wrappers that map the ManiSkill env to e.g. a gym vector env interface.
 
     Trajectory data is saved with two files, the actual data in a .h5 file via H5py and metadata in a JSON file of the same basename.
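
Concretely, the recommended ordering looks like the sketch below, assembled from the updated manualtest/record_test.py later in this commit; the ManiSkillVectorEnv import path is an assumption and may differ in your install.

import gymnasium as gym
from mani_skill2.utils.wrappers.record import RecordEpisode
# Import path below is an assumption; adjust to wherever ManiSkillVectorEnv lives.
from mani_skill2.vector import ManiSkillVectorEnv

env = gym.make("PickCube-v1", num_envs=1, obs_mode="state", render_mode="rgb_array")
# Observation wrappers, if any, would be applied here, before recording.
env = RecordEpisode(
    env,
    output_dir="videos/demo",
    trajectory_name="demo",
    save_trajectory=True,
    max_steps_per_video=50,
)
env = ManiSkillVectorEnv(env)  # vector interface wrappers may come after RecordEpisode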
Expand Down Expand Up @@ -329,11 +334,12 @@ def reset(
         #     control_mode=getattr(self.unwrapped, "control_mode", None),
         #     elapsed_steps=0,
         # )
+        action = batch(self.action_space.sample())
         first_step = Step(
             state=to_numpy(batch(state_dict)),
             observation=to_numpy(batch(obs)),
             # note first reward/action etc. are ignored when saving trajectories to disk
-            action=batch(self.action_space.sample()),
+            action=action,
             reward=np.zeros(
                 (
                     1,
@@ -350,6 +356,9 @@
             fail=np.zeros((1, self.num_envs), dtype=bool),
             env_episode_ptr=np.zeros((self.num_envs,), dtype=int),
         )
+        if self.num_envs == 1:
+            first_step.observation = batch(first_step.observation)
+            first_step.action = batch(first_step.action)
         env_idx = np.arange(self.num_envs)
         if "env_idx" in options:
             env_idx = to_numpy(options["env_idx"])
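
The new single-env branch re-batches the first step so its arrays match the (steps, num_envs, ...) layout of the trajectory buffer. A hedged sketch of what a batch helper of this kind does (an assumption; the real helper is defined elsewhere in mani_skill2):

import numpy as np

def batch(x):
    # Add a leading batch axis, recursing through dicts (sketch/assumption).
    if isinstance(x, dict):
        return {k: batch(v) for k, v in x.items()}
    return np.asarray(x)[None, ...]

obs = {"qpos": np.zeros(9)}
print(batch(obs)["qpos"].shape)         # (1, 9)
print(batch(batch(obs))["qpos"].shape)  # (1, 1, 9), the single-env buffer layout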
@@ -367,7 +376,6 @@ def recursive_replace(x, y):
             for k in x.keys():
                 recursive_replace(x[k], y[k])
 
-        # import ipdb;ipdb.set_trace()
         recursive_replace(self._trajectory_buffer.state, first_step.state)
         recursive_replace(
             self._trajectory_buffer.observation, first_step.observation
@@ -412,6 +420,7 @@ def step(self, action):
             self._trajectory_buffer.observation = append_dict_array(
                 self._trajectory_buffer.observation, to_numpy(batch(obs))
             )
+
             self._trajectory_buffer.action = append_dict_array(
                 self._trajectory_buffer.action, to_numpy(batch(action))
             )
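
Each step() call batches the incoming transition and appends it, so the buffer grows by one batched row per step. A shape-only illustration under hypothetical sizes (num_envs=4, action_dim=8):

import numpy as np

buf = np.zeros((1, 4, 8))  # buffer right after reset holds the first entry
for _ in range(3):
    action = np.random.rand(4, 8)              # one action per env
    buf = np.concatenate([buf, action[None]])  # what batch() + append amount to
print(buf.shape)  # (4, 4, 8): the reset entry plus three steps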
89 changes: 37 additions & 52 deletions manualtest/record_test.py
@@ -10,55 +10,40 @@
 if __name__ == "__main__":
     # sapien.set_log_level("info")
     # , "StackCube-v1", "PickCube-v1", "PushCube-v1", "PickSingleYCB-v1", "OpenCabinet-v1"
-    num_envs = 16
-    for env_id in ["PickCube-v1"]:
-        env = gym.make(
-            env_id,
-            num_envs=num_envs,
-            enable_shadow=True,
-            # robot_uids="fetch",
-            obs_mode="state_dict",
-            reward_mode="normalized_dense",
-            render_mode="rgb_array",
-            control_mode="pd_joint_delta_pos",
-            # sim_cfg=dict(sim_freq=100),
-            # control_mode="pd_ee_delta_pos",
-            # sim_freq=100,
-            # control_freq=20,
-            # force_use_gpu_sim=True,
-            # reconfiguration_freq=1,
-        )
-        env = RecordEpisode(
-            env,
-            output_dir="videos/manual_test",
-            trajectory_name=f"{env_id}",
-            info_on_video=False,
-            video_fps=30,
-            save_trajectory=True,
-            max_steps_per_video=50,
-        )
-        # env = ManiSkillVectorEnv(env)
-
-        # env.reset(seed=52, options=dict(reconfigure=True))
-        env.reset()
-        env.reset()
-        env.reset()
-        env.reset()
-        # for i in range(180):
-        #     env.step(env.action_space.sample())
-        env.step(env.action_space.sample())
-        env.step(env.action_space.sample())
-        # print("partial reset")
-        env.reset(options=dict(env_idx=[0, 1, 2]))
-        for i in range(50):
-            env.step(env.action_space.sample())
-        # for i in range(60):
-        #     env.step(env.action_space.sample())
-        print("prep close")
-        env.close()
-        # import h5py
-
-        # data = h5py.File("videos/manual_test/PickCube-v1.h5")
-        # import ipdb
-
-        # ipdb.set_trace()
+    env_id = "PickCube-v1"
+    obs_mode = "state"
+    num_envs = 1
+    env = gym.make(
+        env_id,
+        obs_mode=obs_mode,
+        num_envs=1,
+        render_mode="rgb_array",
+    )
+    # env = RecordEpisode(
+    #     env,
+    #     output_dir=f"videos/manual_test/{env_id}-partial-resets",
+    #     trajectory_name=f"test_traj_{obs_mode}",
+    #     save_trajectory=True,
+    #     max_steps_per_video=50,
+    #     info_on_video=False,
+    # )
+    env = RecordEpisode(
+        env,
+        output_dir="videos/manual_test",
+        trajectory_name=f"{env_id}",
+        info_on_video=False,
+        video_fps=30,
+        save_trajectory=True,
+        max_steps_per_video=50,
+    )
+    env = ManiSkillVectorEnv(
+        env,
+    )  # used purely to fix the TimeLimit wrapper problems
+    env.reset()
+    action_space = env.action_space
+    for i in range(20):
+        obs, rew, terminated, truncated, info = env.step(action_space.sample())
+        if i == 13:
+            env.reset()
+    env.close()
+    del env
3 changes: 2 additions & 1 deletion tests/test_wrappers.py
@@ -163,11 +163,12 @@ def test_recordepisode_wrapper_partial_reset(env_id, obs_mode):
         env_id,
         obs_mode=obs_mode,
         num_envs=1,
+        render_mode="rgb_array",
         sim_cfg=LOW_MEM_SIM_CFG,
     )
     env = RecordEpisode(
         env,
-        output_dir=f"videos/pytest/{env_id}-gpu-partial-resets",
+        output_dir=f"videos/pytest/{env_id}-partial-resets",
         trajectory_name=f"test_traj_{obs_mode}",
         save_trajectory=True,
         max_steps_per_video=50,
