From 69450a953b0d350ba250d728985a0126ea018666 Mon Sep 17 00:00:00 2001
From: StoneT2000
Date: Sat, 20 Jan 2024 18:24:03 -0800
Subject: [PATCH] fix pick cube task reward

---
 mani_skill2/envs/pick_and_place/pick_cube.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/mani_skill2/envs/pick_and_place/pick_cube.py b/mani_skill2/envs/pick_and_place/pick_cube.py
index 9eda85908..d0fa72935 100644
--- a/mani_skill2/envs/pick_and_place/pick_cube.py
+++ b/mani_skill2/envs/pick_and_place/pick_cube.py
@@ -36,14 +36,17 @@ def _initialize_actors(self):
         xyz = np.zeros((self.num_envs, 3))
         xyz[..., :2] = self._episode_rng.uniform(-0.1, 0.1, [self.num_envs, 2])
         xyz[..., 2] = self.cube_half_size[2]
-        q = [1, 0, 0, 0]
+        qs = [1, 0, 0, 0]
         if self.obj_init_rot_z:
-            ori = self._episode_rng.uniform(0, 2 * np.pi)
-            q = euler2quat(0, 0, ori)
-
+            qs = []
+            for i in range(self.num_envs):
+                ori = self._episode_rng.uniform(0, 2 * np.pi)
+                q = euler2quat(0, 0, ori)
+                qs.append(q)
+            qs = to_tensor(qs)
         # to set a batch of poses, use the Pose object or provide a raw tensor
         obj_pose = Pose.create_from_pq(
-            p=xyz, q=np.array(q)[None, :].repeat(self.num_envs, axis=0)
+            p=xyz, q=qs
         )
         self.obj.set_pose(obj_pose)

@@ -124,7 +127,7 @@ def compute_dense_reward(self, obs, action, info):
         static_reward = 1 - torch.tanh(
             5 * torch.linalg.norm(self.agent.robot.get_qvel()[..., :-2], axis=1)
         )
-        reward += static_reward * info["is_robot_static"] * info["is_grasped"]
+        reward += static_reward * info["is_obj_placed"] * info["is_grasped"]
         reward[info["success"]] = 5
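---

Note on the batching fix in the first hunk: the old code drew a single yaw
angle and broadcast one quaternion across all parallel environments, so every
cube spawned with the same rotation; the patch samples one quaternion per env.
The snippet below is a minimal standalone sketch of that per-env sampling,
assuming numpy and transforms3d's euler2quat (which is what mani_skill2
imports); the helper name sample_batched_cube_poses and its signature are
illustrative, not part of the repo.

    import numpy as np
    from transforms3d.euler import euler2quat

    def sample_batched_cube_poses(num_envs, cube_half_size, rng, rot_z=True):
        # one (x, y) per env, uniform in a 0.2 m square, cube resting on the table
        xyz = np.zeros((num_envs, 3))
        xyz[:, :2] = rng.uniform(-0.1, 0.1, size=(num_envs, 2))
        xyz[:, 2] = cube_half_size
        if rot_z:
            # a distinct yaw per env (the bug was sampling only one shared yaw)
            oris = rng.uniform(0, 2 * np.pi, size=num_envs)
            qs = np.stack([euler2quat(0, 0, ori) for ori in oris])  # (N, 4) wxyz
        else:
            qs = np.tile([1.0, 0.0, 0.0, 0.0], (num_envs, 1))  # identity rotation
        return xyz, qs

    xyz, qs = sample_batched_cube_poses(4, 0.02, np.random.default_rng(0))
    print(xyz.shape, qs.shape)  # (4, 3) (4, 4)

The resulting (N, 3) and (N, 4) arrays are exactly the shapes the patched code
hands to Pose.create_from_pq(p=xyz, q=qs) before calling self.obj.set_pose.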