From 69450a953b0d350ba250d728985a0126ea018666 Mon Sep 17 00:00:00 2001
From: StoneT2000
Date: Sat, 20 Jan 2024 18:24:03 -0800
Subject: [PATCH] fix pick cube task reward

---
 mani_skill2/envs/pick_and_place/pick_cube.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/mani_skill2/envs/pick_and_place/pick_cube.py b/mani_skill2/envs/pick_and_place/pick_cube.py
index 9eda85908..d0fa72935 100644
--- a/mani_skill2/envs/pick_and_place/pick_cube.py
+++ b/mani_skill2/envs/pick_and_place/pick_cube.py
@@ -36,14 +36,17 @@ def _initialize_actors(self):
         xyz = np.zeros((self.num_envs, 3))
         xyz[..., :2] = self._episode_rng.uniform(-0.1, 0.1, [self.num_envs, 2])
         xyz[..., 2] = self.cube_half_size[2]
-        q = [1, 0, 0, 0]
+        qs = [1, 0, 0, 0]
         if self.obj_init_rot_z:
-            ori = self._episode_rng.uniform(0, 2 * np.pi)
-            q = euler2quat(0, 0, ori)
-
+            qs = []
+            for i in range(self.num_envs):
+                ori = self._episode_rng.uniform(0, 2 * np.pi)
+                q = euler2quat(0, 0, ori)
+                qs.append(q)
+            qs = to_tensor(qs)
         # to set a batch of poses, use the Pose object or provide a raw tensor
         obj_pose = Pose.create_from_pq(
-            p=xyz, q=np.array(q)[None, :].repeat(self.num_envs, axis=0)
+            p=xyz, q=qs
         )
         self.obj.set_pose(obj_pose)

@@ -124,7 +127,7 @@ def compute_dense_reward(self, obs, action, info):
         static_reward = 1 - torch.tanh(
             5 * torch.linalg.norm(self.agent.robot.get_qvel()[..., :-2], axis=1)
         )
-        reward += static_reward * info["is_robot_static"] * info["is_grasped"]
+        reward += static_reward * info["is_obj_placed"] * info["is_grasped"]
         reward[info["success"]] = 5
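---

Note on the batching fix in the first hunk: the old code drew a single yaw
angle and broadcast one quaternion across all parallel environments, so every
cube spawned with the same rotation; the patch samples one quaternion per env.
The snippet below is a minimal standalone sketch of that per-env sampling,
assuming numpy and transforms3d's euler2quat (which is what mani_skill2
imports); the helper name sample_batched_cube_poses and its signature are
illustrative, not part of the repo.

    import numpy as np
    from transforms3d.euler import euler2quat

    def sample_batched_cube_poses(num_envs, cube_half_size, rng, rot_z=True):
        # one (x, y) per env, uniform in a 0.2 m square, cube resting on the table
        xyz = np.zeros((num_envs, 3))
        xyz[:, :2] = rng.uniform(-0.1, 0.1, size=(num_envs, 2))
        xyz[:, 2] = cube_half_size
        if rot_z:
            # a distinct yaw per env (the bug was sampling only one shared yaw)
            oris = rng.uniform(0, 2 * np.pi, size=num_envs)
            qs = np.stack([euler2quat(0, 0, ori) for ori in oris])  # (N, 4) wxyz
        else:
            qs = np.tile([1.0, 0.0, 0.0, 0.0], (num_envs, 1))  # identity rotation
        return xyz, qs

    xyz, qs = sample_batched_cube_poses(4, 0.02, np.random.default_rng(0))
    print(xyz.shape, qs.shape)  # (4, 3) (4, 4)

The resulting (N, 3) and (N, 4) arrays are exactly the shapes the patched code
hands to Pose.create_from_pq(p=xyz, q=qs) before calling self.obj.set_pose.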