
Commit

update documentation
StoneT2000 committed Jan 27, 2024
1 parent 190edb3 commit bb07727
Showing 4 changed files with 21 additions and 15 deletions.
8 changes: 4 additions & 4 deletions mani_skill2/envs/minimal_template.py
@@ -46,12 +46,12 @@ def _load_actors(self):
     def _initialize_actors(self):
         pass
 
-    def _get_obs_extra(self):
-        return OrderedDict()
-
-    def evaluate(self, obs: Any):
+    def evaluate(self):
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}
 
+    def _get_obs_extra(self, info: Dict):
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         return torch.zeros(self.num_envs, device=self.device)

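For context (not part of this commit), a concrete task following the updated minimal template might wire the three methods together roughly as follows. This is a sketch under assumptions: the class name, `self.cube`, `self.goal_pos`, and the thresholds are hypothetical, and the base class is assumed to be `BaseEnv` from `mani_skill2.envs.sapien_env`.

from collections import OrderedDict
from typing import Any, Dict

import torch

from mani_skill2.envs.sapien_env import BaseEnv


class PushCubeEnv(BaseEnv):  # hypothetical task, used only for illustration
    def evaluate(self):
        # batched success check: the cube center is within 2.5 cm of the goal
        is_obj_placed = (
            torch.linalg.norm(self.goal_pos - self.cube.pose.p, dim=-1) < 0.025
        )
        return {"success": is_obj_placed}

    def _get_obs_extra(self, info: Dict):
        # reuse what evaluate() already computed instead of recomputing it
        return OrderedDict(is_obj_placed=info["success"])

    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
        # shaped reward: closeness to the goal plus a bonus once the task is solved
        dist = torch.linalg.norm(self.goal_pos - self.cube.pose.p, dim=-1)
        reward = 1 - torch.tanh(5 * dist)
        reward[info["success"]] += 1.0
        return reward
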
6 changes: 5 additions & 1 deletion mani_skill2/envs/sapien_env.py
@@ -707,7 +707,11 @@ def step_action(self, action) -> Union[None, torch.Tensor]:
         return action
 
     def evaluate(self) -> dict:
-        """Evaluate whether the environment is currently in a success state."""
+        """
+        Evaluate whether the environment is currently in a success state by returning a dictionary with a "success" key.
+        This function may also return additional data that has been computed (e.g. whether the robot is grasping some object) so that it may be
+        reused when generating observations and rewards.
+        """
         raise NotImplementedError
 
     def get_info(self):
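
Seen from the outside, this contract means the dictionary returned by `evaluate` surfaces through the `info` of `env.step`, while `_get_obs_extra(info)` populates the "extra" observations. A rough usage sketch; the environment ID is an assumption and may differ in this repository:

import gymnasium as gym

import mani_skill2.envs  # noqa: F401  (registers the environments)

env = gym.make("PickSingleYCB-v0", obs_mode="state_dict", reward_mode="dense")
obs, _ = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

print(info["success"])      # the batched success flag returned by evaluate()
print(obs["extra"].keys())  # the keys assembled by _get_obs_extra(info)
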
2 changes: 1 addition & 1 deletion mani_skill2/envs/tasks/pick_single_ycb.py
@@ -155,7 +155,7 @@ def evaluate(self):
             success=torch.logical_and(is_obj_placed, is_robot_static),
         )
 
-    def _get_obs_extra(self, obs: Dict):
+    def _get_obs_extra(self, info: Dict):
         obs = OrderedDict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_site.pose.p,
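
With the parameter renamed from `obs` to `info`, quantities computed once in `evaluate` can be folded straight into the extra observations. A hedged sketch of how that might look for this task; the `is_obj_placed` key is assumed to be present in the dictionary returned by this task's `evaluate`, which the excerpt above does not show:

    def _get_obs_extra(self, info: Dict):
        obs = OrderedDict(
            tcp_pose=self.agent.tcp.pose.raw_pose,
            goal_pos=self.goal_site.pose.p,
            # reuse data already computed by evaluate() via the info dict
            is_obj_placed=info["is_obj_placed"],
        )
        return obs
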
20 changes: 11 additions & 9 deletions mani_skill2/envs/template.py
@@ -126,21 +126,23 @@ def _initialize_task(self):
     the code below all impact some part of `self.step` function
     """
 
-    def _get_obs_extra(self):
-        # should return an OrderedDict of additional observation data for your tasks
-        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
-        # and included as part of a flattened observation when obs_mode="state"
-        return OrderedDict()
-
     def evaluate(self, obs: Any):
-        # should return a dictionary containing "success": bool indicating if the environment is in success state or not. The value here is also what the sparse reward is
-        # for the task. You may also include additional keys which will populate the info object returned by self.step.
+        # should return a dictionary containing "success": bool array indicating if the environment is in a success state or not. The value here is also what the sparse reward is
+        # for the task. You may also include additional keys which will populate the info object returned by self.step and will be fed into `_get_obs_extra` and `_compute_dense_reward`
+        # note that as everything is batched, you must return a batched array of self.num_envs booleans (or 0/1 values) as done in the example below
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}
 
+    def _get_obs_extra(self, info: Dict):
+        # should return an OrderedDict of additional observation data for your tasks
+        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
+        # and included as part of a flattened observation when obs_mode="state". Moreover, you have access to the info object
+        # which is generated by the `evaluate` function above
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         # you can optionally provide a dense reward function by returning a scalar value here. This is used when reward_mode="dense"
-        # note that as everything is batched, you must return a batch of of self.num_envs rewards as done in the example below
+        # note that as everything is batched, you must return a batch of self.num_envs rewards as done in the example below.
+        # Moreover, you have access to the info object which is generated by the `evaluate` function above
         return torch.zeros(self.num_envs, device=self.device)
 
     def compute_normalized_dense_reward(
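
The `compute_normalized_dense_reward` method that the excerpt cuts off conventionally rescales the dense reward by its maximum value so that it lands in [0, 1] when reward_mode="normalized_dense". A sketch assuming the template keeps that convention; the signature and the maximum value are illustrative:

    def compute_normalized_dense_reward(
        self, obs: Any, action: torch.Tensor, info: Dict
    ):
        # rescale the dense reward above into [0, 1]
        max_reward = 2.0  # assumed upper bound of compute_dense_reward for this task
        return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
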
