diff --git a/mani_skill2/envs/minimal_template.py b/mani_skill2/envs/minimal_template.py
index d3f4a30f3..43a003333 100644
--- a/mani_skill2/envs/minimal_template.py
+++ b/mani_skill2/envs/minimal_template.py
@@ -46,12 +46,12 @@ def _load_actors(self):
     def _initialize_actors(self):
         pass

-    def _get_obs_extra(self):
-        return OrderedDict()
-
-    def evaluate(self, obs: Any):
+    def evaluate(self):
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}

+    def _get_obs_extra(self, info: Dict):
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         return torch.zeros(self.num_envs, device=self.device)

diff --git a/mani_skill2/envs/sapien_env.py b/mani_skill2/envs/sapien_env.py
index f051dba06..7862376a2 100644
--- a/mani_skill2/envs/sapien_env.py
+++ b/mani_skill2/envs/sapien_env.py
@@ -707,7 +707,11 @@ def step_action(self, action) -> Union[None, torch.Tensor]:
         return action

     def evaluate(self) -> dict:
-        """Evaluate whether the environment is currently in a success state."""
+        """
+        Evaluate whether the environment is currently in a success state by returning a dictionary with a "success" key.
+        This function may also return additional data that has been computed (e.g. is the robot grasping some object) so that it may be
+        reused when generating observations and rewards.
+        """
         raise NotImplementedError

     def get_info(self):
diff --git a/mani_skill2/envs/tasks/pick_single_ycb.py b/mani_skill2/envs/tasks/pick_single_ycb.py
index e979e3eec..24a2c9b7f 100644
--- a/mani_skill2/envs/tasks/pick_single_ycb.py
+++ b/mani_skill2/envs/tasks/pick_single_ycb.py
@@ -155,7 +155,7 @@ def evaluate(self):
             success=torch.logical_and(is_obj_placed, is_robot_static),
         )

-    def _get_obs_extra(self, obs: Dict):
+    def _get_obs_extra(self, info: Dict):
         obs = OrderedDict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_site.pose.p,
diff --git a/mani_skill2/envs/template.py b/mani_skill2/envs/template.py
index 38670d587..6719e2c30 100644
--- a/mani_skill2/envs/template.py
+++ b/mani_skill2/envs/template.py
@@ -126,21 +126,23 @@ def _initialize_task(self):
     the code below all impact some part of `self.step` function
     """

-    def _get_obs_extra(self):
-        # should return an OrderedDict of additional observation data for your tasks
-        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
-        # and included as part of a flattened observation when obs_mode="state"
-        return OrderedDict()
-
     def evaluate(self, obs: Any):
-        # should return a dictionary containing "success": bool indicating if the environment is in success state or not. The value here is also what the sparse reward is
-        # for the task. You may also include additional keys which will populate the info object returned by self.step.
+        # should return a dictionary containing "success": bool array indicating if the environment is in success state or not. The value here is also what the sparse reward is
+        # for the task. You may also include additional keys which will populate the info object returned by self.step and will be fed into `_get_obs_extra` and `compute_dense_reward`
         # note that as everything is batched, you must return a batched array of self.num_envs booleans (or 0/1 values) as done in the example below
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}

+    def _get_obs_extra(self, info: Dict):
+        # should return an OrderedDict of additional observation data for your tasks
+        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
+        # and included as part of a flattened observation when obs_mode="state". Moreover, you have access to the info object
+        # which is generated by the `evaluate` function above
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         # you can optionally provide a dense reward function by returning a scalar value here. This is used when reward_mode="dense"
-        # note that as everything is batched, you must return a batch of of self.num_envs rewards as done in the example below
+        # note that as everything is batched, you must return a batch of self.num_envs rewards as done in the example below.
+        # Moreover, you have access to the info object which is generated by the `evaluate` function above
         return torch.zeros(self.num_envs, device=self.device)

     def compute_normalized_dense_reward(
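For reference, a task written against the updated API could look like the sketch below. This is an illustrative example rather than code from the diff: `MyPickTask`, `self.obj`, `self.goal_site`, and the 0.025 m tolerance are hypothetical placeholders for actors and thresholds a real task would set up in `_load_actors`, and the `BaseEnv` import path is inferred from the file paths above. The point it illustrates, taken from the changes to `sapien_env.py` and `template.py`, is that `evaluate()` computes success (plus any reusable intermediate values) once, and the resulting dict is handed to both `_get_obs_extra` and `compute_dense_reward` as `info`.

```python
from collections import OrderedDict
from typing import Any, Dict

import torch

from mani_skill2.envs.sapien_env import BaseEnv  # import path assumed from the diff above


class MyPickTask(BaseEnv):
    """Hypothetical task sketch showing how the evaluate -> info flow fits together."""

    def evaluate(self):
        # compute success once; everything else returned here is cached in `info`
        # and reused by the observation and reward code instead of being recomputed
        obj_to_goal_dist = torch.linalg.norm(
            self.goal_site.pose.p - self.obj.pose.p, dim=1
        )  # `self.obj` / `self.goal_site` are placeholder actors created in _load_actors
        is_obj_placed = obj_to_goal_dist < 0.025  # hypothetical tolerance
        return dict(
            obj_to_goal_dist=obj_to_goal_dist,
            success=is_obj_placed,  # batched bool tensor of shape (self.num_envs,)
        )

    def _get_obs_extra(self, info: Dict):
        # `info` is exactly the dict evaluate() returned above
        return OrderedDict(
            tcp_pose=self.agent.tcp.pose.raw_pose,
            goal_pos=self.goal_site.pose.p,
            obj_to_goal_dist=info["obj_to_goal_dist"],
        )

    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
        # shape the dense reward from the cached distance and add a success bonus;
        # the return value is a batch of self.num_envs scalars
        reward = 1 - torch.tanh(5 * info["obj_to_goal_dist"])
        reward = reward + info["success"].float()
        return reward
```

Compared with the previous flow, where `evaluate(obs)` and `_get_obs_extra()` each had to derive such quantities on their own, the `info` dict makes `evaluate` the single place where task state is computed.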