
Commit

update documentation
StoneT2000 committed Jan 27, 2024
1 parent 190edb3 commit bb07727
Showing 4 changed files with 21 additions and 15 deletions.
8 changes: 4 additions & 4 deletions mani_skill2/envs/minimal_template.py
@@ -46,12 +46,12 @@ def _load_actors(self):
     def _initialize_actors(self):
         pass
 
-    def _get_obs_extra(self):
-        return OrderedDict()
-
-    def evaluate(self, obs: Any):
+    def evaluate(self):
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}
 
+    def _get_obs_extra(self, info: Dict):
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         return torch.zeros(self.num_envs, device=self.device)

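For context (not part of this commit), a concrete task following the updated minimal template might wire the three methods together roughly as follows. This is a sketch under assumptions: the class name, `self.cube`, `self.goal_pos`, and the thresholds are hypothetical, and the base class is assumed to be `BaseEnv` from `mani_skill2.envs.sapien_env`.

from collections import OrderedDict
from typing import Any, Dict

import torch

from mani_skill2.envs.sapien_env import BaseEnv


class PushCubeEnv(BaseEnv):  # hypothetical task, used only for illustration
    def evaluate(self):
        # batched success check: the cube center is within 2.5 cm of the goal
        is_obj_placed = (
            torch.linalg.norm(self.goal_pos - self.cube.pose.p, dim=-1) < 0.025
        )
        return {"success": is_obj_placed}

    def _get_obs_extra(self, info: Dict):
        # reuse what evaluate() already computed instead of recomputing it
        return OrderedDict(is_obj_placed=info["success"])

    def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
        # shaped reward: closeness to the goal plus a bonus once the task is solved
        dist = torch.linalg.norm(self.goal_pos - self.cube.pose.p, dim=-1)
        reward = 1 - torch.tanh(5 * dist)
        reward[info["success"]] += 1.0
        return reward
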
6 changes: 5 additions & 1 deletion mani_skill2/envs/sapien_env.py
@@ -707,7 +707,11 @@ def step_action(self, action) -> Union[None, torch.Tensor]:
         return action
 
     def evaluate(self) -> dict:
-        """Evaluate whether the environment is currently in a success state."""
+        """
+        Evaluate whether the environment is currently in a success state by returning a dictionary with a "success" key.
+        This function may also return additional data that has been computed (e.g. whether the robot is grasping some object) so that it may be
+        reused when generating observations and rewards.
+        """
         raise NotImplementedError
 
     def get_info(self):
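
Seen from the outside, this contract means the dictionary returned by `evaluate` surfaces through the `info` of `env.step`, while `_get_obs_extra(info)` populates the "extra" observations. A rough usage sketch; the environment ID is an assumption and may differ in this repository:

import gymnasium as gym

import mani_skill2.envs  # noqa: F401  (registers the environments)

env = gym.make("PickSingleYCB-v0", obs_mode="state_dict", reward_mode="dense")
obs, _ = env.reset(seed=0)
obs, reward, terminated, truncated, info = env.step(env.action_space.sample())

print(info["success"])      # the batched success flag returned by evaluate()
print(obs["extra"].keys())  # the keys assembled by _get_obs_extra(info)
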
2 changes: 1 addition & 1 deletion mani_skill2/envs/tasks/pick_single_ycb.py
@@ -155,7 +155,7 @@ def evaluate(self):
             success=torch.logical_and(is_obj_placed, is_robot_static),
         )
 
-    def _get_obs_extra(self, obs: Dict):
+    def _get_obs_extra(self, info: Dict):
         obs = OrderedDict(
             tcp_pose=self.agent.tcp.pose.raw_pose,
             goal_pos=self.goal_site.pose.p,
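
With the parameter renamed from `obs` to `info`, quantities computed once in `evaluate` can be folded straight into the extra observations. A hedged sketch of how that might look for this task; the `is_obj_placed` key is assumed to be present in the dictionary returned by this task's `evaluate`, which the excerpt above does not show:

    def _get_obs_extra(self, info: Dict):
        obs = OrderedDict(
            tcp_pose=self.agent.tcp.pose.raw_pose,
            goal_pos=self.goal_site.pose.p,
            # reuse data already computed by evaluate() via the info dict
            is_obj_placed=info["is_obj_placed"],
        )
        return obs
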
20 changes: 11 additions & 9 deletions mani_skill2/envs/template.py
@@ -126,21 +126,23 @@ def _initialize_task(self):
     the code below all impact some part of `self.step` function
     """
 
-    def _get_obs_extra(self):
-        # should return an OrderedDict of additional observation data for your tasks
-        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
-        # and included as part of a flattened observation when obs_mode="state"
-        return OrderedDict()
-
     def evaluate(self, obs: Any):
-        # should return a dictionary containing "success": bool indicating if the environment is in success state or not. The value here is also what the sparse reward is
-        # for the task. You may also include additional keys which will populate the info object returned by self.step.
+        # should return a dictionary containing "success": bool array indicating if the environment is in a success state or not. The value here is also what the sparse reward is
+        # for the task. You may also include additional keys which will populate the info object returned by self.step and will be fed into `_get_obs_extra` and `_compute_dense_reward`
+        # note that as everything is batched, you must return a batched array of self.num_envs booleans (or 0/1 values) as done in the example below
         return {"success": torch.zeros(self.num_envs, device=self.device, dtype=bool)}
 
+    def _get_obs_extra(self, info: Dict):
+        # should return an OrderedDict of additional observation data for your tasks
+        # this will be included as part of the observation in the "extra" key when obs_mode="state_dict" or any of the visual obs_modes
+        # and included as part of a flattened observation when obs_mode="state". Moreover, you have access to the info object
+        # which is generated by the `evaluate` function above
+        return OrderedDict()
+
     def compute_dense_reward(self, obs: Any, action: torch.Tensor, info: Dict):
         # you can optionally provide a dense reward function by returning a scalar value here. This is used when reward_mode="dense"
-        # note that as everything is batched, you must return a batch of of self.num_envs rewards as done in the example below
+        # note that as everything is batched, you must return a batch of self.num_envs rewards as done in the example below.
+        # Moreover, you have access to the info object which is generated by the `evaluate` function above
         return torch.zeros(self.num_envs, device=self.device)
 
     def compute_normalized_dense_reward(
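
The `compute_normalized_dense_reward` method that the excerpt cuts off conventionally rescales the dense reward by its maximum value so that it lands in [0, 1] when reward_mode="normalized_dense". A sketch assuming the template keeps that convention; the signature and the maximum value are illustrative:

    def compute_normalized_dense_reward(
        self, obs: Any, action: torch.Tensor, info: Dict
    ):
        # rescale the dense reward above into [0, 1]
        max_reward = 2.0  # assumed upper bound of compute_dense_reward for this task
        return self.compute_dense_reward(obs=obs, action=action, info=info) / max_reward
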
