refactor, appending hidden_objects list allows handling of hiding e.g. goal sites
StoneT2000 committed Jan 25, 2024
1 parent 35d425a commit 46c6b03
Showing 7 changed files with 50 additions and 27 deletions.
46 changes: 33 additions & 13 deletions mani_skill2/envs/sapien_env.py
@@ -39,6 +39,8 @@
to_tensor,
unbatch,
)
+ from mani_skill2.utils.structs.actor import Actor
+ from mani_skill2.utils.structs.articulation import Articulation
from mani_skill2.utils.structs.types import Array
from mani_skill2.utils.visualization.misc import observations_to_images, tile_images

@@ -89,19 +91,25 @@ class BaseEnv(gym.Env):

physx_system: Union[sapien.physx.PhysxCpuSystem, sapien.physx.PhysxGpuSystem] = None

- # the main scene, which manages all sub scenes. In CPU simulation there is only one sub-scene
_scene: ManiSkillScene = None
+ """the main scene, which manages all sub scenes. In CPU simulation there is only one sub-scene"""

_agent_cls: Type[BaseAgent]
agent: BaseAgent

_sensors: Dict[str, BaseSensor]
"""all sensors configured in this environment"""
_sensor_cfgs: Dict[str, BaseSensorConfig]
"""all sensor configurations"""
_agent_camera_cfgs: Dict[str, CameraConfig]

# render cameras are sensors that are not part of any observations
_render_cameras: Dict[str, Camera]
_render_camera_cfgs: Dict[str, CameraConfig]

+ _hidden_objects: List[Union[Actor, Articulation]] = []
+ """list of objects that are hidden during rendering when generating visual observations / running render_cameras()"""

def __init__(
self,
num_envs: int = 1,
@@ -377,14 +385,13 @@ def capture_sensor_data(self):
"Other modalities of sensor data not implemented yet"
)

- def get_images(self) -> Dict[str, Dict[str, np.ndarray]]:
-     # TODO (stao): support general sensors later.
-     """Get (raw) images from all cameras (blocking)."""
-     images = OrderedDict()
-     for name, cam in self._sensors.items():
-         if isinstance(cam, Camera):
-             images[name] = cam.get_images()
-     return images
+ def get_sensor_data(self) -> Dict[str, Dict[str, np.ndarray]]:
+     """Get raw sensor data such as images"""
+     sensor_data = OrderedDict()
+     for name, sensor in self._sensors.items():
+         if isinstance(sensor, Camera):
+             sensor_data[name] = sensor.get_images()
+     return sensor_data

def get_camera_params(self) -> Dict[str, Dict[str, np.ndarray]]:
"""Get camera parameters from all cameras."""
@@ -394,13 +401,15 @@ def get_camera_params(self) -> Dict[str, Dict[str, np.ndarray]]:
return params

def _get_obs_images(self) -> OrderedDict:
+ for obj in self._hidden_objects:
+     obj.hide_visual()
self.update_render()
self.capture_sensor_data()
return OrderedDict(
agent=self._get_obs_agent(),
extra=self._get_obs_extra(),
camera_param=self.get_camera_params(),
- image=self.get_images(),
+ image=self.get_sensor_data(),
)

@property
@@ -775,6 +784,7 @@ def _clear(self):
self._sensors = OrderedDict()
self._render_cameras = OrderedDict()
self._scene = None
+ self._hidden_objects = []

def close(self):
self._clear()
@@ -840,7 +850,9 @@ def render_human(self):
self._viewer.set_camera_pose(
self._render_cameras["render_camera"].camera.global_pose
)
- self._scene.update_render()
+ for obj in self._hidden_objects:
+     obj.show_visual()
+ self.update_render()

# TODO (stao): currently in GPU mode we cannot render all sub-scenes together in the GUI yet. So we have this
# naive solution which shows whatever scene is selected by self._viewer_scene_idx
@@ -870,6 +882,8 @@ def render_human(self):

def render_rgb_array(self, camera_name: str = None):
"""Render an RGB image from the specified camera."""
+ for obj in self._hidden_objects:
+     obj.show_visual()
self.update_render()
images = []
# TODO (stao): refactor this code either into ManiSkillScene class and/or merge the code, it's pretty similar?
@@ -899,16 +913,22 @@ def render_cameras(self):
Renders all sensors that the agent can use and see and displays them
"""
images = []
+ for obj in self._hidden_objects:
+     obj.hide_visual()
self.update_render()
self.capture_sensor_data()
- cameras_images = self.get_images()
+ cameras_images = self.get_sensor_data()
for camera_images in cameras_images.values():
images.extend(observations_to_images(camera_images))
return tile_images(images)

def render(self):
"""
- Either opens a viewer if render_mode is "human", or returns an array that you can use to save videos
+ Either opens a viewer if render_mode is "human", or returns an array that you can use to save videos.
+ If render_mode is "rgb_array", usually a higher quality image is rendered for the purpose of viewing only.
+ If render_mode is "cameras", all visual observations the agent can see are provided.
"""
if self.render_mode is None:
raise RuntimeError("render_mode is not set.")
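A minimal sketch of how a task plugs into the new hook (the class name and the build_sphere call are illustrative, modeled on the PickCube changes below, not code from this commit):

from mani_skill2.envs.sapien_env import BaseEnv
from mani_skill2.utils.building import actors

class MyTaskEnv(BaseEnv):
    def _load_actors(self):
        # build a purely visual goal marker: kinematic and without collision
        # shapes, since hide_visual() asserts the actor has none
        self.goal_site = actors.build_sphere(
            self._scene,
            radius=0.02,
            color=[0, 1, 0, 1],
            name="goal_site",
            body_type="kinematic",
            add_collision=False,
        )
        # once registered here, _get_obs_images() and render_cameras() hide
        # the marker before capturing, while render_human() and
        # render_rgb_array() show it again
        self._hidden_objects.append(self.goal_site)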
1 change: 1 addition & 0 deletions mani_skill2/envs/tasks/pick_cube.py
@@ -70,6 +70,7 @@ def _load_actors(self):
body_type="kinematic",
add_collision=False,
)
+ self._hidden_objects.append(self.goal_site)

def _initialize_actors(self):
with torch.device(self.device):
1 change: 1 addition & 0 deletions mani_skill2/envs/tasks/pick_single_ycb.py
@@ -103,6 +103,7 @@ def _load_actors(self):
body_type="kinematic",
add_collision=False,
)
+ self._hidden_objects.append(self.goal_site)

def _initialize_actors(self):
with torch.device(self.device):
9 changes: 8 additions & 1 deletion mani_skill2/envs/tasks/push_cube.py
@@ -67,7 +67,8 @@ def __init__(self, *args, robot_uid="panda", robot_init_qpos_noise=0.02, **kwargs):
super().__init__(*args, robot_uid=robot_uid, **kwargs)

def _register_sensors(self):
- # registers one camera looking at the robot, cube, and target
+ # registers one 128x128 camera looking at the robot, cube, and target
+ # a smaller sized camera will be lower quality, but render faster
pose = look_at(eye=[0.3, 0, 0.6], target=[-0.1, 0, 0.1])
return [
CameraConfig("base_camera", pose.p, pose.q, 128, 128, np.pi / 2, 0.01, 10)
@@ -107,6 +108,12 @@ def _load_actors(self):
body_type="kinematic",
)

+ # optionally you can automatically hide some Actors from view by appending to the self._hidden_objects list. When visual observations
+ # are generated or env.render_cameras() is called or env.render() is called with render_mode="cameras", the actor will not show up.
+ # This is useful if you intend to add some visual goal sites as e.g. done in PickCube that aren't actually part of the task
+ # and are there just for generating evaluation videos.
+ # self._hidden_objects.append(self.goal_region)

def _initialize_actors(self):
# use the torch.device context manager to automatically create tensors on CPU or CUDA depending on self.device, the device the environment runs on
with torch.device(self.device):
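As a usage sketch, the hiding is transparent to consumers of the environment; the environment id, obs_mode, and observation keys below follow this codebase's conventions but are assumptions, not part of the diff:

import gymnasium as gym
import mani_skill2.envs  # noqa: F401, registers the ManiSkill2 environments

env = gym.make("PickCube-v1", obs_mode="rgbd", render_mode="rgb_array")
obs, _ = env.reset(seed=0)
# sensor observations were captured with goal_site hidden
rgb = obs["image"]["base_camera"]["rgb"]
# render() in "rgb_array" mode calls show_visual() on hidden objects first,
# so the goal marker stays visible in evaluation videos
frame = env.render()
env.close()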
6 changes: 1 addition & 5 deletions mani_skill2/utils/building/actor_builder.py
@@ -72,17 +72,13 @@ def build(self, name):
initial_pose_np = to_numpy(initial_pose.raw_pose)

entities = []
- parallelized = len(self.scene.sub_scenes) > 1

for scene_idx, sub_scene in enumerate(self.scene.sub_scenes):
if self.scene_mask is not None and self.scene_mask[scene_idx] == False:
continue
entity = self.build_entity()
- # prepend scene idx to entity name if there is more than one scene
- if parallelized:
-     entity.name = f"scene-{scene_idx}_{self.name}"
- else:
-     entity.name = self.name
+ entity.name = f"scene-{scene_idx}_{self.name}"
# set pose before adding to scene
if initial_pose_b == 1:
entity.pose = to_sapien_pose(initial_pose_np)
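The builder change above means entity names now always carry their sub-scene index, even in CPU simulation with a single sub-scene. A hypothetical helper (not part of the commit) makes the convention explicit:

def scene_entity_name(scene_idx: int, base_name: str) -> str:
    # previously the prefix was applied only when len(sub_scenes) > 1; now an
    # actor named "cube" in sub-scene 0 is always named "scene-0_cube"
    return f"scene-{scene_idx}_{base_name}"

assert scene_entity_name(0, "cube") == "scene-0_cube"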
9 changes: 2 additions & 7 deletions mani_skill2/utils/building/articulation_builder.py
@@ -56,7 +56,6 @@ def build(self, name=None, fix_root_link=None):
# if scene mask is none, set it here
self.scene_mask = to_tensor(torch.ones((self.scene.num_envs), dtype=bool))

- parallelized = len(self.scene.sub_scenes) > 1
articulations = []

for scene_idx, scene in enumerate(self.scene.sub_scenes):
@@ -69,14 +68,10 @@
)
links[0].pose = self.initial_pose
for l in links:
- if parallelized:
-     l.name = f"scene-{scene_idx}_{l.name}"
+ l.name = f"scene-{scene_idx}_{l.name}"
scene.add_entity(l)
articulation: physx.PhysxArticulation = l.components[0].articulation
- if parallelized:
-     articulation.name = f"scene-{scene_idx}_{self.name}"
- else:
-     articulation.name = f"{self.name}"
+ articulation.name = f"scene-{scene_idx}_{self.name}"
articulations.append(articulation)

articulation = Articulation._create_from_physx_articulations(
5 changes: 4 additions & 1 deletion mani_skill2/utils/structs/actor.py
@@ -148,7 +148,8 @@ def hide_visual(self):
Hides this actor from view. In CPU simulation the visual body is simply set to visibility 0.
For GPU simulation, currently this is implemented by moving the actor very far away as visibility cannot be changed on the fly.
- As a result we do not permit hiding and showing visuals of objects with collision shapes as this affects the actual simulation
+ As a result we do not permit hiding and showing visuals of objects with collision shapes as this affects the actual simulation.
+ Note that this operation can also be fairly slow as we need to run px.gpu_apply_rigid_dynamic_data and px.gpu_fetch_rigid_dynamic_data.
"""
assert not self.has_collision_shapes()
if self.hidden:
@@ -161,6 +162,7 @@
temp_pose[..., :3] += 99999
self.pose = temp_pose
self.px.gpu_apply_rigid_dynamic_data()
+ self.px.gpu_fetch_rigid_dynamic_data()
else:
self._objs[0].find_component_by_type(
sapien.render.RenderBodyComponent
@@ -175,6 +177,7 @@ def show_visual(self):
if hasattr(self, "last_pose"):
self.pose = self.last_pose
self.px.gpu_apply_rigid_dynamic_data()
+ self.px.gpu_fetch_rigid_dynamic_data()
else:
self._objs[0].find_component_by_type(
sapien.render.RenderBodyComponent
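Condensed from the GPU branches of Actor.hide_visual / Actor.show_visual shown above, the pattern is a pose teleport plus a paired apply/fetch; this is a sketch only, with the self/attribute plumbing simplified:

def hide_visual_gpu(actor):
    # GPU rendering cannot toggle per-actor visibility on the fly, so the
    # actor is teleported far out of view instead; collision shapes are
    # forbidden because moving them would change the actual simulation
    assert not actor.has_collision_shapes()
    actor.last_pose = actor.pose                  # remember the real pose
    temp_pose = actor.pose.raw_pose.clone()
    temp_pose[..., :3] += 99999                   # shift far along x, y, z
    actor.pose = temp_pose
    # apply the new pose to the GPU buffers, then fetch so subsequent pose
    # reads see consistent data; this round trip is why hiding can be slow
    actor.px.gpu_apply_rigid_dynamic_data()
    actor.px.gpu_fetch_rigid_dynamic_data()

def show_visual_gpu(actor):
    if hasattr(actor, "last_pose"):
        actor.pose = actor.last_pose              # restore the saved pose
        actor.px.gpu_apply_rigid_dynamic_data()
        actor.px.gpu_fetch_rigid_dynamic_data()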
