rlbench evals seem to work

r-pad · May 14, 2024 · 2e20ffb · 2e20ffb
1 parent 1758a03
commit 2e20ffb
Show file tree

Hide file tree

Showing 5 changed files with 242 additions and 14 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:12.4.1-base-ubuntu22.04
+FROM nvidia/cuda:12.2.2-base-ubuntu20.04
 
 ENV DEBIAN_FRONTEND=noninteractive
 

diff --git a/configs/eval_rlbench.yaml b/configs/eval_rlbench.yaml
@@ -18,6 +18,8 @@ defaults:
 
 policy_spec:
   collision_checking: True
+  num_points: 256
+  break_symmetry: False
   model: ${model}
 
 

diff --git a/launch.sh b/launch.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+
+# Launch a command on one of the supported platforms, pinned to a single GPU.
+#
+# This script takes three arguments:
+# 1. the platform to run on ("autobot", "local-docker", "local-apptainer" or "local")
+# 2. the index of which GPU to use
+# 3. the command and arguments to run
+#
+# Optional environment:
+#   DOCKER_TAG - if set, forces the beisner/taxpose image tag that is used.
+#
+# Example usage:
+# ./launch.sh autobot 0 python scripts/train_residual_flow.py
+
+# Fail early with a usage message instead of shifting past missing arguments.
+if [ "$#" -lt 3 ]; then
+    echo "Usage: $0 <platform> <gpu-index> <command> [args...]" >&2
+    exit 1
+fi
+
+# Get the first argument:
+PLATFORM=$1
+
+# Get the second argument:
+GPU_INDEX=$2
+shift 2
+
+# Everything that is left is the command. It is kept as a single space-joined
+# string; the launch code expands it unquoted, which splits it on whitespace.
+COMMAND="$*"
+
+
+# Quoted so the values are printed verbatim (no globbing / whitespace collapse).
+echo "Platform: $PLATFORM"
+echo "GPU Index: $GPU_INDEX"
+echo "Command: $COMMAND"
+
+# We want to get the name of the current branch.
+# Slashes are replaced with dashes so the name can be used as an image tag.
+branch_name=$(git branch --show-current)
+sanitized_branch_name="${branch_name//\//-}"
+
+# Pick the image tag, in priority order:
+#   1. DOCKER_TAG, if set (no registry lookup needed);
+#   2. the sanitized branch name, if that tag exists for beisner/taxpose;
+#   3. "latest" otherwise.
+if [ -n "$DOCKER_TAG" ]; then
+    tag="${DOCKER_TAG}"
+elif [ -n "$sanitized_branch_name" ] && docker manifest inspect "beisner/taxpose:${sanitized_branch_name}" > /dev/null 2>&1; then
+    tag="${sanitized_branch_name}"
+else
+    tag="latest"
+fi
+
+echo "Using image: beisner/taxpose:${tag}"
+
+
+# Dispatch on the requested platform.
+# NOTE: $COMMAND is deliberately expanded unquoted in every branch so that it
+# is split back into the command and its individual arguments.
+case "$PLATFORM" in
+
+# If the platform is "autobot", then we need to use apptainer (formerly
+# singularity) to run the command.
+autobot)
+    echo "Running on autobot"
+
+    user_name=$(whoami)
+
+    # Scratch directories that are bind-mounted into the container below.
+    # mkdir -p creates the missing ones and leaves existing ones untouched.
+    DIRECTORIES=(
+        "/scratch/${user_name}/data"
+        "/scratch/${user_name}/logs"
+        "/scratch/${user_name}/artifacts"
+        "/scratch/${user_name}/.config"
+        "/scratch/${user_name}/tmp"
+        "/scratch/${user_name}/home"
+    )
+    mkdir -p "${DIRECTORIES[@]}"
+
+    # Run on apptainer. The APPTAINERENV_* assignments only apply to this command.
+    APPTAINERENV_CUDA_VISIBLE_DEVICES="$GPU_INDEX" \
+    APPTAINERENV_WANDB_DOCKER_IMAGE=taxpose \
+    APPTAINERENV_MPLCONFIGDIR=/opt/.config \
+    apptainer run \
+    --nv \
+    --no-mount hostfs \
+    --pwd "/opt/${user_name}/code" \
+    --workdir /opt/tmp \
+    -B "/home/${user_name}/code/rpad/taxpose:/opt/${user_name}/code" \
+    -B "/scratch/${user_name}/data:/data" \
+    -B "/scratch/${user_name}/logs:/opt/logs" \
+    -B "/scratch/${user_name}/artifacts:/opt/artifacts" \
+    -B "/scratch/${user_name}/.config:/opt/.config" \
+    -B "/scratch/${user_name}/tmp:/tmp" \
+    -B "/scratch/${user_name}/home:/home/${user_name}" \
+    "docker://beisner/taxpose:${tag}" \
+    $COMMAND \
+    log_dir=/opt/logs \
+    data_root=/data \
+    wandb.artifact_dir=/opt/artifacts
+    ;;
+
+# If the platform is "local-docker", then we need to use docker to run the command.
+# NOTE(review): the host paths here are hard-coded to /home/beisner, and the code
+# is mounted at /opt/baeisner/code - confirm that matches the image's layout.
+local-docker)
+    echo "Running locally with docker"
+
+    docker run \
+    --gpus "device=$GPU_INDEX" \
+    -it \
+    -e WANDB_API_KEY="${WANDB_API_KEY}" \
+    -e WANDB_DOCKER_IMAGE=taxpose \
+    -v /usr/share/glvnd/egl_vendor.d/10_nvidia.json:/usr/share/glvnd/egl_vendor.d/10_nvidia.json \
+    -v /home/beisner/datasets/:/data \
+    -v /home/beisner/code/rpad/taxpose/artifacts:/opt/artifacts \
+    -v /home/beisner/code/rpad/taxpose/logs:/opt/logs \
+    -v /home/beisner/code/rpad/taxpose:/opt/baeisner/code \
+    "beisner/taxpose:${tag}" \
+    $COMMAND \
+    log_dir=/opt/logs \
+    data_root=/data \
+    wandb.artifact_dir=/opt/artifacts
+    ;;
+
+# If the platform is "local-apptainer", then we run the image with apptainer,
+# binding directories from the current user's home instead of /scratch.
+local-apptainer)
+    echo "Running locally with apptainer"
+
+    user_name=$(whoami)
+
+    # APPTAINERENV_PYENV_VERSION is intentionally assigned an empty value.
+    APPTAINERENV_CUDA_VISIBLE_DEVICES="$GPU_INDEX" \
+    APPTAINERENV_WANDB_DOCKER_IMAGE=taxpose \
+    APPTAINERENV_MPLCONFIGDIR=/opt/.config \
+    APPTAINERENV_VGL_DEVICE="egl$GPU_INDEX" \
+    APPTAINERENV_PYENV_VERSION= \
+    apptainer run \
+    --nv \
+    --no-mount hostfs \
+    --pwd "/opt/${user_name}/code" \
+    --contain \
+    -B "/home/${user_name}/code/rpad/taxpose:/opt/${user_name}/code" \
+    -B "/home/${user_name}/datasets:/data" \
+    -B "/home/${user_name}/code/rpad/taxpose/logs:/opt/logs" \
+    -B "/home/${user_name}/code/rpad/taxpose/artifacts:/opt/artifacts" \
+    -B "/home/${user_name}/.config:/opt/.config" \
+    -B "/home/${user_name}/.tmp:/tmp" \
+    -B "/home/${user_name}/tmp_home:/home/${user_name}" \
+    -B /usr/share/glvnd/egl_vendor.d/10_nvidia.json:/usr/share/glvnd/egl_vendor.d/10_nvidia.json \
+    "docker://beisner/taxpose:${tag}" \
+    $COMMAND \
+    log_dir=/opt/logs \
+    data_root=/data \
+    wandb.artifact_dir=/opt/artifacts
+    ;;
+
+# If the platform is "local", then we can just run the command.
+local)
+    echo "Running locally"
+
+    CUDA_VISIBLE_DEVICES="$GPU_INDEX" \
+    WANDB_DOCKER_IMAGE=taxpose \
+    $COMMAND
+    ;;
+
+# Anything else is a usage error: report it on stderr and exit non-zero so
+# that callers can detect the failure.
+*)
+    echo "Platform not recognized: $PLATFORM" >&2
+    exit 1
+    ;;
+
+esac
diff --git a/scripts/eval_rlbench.py b/scripts/eval_rlbench.py
@@ -257,20 +257,20 @@ def __init__(
                 model_path = checkpoints_cfg.ckpt_file
             else:
                 model_path = checkpoints_cfg[phase].ckpt_file
-            if model_path is not None:
-                self.models[phase] = self.load_model(
-                    model_path,
-                    policy_spec.model,
-                    wandb_cfg,
-                    task_cfg.phases[phase],
-                    run=run,
-                )
+            self.models[phase] = self.load_model(
+                model_path,
+                policy_spec.model,
+                wandb_cfg,
+                task_cfg.phases[phase],
+                run=run,
+            )
 
         self.model_cfg = policy_spec.model
         self.task_name = task_cfg.name
         self.debug_viz = debug_viz
         self.action_mode = task_cfg.action_mode
         self.anchor_mode = task_cfg.anchor_mode
+        self.policy_spec = policy_spec
 
     @staticmethod
     def render(obs, inputs, preds, T_action_world, T_actionfinal_world):
@@ -317,7 +317,7 @@ def render(obs, inputs, preds, T_action_world, T_actionfinal_world):
 
     @staticmethod
     def load_model(model_path, model_cfg, wandb_cfg, task_cfg, run=None):
-        ckpt_file = get_weights_path(model_path, wandb_cfg, run=run)
+
         network = create_network(model_cfg)
         model = EquivarianceTrainingModule(
             network,
@@ -326,8 +326,10 @@ def load_model(model_path, model_cfg, wandb_cfg, task_cfg, run=None):
             sigmoid_on=True,
             flow_supervision="both",
         )
-        weights = torch.load(ckpt_file)["state_dict"]
-        model.load_state_dict(weights)
+        if model_path is not None:
+            ckpt_file = get_weights_path(model_path, wandb_cfg, run=run)
+            weights = torch.load(ckpt_file)["state_dict"]
+            model.load_state_dict(weights)
 
         model.eval()
         model = model.cuda()
@@ -342,17 +344,18 @@ def predict(self, obs, phase: str, handlemap) -> Tuple[np.ndarray, Dict[str, Any
             self.anchor_mode,
             handlemap,
         )
+
         model = self.models[phase]
         device = model.device
 
         action_pc = inputs["action_pc"].unsqueeze(0).to(device)
         anchor_pc = inputs["anchor_pc"].unsqueeze(0).to(device)
 
-        K = self.model_cfg.num_points
+        K = self.policy_spec.num_points
         action_pc, _ = sample_farthest_points(action_pc, K=K, random_start_point=True)
         anchor_pc, _ = sample_farthest_points(anchor_pc, K=K, random_start_point=True)
 
-        if self.model_cfg.break_symmetry:
+        if self.policy_spec.break_symmetry:
             raise NotImplementedError()
             action_symmetry_features = bottle_symmetry_features(
                 action_pc.cpu().numpy()[0]

diff --git a/taxpose/utils/website.py b/taxpose/utils/website.py
@@ -0,0 +1,84 @@
+import jinja2
+
+# Jinja2 template for a single episode page. It is rendered with:
+# - title: The title of the page (used for both <title> and the <h1>).
+# - phase_plots: A list of plotly plots (as divs); each one is emitted inside its own .plot div.
+# - video: An optional video source; the Video section is only emitted when it is truthy.
+# NOTE(review): the "plotly-latest" CDN alias is reportedly frozen at plotly.js v1.58.5 -
+# confirm that it matches the plotly version which generates the plot divs.
+PHASE_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>{{ title }}</title>
+    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+    <style>
+        /* Make sure the plots are displayed in a row, and that they don't take up more than a certain width*/
+        /* Also make sure the row has a max number of elements = 3 and overflows to the next row */
+        .plot-container {
+            display: flex;
+            flex-wrap: wrap;
+            justify-content: center;
+        }
+        .plot {
+            flex: 1; /* Ensures each plot takes equal space */
+            padding: 10px; /* Optional: adds space between plots */
+            min-width: 500px; /* Optional: sets a minimum width for each plot */
+            max-width: 500px; /* Optional: sets a maximum width for each plot */
+        }
+    </style>
+</head>
+<body>
+    <h1>{{ title }}</h1>
+    <h2>Phase Plots</h2>
+    <div class="plot-container">
+    {% for plot in phase_plots %}
+        <div class="plot">
+            {{ plot }}
+        </div>
+    {% endfor %}
+    </div>
+    {% if video %}
+    <h2>Video</h2>
+    <div>
+        <video src="{{ video }}" width="640" height="360" controls></video>
+    </div>
+    {% endif %}
+</body>
+</html>
+"""
+
+# Jinja2 template for the experiment index page. It is rendered with:
+# - title: The title of the page (used for both <title> and the <h1>).
+# - episode_nums: A list of episode numbers; each one links to episodes/<episode_num>/index.html.
+EXPERIMENT_TEMPLATE = """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>{{ title }}</title>
+</head>
+<body>
+    <h1>{{ title }}</h1>
+    <h2>Episodes</h2>
+    <ul>
+    {% for episode_num in episode_nums %}
+        <li><a href="episodes/{{ episode_num }}/index.html">{{ episode_num }}</a></li>
+    {% endfor %}
+    </ul>
+</body>
+</html>
+"""
+
+
+def render_episode_page(title, phase_plots, video=None):
+    """
+    Build the HTML for a single episode page from PHASE_TEMPLATE.
+
+    title is used as the page title and heading, every entry of phase_plots
+    is placed in its own plot container, and the video section is only
+    included when video is truthy. Returns the rendered template.
+    """
+    context = {"title": title, "phase_plots": phase_plots, "video": video}
+    return jinja2.Template(PHASE_TEMPLATE).render(**context)
+
+
+def render_experiment_page(title, episode_nums):
+    """
+    Build the HTML for the experiment index page from EXPERIMENT_TEMPLATE.
+
+    title is used as the page title and heading, and every entry of
+    episode_nums becomes a list item linking to that episode's index.html.
+    Returns the rendered template.
+    """
+    context = {"title": title, "episode_nums": episode_nums}
+    return jinja2.Template(EXPERIMENT_TEMPLATE).render(**context)