Skip to content

Commit

Permalink
Merge pull request #59 from lsst/tickets/PREOPS-4685
Browse files Browse the repository at this point in the history
tickets/PREOPS-4685: use lsst.resources for loading data in the prenight briefing
  • Loading branch information
ehneilsen authored Dec 13, 2023
2 parents 349e631 + 4b35c29 commit a5983cc
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 57 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/build_container.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ jobs:
run: |
mamba install --quiet --file=requirements.txt
mamba install --quiet --file=test-requirements.txt
pip install lsst.resources
mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
echo "rs-version" `cat ${{ github.workspace }}/rs_version`
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
- name: Access rubin-sched-data cache
id: cache-rs
Expand Down Expand Up @@ -77,10 +78,3 @@ jobs:
echo Pushed ghcr.io/${{ github.repository }}:${{ steps.build.outputs.tag }}
echo Fully qualified image digest: ${{ steps.build.outputs.fully_qualified_image_digest }}
echo Tag of the image: ${{ steps.build.outputs.tag }}
5 changes: 3 additions & 2 deletions .github/workflows/test_and_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ jobs:
run: |
mamba install --quiet --file=requirements.txt
mamba install --quiet --file=test-requirements.txt
pip install lsst.resources
mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
echo "rs-version" `cat ${{ github.workspace }}/rs_version`
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
- name: Access rubin-sched-data cache
id: cache-rs
Expand Down Expand Up @@ -115,4 +116,4 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.SP_PYPI_UPLOADS }}
password: ${{ secrets.SP_PYPI_UPLOADS }}
4 changes: 3 additions & 1 deletion container_environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ dependencies:
- uranography
- param
- git
- pip
- wget
- pip
- pip:
- lsst.resources
3 changes: 3 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ dependencies:
- firefox
- geckodriver
- build
- pip
- pip:
- lsst.resources
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"param",
"pytz",
"rubin-scheduler",
"lsst.resources",
"uranography >= 1.1.0 ",
]

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ param
pytz
uranography
rubin-scheduler
pip
88 changes: 51 additions & 37 deletions schedview/app/prenight/prenight.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import argparse
import importlib.resources
import json
import logging
import os
import sys
from glob import glob
import urllib.parse
from pathlib import Path

import astropy.utils.iers
Expand All @@ -15,11 +14,13 @@
import panel as pn
import param
from astropy.time import Time
from lsst.resources import ResourcePath
from rubin_scheduler.scheduler.model_observatory import ModelObservatory
from rubin_scheduler.utils import survey_start_mjd

import schedview.collect.footprint
import schedview.collect.opsim
import schedview.collect.resources
import schedview.compute.astro
import schedview.compute.scheduler
import schedview.param
Expand All @@ -42,8 +43,11 @@
DEFAULT_MODEL_OBSERVATORY = ModelObservatory(init_load_length=1)
DEFAULT_MODEL_OBSERVATORY.sky_model.load_length = 1

PACKAGE_DATA_DIR = importlib.resources.files("schedview.data").as_posix()
USDF_DATA_DIR = "/sdf/group/rubin/web_data/sim-data/schedview"
PACKAGE_RESOURCE_URI = "resource://schedview/data"
USDF_RESOURCE_URI = "file:///sdf/group/rubin/web_data/sim-data/schedview"

# To be changed to an S3 bucket at the USDF, when it's ready
DEFAULT_RESOURCE_URI = USDF_RESOURCE_URI

astropy.utils.iers.conf.iers_degraded_accuracy = "warn"

Expand Down Expand Up @@ -278,8 +282,8 @@ def _update_visits(self):

self.logger.info("Starting to update visits.")
try:
if not os.path.exists(self.opsim_output_fname):
raise FileNotFoundError(f"File not found: {self.opsim_output_fname}")
if not ResourcePath(self.opsim_output_fname).exists():
raise FileNotFoundError(f"Resource not found: {self.opsim_output_fname}")

visits = schedview.collect.opsim.read_opsim(
self.opsim_output_fname,
Expand Down Expand Up @@ -523,7 +527,13 @@ def _update_reward_df(self):
self.logger.info("Starting to update reward dataframe.")

try:
reward_df = pd.read_hdf(self.rewards_fname, "reward_df")
reward_resource = ResourcePath(self.rewards_fname)
if not reward_resource.exists():
raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")

with reward_resource.as_local() as local_resource:
local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
reward_df = pd.read_hdf(local_fname, "reward_df")
self.logger.info("Finished updating reward dataframe.")
except Exception as e:
self.logger.error(e)
Expand Down Expand Up @@ -584,7 +594,14 @@ def _update_obs_rewards(self):

self.logger.info("Starting to update obs_rewards.")
try:
obs_rewards = pd.read_hdf(self.rewards_fname, "obs_rewards")
reward_resource = ResourcePath(self.rewards_fname)
if not reward_resource.exists():
raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")

with reward_resource.as_local() as local_resource:
local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
obs_rewards = pd.read_hdf(local_fname, "obs_rewards")

self._obs_rewards = obs_rewards
self.logger.info("Finished updating obs_rewards.")
except Exception as e:
Expand Down Expand Up @@ -820,15 +837,13 @@ def clear_caches(session_context):
class RestrictedInputPrenight(Prenight):
"""A pre-night dashboard that restricts the data to files in a dir."""

opsim_output_fname = schedview.param.FileSelectorWithEmptyOption(
path=f"{PACKAGE_DATA_DIR}/*opsim*.db", label="OpSim output database", default=None, allow_None=True
opsim_output_fname = param.Selector(
objects=[], label="OpSim output database", default=None, allow_None=True
)

rewards_fname = schedview.param.FileSelectorWithEmptyOption(
path=f"{PACKAGE_DATA_DIR}/*rewards*.h5", label="rewards HDF5 file", default=None, allow_None=True
)
rewards_fname = param.Selector(objects=[], label="rewards HDF5 file", default=None, allow_None=True)

def __init__(self, data_dir=None, **kwargs):
def __init__(self, resource_uri=DEFAULT_RESOURCE_URI, **kwargs):
# A few arguments (opsim_db, rewards) will be used
# later in this method to set the options for parameters, but
# are not themselves parameters. So, remove them from the
Expand All @@ -841,24 +856,24 @@ def __init__(self, data_dir=None, **kwargs):
# they can be updated by key.
fname_params = {
"opsim_db": self.param["opsim_output_fname"],
"reward": self.param["rewards_fname"],
"rewards": self.param["rewards_fname"],
}

# In cases where the caller has not specified a value, use a
# regular expression matching the expected file name format for
# each type to select candidate resources.
if data_dir is not None:
fname_glob = {
"opsim_db": f"{data_dir}/*opsim*.db",
"reward": f"{data_dir}/*rewards*.h5",
}
fname_patterns = {
"opsim_db": r".*opsim.*\.db",
"rewards": r".*rewards.*\.h5",
}

# Actually assign the names or globs to the path references.
# Get the resources available for each file type
for arg_name in fname_params:
if arg_name in kwargs:
fname_params[arg_name].update(path=kwargs[arg_name])
elif data_dir is not None:
fname_params[arg_name].update(path=fname_glob[arg_name])
matching_resources = [kwargs[arg_name]]
else:
matching_resources = schedview.collect.resources.find_file_resources(
resource_uri, file_filter=fname_patterns[arg_name]
)
matching_resources = [None] + matching_resources
fname_params[arg_name].objects = matching_resources


def prenight_app(*args, **kwargs):
Expand All @@ -874,9 +889,9 @@ def prenight_app(*args, **kwargs):
prenight = Prenight()
else:
try:
data_dir = kwargs["data_dir"]
resource_uri = kwargs["resource_uri"]
except KeyError:
data_dir = None
resource_uri = None

specified_data_files = {}
data_args = set(["opsim_db", "rewards"]) & set(kwargs.keys())
Expand All @@ -887,10 +902,10 @@ def prenight_app(*args, **kwargs):
if data_arg in kwargs:
specified_data_files[data_arg] = str(file_path)

prenight = RestrictedInputPrenight(data_dir=data_dir, **specified_data_files)
prenight = RestrictedInputPrenight(resource_uri=resource_uri, **specified_data_files)

try:
del kwargs["data_dir"]
del kwargs["resource_uri"]
except KeyError:
pass

Expand Down Expand Up @@ -928,13 +943,12 @@ def parse_prenight_args():
help="The path to the rewards HDF5 file.",
)

default_data_dir = f"{USDF_DATA_DIR}/*" if os.path.exists(USDF_DATA_DIR) else PACKAGE_DATA_DIR
parser.add_argument(
"--data_dir",
"--resource_uri",
"-d",
type=str,
default=default_data_dir,
help="The base directory for data files.",
default=DEFAULT_RESOURCE_URI,
help="The base URI for data files.",
)

parser.add_argument(
Expand Down Expand Up @@ -987,8 +1001,8 @@ def parse_prenight_args():

args = parser.parse_args()

if len(glob(args.data_dir)) == 0 and not args.data_from_urls:
args.data_dir = PACKAGE_DATA_DIR
if not ResourcePath(args.resource_uri).exists():
args.resource_uri = PACKAGE_RESOURCE_URI

if args.night is not None:
args.night_date = Time(pd.Timestamp(args.night, tz="UTC")).datetime.date()
Expand Down
24 changes: 15 additions & 9 deletions schedview/collect/opsim.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import sqlite3
import urllib

import numpy as np
import pandas as pd
from astropy.time import Time
from lsst.resources import ResourcePath


def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
def read_opsim(opsim_uri, start_time="2000-01-01", end_time="2100-01-01"):
"""Read visits from an opsim database.
Parameters
----------
filename : `str`
The file from which to load visits
opsim_uri : `str`
The uri from which to load visits
start_time : `str`, `astropy.time.Time`
The start time for visits to be loaded
end_time : `str`, `astropy.time.Time`
Expand All @@ -25,12 +27,16 @@ def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
start_mjd = Time(start_time).mjd
end_mjd = Time(end_time).mjd

with sqlite3.connect(filename) as sim_connection:
visits = pd.read_sql_query(
f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
sim_connection,
index_col="observationId",
)
original_resource_path = ResourcePath(opsim_uri)
with original_resource_path.as_local() as local_resource_path:
filename = urllib.parse.urlparse(str(local_resource_path)).path

with sqlite3.connect(filename) as sim_connection:
visits = pd.read_sql_query(
f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
sim_connection,
index_col="observationId",
)

visits["start_date"] = pd.to_datetime(
visits["observationStartMJD"] + 2400000.5, origin="julian", unit="D", utc=True
Expand Down
27 changes: 27 additions & 0 deletions schedview/collect/resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from lsst.resources import ResourcePath


def find_file_resources(base_resource_uri, file_filter=None):
    """Find matching files in a resource.

    Parameters
    ----------
    base_resource_uri : `str`
        The uri of the resource to search.
    file_filter : `str` or `re.Pattern`, optional
        Regex passed to `ResourcePath.walk` to restrict which files
        are returned.

    Returns
    -------
    files : `list` of `str`
        The list of matching file URLs available at the resource,
        in walk order, without duplicates.
    """
    base_resource = ResourcePath(base_resource_uri)

    # Track seen URLs in a set so duplicate detection is O(1) per file
    # instead of an O(n) list membership test, while the list preserves
    # walk order for the caller.
    accumulated_files = []
    seen = set()
    # walk() yields (dir_path, dir_names, file_names) like os.walk;
    # the directory names are not needed here.
    for dir_path, _dir_names, file_names in base_resource.walk(file_filter=file_filter):
        for file_name in file_names:
            qualified_file_name = dir_path.join(file_name).geturl()
            if qualified_file_name not in seen:
                seen.add(qualified_file_name)
                accumulated_files.append(qualified_file_name)

    return accumulated_files
25 changes: 25 additions & 0 deletions tests/test_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory

from schedview.collect.resources import find_file_resources


class TestResources(unittest.TestCase):
    """Tests for schedview.collect.resources."""

    def test_find_file_resources(self):
        """Verify find_file_resources lists exactly the files created."""
        # Include nested directories to exercise the recursive walk.
        test_file_names = ["foo/bar.txt", "foo/baz.txt", "foo/qux/moo.txt"]
        made_files = []
        with TemporaryDirectory() as temp_dir_name:
            temp_dir = Path(temp_dir_name)
            for file_name in test_file_names:
                file_path = temp_dir.joinpath(file_name)
                file_path.parent.mkdir(parents=True, exist_ok=True)
                made_files.append(file_path.as_uri())
                file_path.write_text("Test content.")

            # Search while the temporary directory still exists.
            found_files = find_file_resources(temp_dir)

            # assertEqual reports a readable set diff on failure and,
            # unlike a bare ``assert``, is not stripped under ``python -O``.
            self.assertEqual(set(made_files), set(found_files))

0 comments on commit a5983cc

Please sign in to comment.