From 38717b5daf7fc09950e25df71c8586261fbecd15 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Mon, 11 Dec 2023 14:38:57 -0800
Subject: [PATCH 1/6] use lsst.resources to load opsim data

---
 schedview/collect/opsim.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/schedview/collect/opsim.py b/schedview/collect/opsim.py
index e6251859..43dc1cf1 100644
--- a/schedview/collect/opsim.py
+++ b/schedview/collect/opsim.py
@@ -1,17 +1,19 @@
 import sqlite3
+import urllib
 
 import numpy as np
 import pandas as pd
 from astropy.time import Time
+from lsst.resources import ResourcePath
 
 
-def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
+def read_opsim(opsim_uri, start_time="2000-01-01", end_time="2100-01-01"):
     """Read visits from an opsim database.
 
     Parameters
     ----------
-    filename : `str`
-        The file from which to load visits
+    opsim_uri : `str`
+        The uri from which to load visits
     start_time : `str`, `astropy.time.Time`
         The start time for visits to be loaded
     end_time : `str`, `astropy.time.Time`
@@ -25,12 +27,16 @@ def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
     start_mjd = Time(start_time).mjd
     end_mjd = Time(end_time).mjd
 
-    with sqlite3.connect(filename) as sim_connection:
-        visits = pd.read_sql_query(
-            f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
-            sim_connection,
-            index_col="observationId",
-        )
+    original_resource_path = ResourcePath(opsim_uri)
+    with original_resource_path.as_local() as local_resource_path:
+        filename = urllib.parse.urlparse(str(local_resource_path)).path
+
+        with sqlite3.connect(filename) as sim_connection:
+            visits = pd.read_sql_query(
+                f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
+                sim_connection,
+                index_col="observationId",
+            )
 
     visits["start_date"] = pd.to_datetime(
         visits["observationStartMJD"] + 2400000.5, origin="julian", unit="D", utc=True

From f340371f107c18e6912c8b3c46773da4971621f6 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Mon, 11 Dec 2023 14:40:24 -0800
Subject: [PATCH 2/6] support finding available files in a resource

---
 schedview/collect/resources.py | 27 +++++++++++++++++++++++++++
 tests/test_resources.py        | 25 +++++++++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 schedview/collect/resources.py
 create mode 100644 tests/test_resources.py

diff --git a/schedview/collect/resources.py b/schedview/collect/resources.py
new file mode 100644
index 00000000..55089f48
--- /dev/null
+++ b/schedview/collect/resources.py
@@ -0,0 +1,27 @@
+from lsst.resources import ResourcePath
+
+
+def find_file_resources(base_resource_uri, file_filter=None):
+    """Find matching files in a resource.
+
+    Parameters
+    ----------
+    base_resource_uri : `str`
+        The uri of the resource to search
+    file_filter : `str` or `re.Pattern`, optional
+        Regex to filter out files from the list before it is returned.
+
+    Returns
+    -------
+    files : `list` of `str`
+        The list of matching files available at the resource.
+    """
+    base_resource = ResourcePath(base_resource_uri)
+    accumulated_files = []
+    for dir_path, dir_names, file_names in base_resource.walk(file_filter=file_filter):
+        for file_name in file_names:
+            qualified_file_name = dir_path.join(file_name).geturl()
+            if qualified_file_name not in accumulated_files:
+                accumulated_files.append(qualified_file_name)
+
+    return accumulated_files
diff --git a/tests/test_resources.py b/tests/test_resources.py
new file mode 100644
index 00000000..7e289f5e
--- /dev/null
+++ b/tests/test_resources.py
@@ -0,0 +1,25 @@
+import unittest
+from pathlib import Path
+from tempfile import TemporaryDirectory
+
+from schedview.collect.resources import find_file_resources
+
+
+class TestResources(unittest.TestCase):
+    def test_find_file_resources(self):
+        # Generate some test files
+        test_file_names = ["foo/bar.txt", "foo/baz.txt", "foo/qux/moo.txt"]
+        made_files = []
+        with TemporaryDirectory() as temp_dir_name:
+            temp_dir = Path(temp_dir_name)
+            for file_name in test_file_names:
+                file_path = temp_dir.joinpath(file_name)
+                file_path.parent.mkdir(parents=True, exist_ok=True)
+                made_files.append(file_path.as_uri())
+                with open(file_path, "w") as file_io:
+                    file_io.write("Test content.")
+
+            # Verify that we found exactly the files we made
+            found_files = find_file_resources(temp_dir)
+
+            self.assertListEqual(made_files, found_files)

From c80ad0417da4d8248e02621ee848543a699debb9 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Mon, 11 Dec 2023 16:00:48 -0800
Subject: [PATCH 3/6] use lsst.resources to load files in prenight

---
 schedview/app/prenight/prenight.py | 88 +++++++++++++++++-------------
 1 file changed, 51 insertions(+), 37 deletions(-)

diff --git a/schedview/app/prenight/prenight.py b/schedview/app/prenight/prenight.py
index d416303e..fceab5d8 100644
--- a/schedview/app/prenight/prenight.py
+++ b/schedview/app/prenight/prenight.py
@@ -1,10 +1,9 @@
 import argparse
-import importlib.resources
 import json
 import logging
 import os
 import sys
-from glob import glob
+import urllib.parse
 from pathlib import Path
 
 import astropy.utils.iers
@@ -15,11 +14,13 @@
 import panel as pn
 import param
 from astropy.time import Time
+from lsst.resources import ResourcePath
 from rubin_scheduler.scheduler.model_observatory import ModelObservatory
 from rubin_scheduler.utils import survey_start_mjd
 
 import schedview.collect.footprint
 import schedview.collect.opsim
+import schedview.collect.resources
 import schedview.compute.astro
 import schedview.compute.scheduler
 import schedview.param
@@ -42,8 +43,11 @@
 DEFAULT_MODEL_OBSERVATORY = ModelObservatory(init_load_length=1)
 DEFAULT_MODEL_OBSERVATORY.sky_model.load_length = 1
 
-PACKAGE_DATA_DIR = importlib.resources.files("schedview.data").as_posix()
-USDF_DATA_DIR = "/sdf/group/rubin/web_data/sim-data/schedview"
+PACKAGE_RESOURCE_URI = "resource://schedview/data"
+USDF_RESOURCE_URI = "file:///sdf/group/rubin/web_data/sim-data/schedview"
+
+# To be changed to an S3 bucket at the USDF, when it's ready
+DEFAULT_RESOURCE_URI = USDF_RESOURCE_URI
 
 astropy.utils.iers.conf.iers_degraded_accuracy = "warn"
 
@@ -278,8 +282,8 @@ def _update_visits(self):
         self.logger.info("Starting to update visits.")
 
         try:
-            if not os.path.exists(self.opsim_output_fname):
-                raise FileNotFoundError(f"File not found: {self.opsim_output_fname}")
+            if not ResourcePath(self.opsim_output_fname).exists():
+                raise FileNotFoundError(f"Resource not found: {self.opsim_output_fname}")
 
             visits = schedview.collect.opsim.read_opsim(
                 self.opsim_output_fname,
@@ -523,7 +527,13 @@ def _update_reward_df(self):
         self.logger.info("Starting to update reward dataframe.")
 
         try:
-            reward_df = pd.read_hdf(self.rewards_fname, "reward_df")
+            reward_resource = ResourcePath(self.rewards_fname)
+            if not reward_resource.exists():
+                raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")
+
+            with reward_resource.as_local() as local_resource:
+                local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
+                reward_df = pd.read_hdf(local_fname, "reward_df")
             self.logger.info("Finished updating reward dataframe.")
         except Exception as e:
             self.logger.error(e)
@@ -584,7 +594,14 @@ def _update_obs_rewards(self):
        self.logger.info("Starting to update obs_rewards.")
 
         try:
-            obs_rewards = pd.read_hdf(self.rewards_fname, "obs_rewards")
+            reward_resource = ResourcePath(self.rewards_fname)
+            if not reward_resource.exists():
+                raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")
+
+            with reward_resource.as_local() as local_resource:
+                local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
+                obs_rewards = pd.read_hdf(local_fname, "obs_rewards")
+
             self._obs_rewards = obs_rewards
             self.logger.info("Finished updating obs_rewards.")
         except Exception as e:
@@ -820,15 +837,13 @@ def clear_caches(session_context):
 class RestrictedInputPrenight(Prenight):
     """A pre-night dashboard that restricts the data to files in a dir."""
 
-    opsim_output_fname = schedview.param.FileSelectorWithEmptyOption(
-        path=f"{PACKAGE_DATA_DIR}/*opsim*.db", label="OpSim output database", default=None, allow_None=True
+    opsim_output_fname = param.Selector(
+        objects=[], label="OpSim output database", default=None, allow_None=True
     )
 
-    rewards_fname = schedview.param.FileSelectorWithEmptyOption(
-        path=f"{PACKAGE_DATA_DIR}/*rewards*.h5", label="rewards HDF5 file", default=None, allow_None=True
-    )
+    rewards_fname = param.Selector(objects=[], label="rewards HDF5 file", default=None, allow_None=True)
 
-    def __init__(self, data_dir=None, **kwargs):
+    def __init__(self, resource_uri=DEFAULT_RESOURCE_URI, **kwargs):
         # A few arguments (opsim_db, rewards) will be used
         # later in this method to set the options for parameters, but
         # are not themselves parameters. So, remove them them the
@@ -841,24 +856,24 @@ def __init__(self, data_dir=None, **kwargs):
         # they can be updated by key.
         fname_params = {
             "opsim_db": self.param["opsim_output_fname"],
-            "reward": self.param["rewards_fname"],
+            "rewards": self.param["rewards_fname"],
         }
 
-        # In cases where the caller has not specified a value, set
-        # the paths to a glob matching the expected file name format
-        # for each type.
-        if data_dir is not None:
-            fname_glob = {
-                "opsim_db": f"{data_dir}/*opsim*.db",
-                "reward": f"{data_dir}/*rewards*.h5",
-            }
+        fname_patterns = {
+            "opsim_db": r".*opsim.*\.db",
+            "rewards": r".*rewards.*\.h5",
+        }
 
-        # Actually assign the names or globs to the path references.
+        # Get the resources available for each file type
         for arg_name in fname_params:
             if arg_name in kwargs:
-                fname_params[arg_name].update(path=kwargs[arg_name])
-            elif data_dir is not None:
-                fname_params[arg_name].update(path=fname_glob[arg_name])
+                matching_resources = [kwargs[arg_name]]
+            else:
+                matching_resources = schedview.collect.resources.find_file_resources(
+                    resource_uri, file_filter=fname_patterns[arg_name]
+                )
+            matching_resources = [None] + matching_resources
+            fname_params[arg_name].objects = matching_resources
 
 
 def prenight_app(*args, **kwargs):
@@ -874,9 +889,9 @@ def prenight_app(*args, **kwargs):
         prenight = Prenight()
     else:
         try:
-            data_dir = kwargs["data_dir"]
+            resource_uri = kwargs["resource_uri"]
         except KeyError:
-            data_dir = None
+            resource_uri = None
 
         specified_data_files = {}
         data_args = set(["opsim_db", "rewards"]) & set(kwargs.keys())
@@ -887,10 +902,10 @@ def prenight_app(*args, **kwargs):
             if data_arg in kwargs:
                 specified_data_files[data_arg] = str(file_path)
 
-        prenight = RestrictedInputPrenight(data_dir=data_dir, **specified_data_files)
+        prenight = RestrictedInputPrenight(resource_uri=resource_uri, **specified_data_files)
 
     try:
-        del kwargs["data_dir"]
+        del kwargs["resource_uri"]
     except KeyError:
         pass
 
@@ -928,13 +943,12 @@ def parse_prenight_args():
         help="The path to the rewards HDF5 file.",
     )
 
-    default_data_dir = f"{USDF_DATA_DIR}/*" if os.path.exists(USDF_DATA_DIR) else PACKAGE_DATA_DIR
     parser.add_argument(
-        "--data_dir",
+        "--resource_uri",
         "-d",
         type=str,
-        default=default_data_dir,
-        help="The base directory for data files.",
+        default=DEFAULT_RESOURCE_URI,
+        help="The base URI for data files.",
     )
 
     parser.add_argument(
@@ -987,8 +1001,8 @@ def parse_prenight_args():
 
     args = parser.parse_args()
 
-    if len(glob(args.data_dir)) == 0 and not args.data_from_urls:
-        args.data_dir = PACKAGE_DATA_DIR
+    if not ResourcePath(args.resource_uri).exists():
+        args.resource_uri = PACKAGE_RESOURCE_URI
 
     if args.night is not None:
         args.night_date = Time(pd.Timestamp(args.night, tz="UTC")).datetime.date()

From 2fe6de11cca59a2069e27bf6b1aed5f429a0a019 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Tue, 12 Dec 2023 07:49:43 -0800
Subject: [PATCH 4/6] include lsst.resources in dependencies

---
 .github/workflows/build_container.yaml | 10 ++--------
 .github/workflows/test_and_build.yaml  |  5 +++--
 container_environment.yaml             |  4 +++-
 environment.yaml                       |  3 +++
 pyproject.toml                         |  1 +
 requirements.txt                       |  1 +
 6 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/build_container.yaml b/.github/workflows/build_container.yaml
index 9da2e85e..3af219a7 100644
--- a/.github/workflows/build_container.yaml
+++ b/.github/workflows/build_container.yaml
@@ -31,9 +31,10 @@ jobs:
         run: |
           mamba install --quiet --file=requirements.txt
           mamba install --quiet --file=test-requirements.txt
+          pip install lsst.resources
           mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
           echo "rs-version" `cat ${{ github.workspace }}/rs_version`
-          echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT        
+          echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
 
       - name: Access rubin-sched-data cache
         id: cache-rs
@@ -77,10 +78,3 @@ jobs:
           echo Pushed ghcr.io/${{ github.repository }}:${{ steps.build.outputs.tag }}
           echo Fully qualified image digest: ${{ steps.build.outputs.fully_qualified_image_digest }}
           echo Tag of the image: ${{ steps.build.outputs.tag }}
-
-
-
-
-
-
-
diff --git a/.github/workflows/test_and_build.yaml b/.github/workflows/test_and_build.yaml
index 4cb3494b..576e5842 100644
--- a/.github/workflows/test_and_build.yaml
+++ b/.github/workflows/test_and_build.yaml
@@ -42,9 +42,10 @@ jobs:
         run: |
           mamba install --quiet --file=requirements.txt
           mamba install --quiet --file=test-requirements.txt
+          pip install lsst.resources
           mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
           echo "rs-version" `cat ${{ github.workspace }}/rs_version`
-          echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT        
+          echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
 
       - name: Access rubin-sched-data cache
         id: cache-rs
@@ -115,4 +116,4 @@ jobs:
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
-          password: ${{ secrets.SP_PYPI_UPLOADS }}
\ No newline at end of file
+          password: ${{ secrets.SP_PYPI_UPLOADS }}
diff --git a/container_environment.yaml b/container_environment.yaml
index 04854660..9cb26cea 100644
--- a/container_environment.yaml
+++ b/container_environment.yaml
@@ -15,5 +15,7 @@ dependencies:
   - uranography
   - param
   - git
-  - pip
   - wget
+  - pip
+  - pip:
+    - lsst.resources
diff --git a/environment.yaml b/environment.yaml
index af71c5e7..ee4bc211 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -10,3 +10,6 @@ dependencies:
   - firefox
   - geckodriver
   - build
+  - pip
+  - pip:
+    - lsst.resources
diff --git a/pyproject.toml b/pyproject.toml
index 5f950ece..772f14d0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,6 +34,7 @@ dependencies = [
     "param",
     "pytz",
     "rubin-scheduler",
+    "lsst.resounces",
     "uranography >= 1.1.0 ",
 ]
 
diff --git a/requirements.txt b/requirements.txt
index 703828a4..86c1bd96 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ param
 pytz
 uranography
 rubin-scheduler
+pip

From 70cc8bf87b7ab0fd6442e01129d0c69229dfa001 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Tue, 12 Dec 2023 07:55:59 -0800
Subject: [PATCH 5/6] fix dependency spelling

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 772f14d0..b917ff11 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
     "param",
     "pytz",
     "rubin-scheduler",
-    "lsst.resounces",
+    "lsst.resources",
     "uranography >= 1.1.0 ",
 ]
 

From 4b35c295aef5cac0cc62b457f213d08e129f26d2 Mon Sep 17 00:00:00 2001
From: Eric Neilsen
Date: Tue, 12 Dec 2023 13:45:26 -0800
Subject: [PATCH 6/6] be robust to order differences in find_file_resources test

---
 tests/test_resources.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_resources.py b/tests/test_resources.py
index 7e289f5e..0ed44585 100644
--- a/tests/test_resources.py
+++ b/tests/test_resources.py
@@ -22,4 +22,4 @@ def test_find_file_resources(self):
             # Verify that we found exactly the files we made
             found_files = find_file_resources(temp_dir)
 
-            self.assertListEqual(made_files, found_files)
+            assert set(made_files) == set(found_files)
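
Usage sketch (not part of the patch series itself): a minimal example of how the two reworked collect entry points could be exercised together after this series. The base URI is the USDF default defined in prenight.py; the night dates, the regex, and the printed summary are illustrative assumptions only, and any URI scheme supported by lsst.resources (file://, resource://, s3://, ...) should behave the same way.

    from schedview.collect.opsim import read_opsim
    from schedview.collect.resources import find_file_resources

    # Find opsim databases available under a base resource URI.
    opsim_files = find_file_resources(
        "file:///sdf/group/rubin/web_data/sim-data/schedview",
        file_filter=r".*opsim.*\.db",
    )

    # Load visits from the first match for one (illustrative) night.
    if opsim_files:
        visits = read_opsim(opsim_files[0], start_time="2025-11-01", end_time="2025-11-02")
        print(f"Loaded {len(visits)} visits from {opsim_files[0]}")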