Skip to content

Commit

Permalink
Merge pull request #59 from lsst/tickets/PREOPS-4685
Browse files Browse the repository at this point in the history
tickets/PREOPS-4685: use lsst.resources for loading data in the prenight briefing
  • Loading branch information
ehneilsen authored Dec 13, 2023
2 parents 349e631 + 4b35c29 commit a5983cc
Show file tree
Hide file tree
Showing 10 changed files with 131 additions and 57 deletions.
10 changes: 2 additions & 8 deletions .github/workflows/build_container.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ jobs:
run: |
mamba install --quiet --file=requirements.txt
mamba install --quiet --file=test-requirements.txt
pip install lsst.resources
mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
echo "rs-version" `cat ${{ github.workspace }}/rs_version`
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
- name: Access rubin-sched-data cache
id: cache-rs
Expand Down Expand Up @@ -77,10 +78,3 @@ jobs:
echo Pushed ghcr.io/${{ github.repository }}:${{ steps.build.outputs.tag }}
echo Fully qualified image digest: ${{ steps.build.outputs.fully_qualified_image_digest }}
echo Tag of the image: ${{ steps.build.outputs.tag }}
5 changes: 3 additions & 2 deletions .github/workflows/test_and_build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ jobs:
run: |
mamba install --quiet --file=requirements.txt
mamba install --quiet --file=test-requirements.txt
pip install lsst.resources
mamba list rubin-scheduler | grep -v "#" | awk '{print $2}' > ${{ github.workspace }}/rs_version
echo "rs-version" `cat ${{ github.workspace }}/rs_version`
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
echo "rs-version=`cat ${{ github.workspace }}/rs_version`" >> $GITHUB_OUTPUT
- name: Access rubin-sched-data cache
id: cache-rs
Expand Down Expand Up @@ -115,4 +116,4 @@ jobs:
uses: pypa/gh-action-pypi-publish@release/v1
with:
user: __token__
password: ${{ secrets.SP_PYPI_UPLOADS }}
password: ${{ secrets.SP_PYPI_UPLOADS }}
4 changes: 3 additions & 1 deletion container_environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,7 @@ dependencies:
- uranography
- param
- git
- pip
- wget
- pip
- pip:
- lsst.resources
3 changes: 3 additions & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@ dependencies:
- firefox
- geckodriver
- build
- pip
- pip:
- lsst.resources
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"param",
"pytz",
"rubin-scheduler",
"lsst.resources",
"uranography >= 1.1.0 ",
]

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ param
pytz
uranography
rubin-scheduler
pip
88 changes: 51 additions & 37 deletions schedview/app/prenight/prenight.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import argparse
import importlib.resources
import json
import logging
import os
import sys
from glob import glob
import urllib.parse
from pathlib import Path

import astropy.utils.iers
Expand All @@ -15,11 +14,13 @@
import panel as pn
import param
from astropy.time import Time
from lsst.resources import ResourcePath
from rubin_scheduler.scheduler.model_observatory import ModelObservatory
from rubin_scheduler.utils import survey_start_mjd

import schedview.collect.footprint
import schedview.collect.opsim
import schedview.collect.resources
import schedview.compute.astro
import schedview.compute.scheduler
import schedview.param
Expand All @@ -42,8 +43,11 @@
DEFAULT_MODEL_OBSERVATORY = ModelObservatory(init_load_length=1)
DEFAULT_MODEL_OBSERVATORY.sky_model.load_length = 1

PACKAGE_DATA_DIR = importlib.resources.files("schedview.data").as_posix()
USDF_DATA_DIR = "/sdf/group/rubin/web_data/sim-data/schedview"
PACKAGE_RESOURCE_URI = "resource://schedview/data"
USDF_RESOURCE_URI = "file:///sdf/group/rubin/web_data/sim-data/schedview"

# To be changed to an S3 bucket at the USDF, when it's ready
DEFAULT_RESOURCE_URI = USDF_RESOURCE_URI

astropy.utils.iers.conf.iers_degraded_accuracy = "warn"

Expand Down Expand Up @@ -278,8 +282,8 @@ def _update_visits(self):

self.logger.info("Starting to update visits.")
try:
if not os.path.exists(self.opsim_output_fname):
raise FileNotFoundError(f"File not found: {self.opsim_output_fname}")
if not ResourcePath(self.opsim_output_fname).exists():
raise FileNotFoundError(f"Resource not found: {self.opsim_output_fname}")

visits = schedview.collect.opsim.read_opsim(
self.opsim_output_fname,
Expand Down Expand Up @@ -523,7 +527,13 @@ def _update_reward_df(self):
self.logger.info("Starting to update reward dataframe.")

try:
reward_df = pd.read_hdf(self.rewards_fname, "reward_df")
reward_resource = ResourcePath(self.rewards_fname)
if not reward_resource.exists():
raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")

with reward_resource.as_local() as local_resource:
local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
reward_df = pd.read_hdf(local_fname, "reward_df")
self.logger.info("Finished updating reward dataframe.")
except Exception as e:
self.logger.error(e)
Expand Down Expand Up @@ -584,7 +594,14 @@ def _update_obs_rewards(self):

self.logger.info("Starting to update obs_rewards.")
try:
obs_rewards = pd.read_hdf(self.rewards_fname, "obs_rewards")
reward_resource = ResourcePath(self.rewards_fname)
if not reward_resource.exists():
raise FileNotFoundError(f"Resource not found: {self.rewards_fname}")

with reward_resource.as_local() as local_resource:
local_fname = Path(urllib.parse.urlparse(str(local_resource)).path)
obs_rewards = pd.read_hdf(local_fname, "obs_rewards")

self._obs_rewards = obs_rewards
self.logger.info("Finished updating obs_rewards.")
except Exception as e:
Expand Down Expand Up @@ -820,15 +837,13 @@ def clear_caches(session_context):
class RestrictedInputPrenight(Prenight):
"""A pre-night dashboard that restricts the data to files in a dir."""

opsim_output_fname = schedview.param.FileSelectorWithEmptyOption(
path=f"{PACKAGE_DATA_DIR}/*opsim*.db", label="OpSim output database", default=None, allow_None=True
opsim_output_fname = param.Selector(
objects=[], label="OpSim output database", default=None, allow_None=True
)

rewards_fname = schedview.param.FileSelectorWithEmptyOption(
path=f"{PACKAGE_DATA_DIR}/*rewards*.h5", label="rewards HDF5 file", default=None, allow_None=True
)
rewards_fname = param.Selector(objects=[], label="rewards HDF5 file", default=None, allow_None=True)

def __init__(self, data_dir=None, **kwargs):
def __init__(self, resource_uri=DEFAULT_RESOURCE_URI, **kwargs):
# A few arguments (opsim_db, rewards) will be used
# later in this method to set the options for parameters, but
# are not themselves parameters. So, remove them from the
Expand All @@ -841,24 +856,24 @@ def __init__(self, data_dir=None, **kwargs):
# they can be updated by key.
fname_params = {
"opsim_db": self.param["opsim_output_fname"],
"reward": self.param["rewards_fname"],
"rewards": self.param["rewards_fname"],
}

# In cases where the caller has not specified a value, use a
# regular expression matching the expected file name format for
# each type to select candidate resources.
if data_dir is not None:
fname_glob = {
"opsim_db": f"{data_dir}/*opsim*.db",
"reward": f"{data_dir}/*rewards*.h5",
}
fname_patterns = {
"opsim_db": r".*opsim.*\.db",
"rewards": r".*rewards.*\.h5",
}

# Actually assign the names or globs to the path references.
# Get the resources available for each file type
for arg_name in fname_params:
if arg_name in kwargs:
fname_params[arg_name].update(path=kwargs[arg_name])
elif data_dir is not None:
fname_params[arg_name].update(path=fname_glob[arg_name])
matching_resources = [kwargs[arg_name]]
else:
matching_resources = schedview.collect.resources.find_file_resources(
resource_uri, file_filter=fname_patterns[arg_name]
)
matching_resources = [None] + matching_resources
fname_params[arg_name].objects = matching_resources


def prenight_app(*args, **kwargs):
Expand All @@ -874,9 +889,9 @@ def prenight_app(*args, **kwargs):
prenight = Prenight()
else:
try:
data_dir = kwargs["data_dir"]
resource_uri = kwargs["resource_uri"]
except KeyError:
data_dir = None
resource_uri = None

specified_data_files = {}
data_args = set(["opsim_db", "rewards"]) & set(kwargs.keys())
Expand All @@ -887,10 +902,10 @@ def prenight_app(*args, **kwargs):
if data_arg in kwargs:
specified_data_files[data_arg] = str(file_path)

prenight = RestrictedInputPrenight(data_dir=data_dir, **specified_data_files)
prenight = RestrictedInputPrenight(resource_uri=resource_uri, **specified_data_files)

try:
del kwargs["data_dir"]
del kwargs["resource_uri"]
except KeyError:
pass

Expand Down Expand Up @@ -928,13 +943,12 @@ def parse_prenight_args():
help="The path to the rewards HDF5 file.",
)

default_data_dir = f"{USDF_DATA_DIR}/*" if os.path.exists(USDF_DATA_DIR) else PACKAGE_DATA_DIR
parser.add_argument(
"--data_dir",
"--resource_uri",
"-d",
type=str,
default=default_data_dir,
help="The base directory for data files.",
default=DEFAULT_RESOURCE_URI,
help="The base URI for data files.",
)

parser.add_argument(
Expand Down Expand Up @@ -987,8 +1001,8 @@ def parse_prenight_args():

args = parser.parse_args()

if len(glob(args.data_dir)) == 0 and not args.data_from_urls:
args.data_dir = PACKAGE_DATA_DIR
if not ResourcePath(args.resource_uri).exists():
args.resource_uri = PACKAGE_RESOURCE_URI

if args.night is not None:
args.night_date = Time(pd.Timestamp(args.night, tz="UTC")).datetime.date()
Expand Down
24 changes: 15 additions & 9 deletions schedview/collect/opsim.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import sqlite3
import urllib

import numpy as np
import pandas as pd
from astropy.time import Time
from lsst.resources import ResourcePath


def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
def read_opsim(opsim_uri, start_time="2000-01-01", end_time="2100-01-01"):
"""Read visits from an opsim database.
Parameters
----------
filename : `str`
The file from which to load visits
opsim_uri : `str`
The uri from which to load visits
start_time : `str`, `astropy.time.Time`
The start time for visits to be loaded
end_time : `str`, `astropy.time.Time`
Expand All @@ -25,12 +27,16 @@ def read_opsim(filename, start_time="2000-01-01", end_time="2100-01-01"):
start_mjd = Time(start_time).mjd
end_mjd = Time(end_time).mjd

with sqlite3.connect(filename) as sim_connection:
visits = pd.read_sql_query(
f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
sim_connection,
index_col="observationId",
)
original_resource_path = ResourcePath(opsim_uri)
with original_resource_path.as_local() as local_resource_path:
filename = urllib.parse.urlparse(str(local_resource_path)).path

with sqlite3.connect(filename) as sim_connection:
visits = pd.read_sql_query(
f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}",
sim_connection,
index_col="observationId",
)

visits["start_date"] = pd.to_datetime(
visits["observationStartMJD"] + 2400000.5, origin="julian", unit="D", utc=True
Expand Down
27 changes: 27 additions & 0 deletions schedview/collect/resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from lsst.resources import ResourcePath


def find_file_resources(base_resource_uri, file_filter=None):
    """Find matching files in a resource.

    Parameters
    ----------
    base_resource_uri : `str`
        The uri of the resource to search.
    file_filter : `str` or `re.Pattern`, optional
        Regex passed to `ResourcePath.walk` to restrict which files
        are returned.

    Returns
    -------
    files : `list` of `str`
        The list of matching file URLs available at the resource,
        in walk order, without duplicates.
    """
    base_resource = ResourcePath(base_resource_uri)

    # Track seen URLs in a set so duplicate detection is O(1) per file
    # instead of an O(n) list membership test, while the list preserves
    # walk order for the caller.
    accumulated_files = []
    seen = set()
    # walk() yields (dir_path, dir_names, file_names) like os.walk;
    # the directory names are not needed here.
    for dir_path, _dir_names, file_names in base_resource.walk(file_filter=file_filter):
        for file_name in file_names:
            qualified_file_name = dir_path.join(file_name).geturl()
            if qualified_file_name not in seen:
                seen.add(qualified_file_name)
                accumulated_files.append(qualified_file_name)

    return accumulated_files
25 changes: 25 additions & 0 deletions tests/test_resources.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory

from schedview.collect.resources import find_file_resources


class TestResources(unittest.TestCase):
    """Tests for schedview.collect.resources."""

    def test_find_file_resources(self):
        """Verify find_file_resources lists exactly the files created."""
        # Include nested directories to exercise the recursive walk.
        test_file_names = ["foo/bar.txt", "foo/baz.txt", "foo/qux/moo.txt"]
        made_files = []
        with TemporaryDirectory() as temp_dir_name:
            temp_dir = Path(temp_dir_name)
            for file_name in test_file_names:
                file_path = temp_dir.joinpath(file_name)
                file_path.parent.mkdir(parents=True, exist_ok=True)
                made_files.append(file_path.as_uri())
                file_path.write_text("Test content.")

            # Search while the temporary directory still exists.
            found_files = find_file_resources(temp_dir)

            # assertEqual reports a readable set diff on failure and,
            # unlike a bare ``assert``, is not stripped under ``python -O``.
            self.assertEqual(set(made_files), set(found_files))

0 comments on commit a5983cc

Please sign in to comment.