diff --git a/docs/installation.rst b/docs/installation.rst index 3478977a..c22054e6 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -63,3 +63,34 @@ Building the documentation requires the installation of ``documenteer[guide]``: $ package-docs build The root of the local documentation will then be ``docs/_build/html/index.html``. + +Using the schedview S3 bucket +----------------------------- + +If a user has appropriate credentials, ``schedview`` can read data from an +``S3`` bucket. To have the ``prenight`` dashboard read data from an ``S3`` +bucket, a few steps are needed to prepare the environment in which the +dashboard will be run. + +First, the bucket credentials with access to the endpoint and bucket +in which the archive resides need to be added to the ``~/.lsst/aws-credentials.ini`` +file in the account that will be running the dashboard. + +For the pre-night ``S3`` bucket at the USDF, the endpoint is +``https://s3dfrgw.slac.stanford.edu/`` and the bucket name is +``rubin-scheduler-prenight``. Access to this bucket must be +coordinated with the USDF administrators and the Rubin Observatory +survey scheduling team.
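+
+The credentials file uses the standard AWS shared-credentials (``ini``)
+format. A minimal sketch, assuming a profile named ``prenight`` and with
+placeholder values to be replaced by the real keys:
+
+::
+
+   [prenight]
+   aws_access_key_id = <access key ID>
+   aws_secret_access_key = <secret access key>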
 + +For example, if the USDF ``S3`` bucket is to be used and the section with +the ``aws_access_key_id`` and ``aws_secret_access_key`` with access to this +endpoint and bucket is named ``prenight``, then the following environment variables +need to be set in the process running the dashboard: + +:: + + $ export S3_ENDPOINT_URL='https://s3dfrgw.slac.stanford.edu/' + $ export AWS_PROFILE=prenight + +The first of these (``S3_ENDPOINT_URL``) might have been set up automatically +for you if you are running on the USDF. diff --git a/docs/usage.rst b/docs/usage.rst index d0d3aded..2f5cfdb6 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -25,7 +25,7 @@ Activate the conda environment and start the app: The app will then give you the URL at which you can find the app. -By default, the app will allow the user to select ``opsim`` databas, pickles of +By default, the app will allow the user to select ``opsim`` databases, pickles of scheduler instances, and rewards data from ``/sdf/group/rubin/web_data/sim-data/schedview`` (if it is being run at the USDF) or the samples directory (elsewhere). The data directory from which a user can select files can be set on startup: @@ -34,7 +34,30 @@ The data directory from which a user can select files can be set on startup: $ prenight --data_dir /path/to/data/files -Alternately, the user can be allowed to enter arbitrary URLs for these files. +Alternatively, ``prenight`` can be set to look at an archive of simulation +output in an S3 bucket: + +:: + + $ export S3_ENDPOINT_URL='https://s3dfrgw.slac.stanford.edu/' + $ export AWS_PROFILE=prenight_aws_profile + $ prenight --resource_uri='s3://rubin-scheduler-prenight/opsim/' --data_from_archive + +where ``prenight_aws_profile`` should be replaced by the name of whatever section of +the ``~/.lsst/aws-credentials.ini`` file has the credentials needed for +access to the ``rubin-scheduler-prenight`` bucket. + +The ``resource_uri`` can also be set to a local directory tree with the same +layout as the above S3 bucket, in which case filesystem access to that +directory tree is needed, but the environment variables above are not. For example: + +:: + + $ prenight --resource_uri='file:///where/my/data/is/' --data_from_archive + +Note that the trailing ``/`` in the ``resource_uri`` value is required.
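+
+For reference, such a tree (assuming the layout conventions of the
+``rubin_scheduler`` simulation archive, with one directory per night, a
+numbered subdirectory per simulation, and a ``sim_metadata.yaml`` file in
+each) might look something like:
+
+::
+
+   /where/my/data/is/
+       2025-05-05/
+           1/
+               sim_metadata.yaml
+               opsim.db
+               rewards.h5
+           2/
+               ...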
 + +Finally, the user can be allowed to enter arbitrary URLs for these files. (Note that this is not secure, because it will allow the user to upload malicious pickles. So, it should only be done when public access to the dashboard is not possible.) Such a dashboard can be started thus: diff --git a/notebooks/sim_archive.ipynb b/notebooks/sim_archive.ipynb new file mode 100644 index 00000000..2df75c85 --- /dev/null +++ b/notebooks/sim_archive.ipynb @@ -0,0 +1,526 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c2a72e18-33d6-4aa7-9c0f-6becfdc98860", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%load_ext lab_black\n", + "%load_ext autoreload\n", + "%autoreload 1" + ] + }, + { + "cell_type": "markdown", + "id": "9c840f8b-2e01-42f5-91c4-31357f14e515", + "metadata": {}, + "source": [ + "### Imports\n", + "\n", + "Use `aimport` for `schedview` imports for ease of debugging." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9616a73e-01d5-4e6a-9205-11aaf75e5390", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import warnings\n", + "import math\n", + "import os\n", + "import sys\n", + "import logging\n", + "from pathlib import Path\n", + "import panel as pn\n", + "import numpy as np\n", + "import pandas as pd\n", + "import param\n", + "import bokeh\n", + "from copy import deepcopy\n", + "import datetime\n", + "from pytz import timezone\n", + "import lzma\n", + "import pickle\n", + "import yaml\n", + "import json\n", + "import socket\n", + "import time\n", + "from contextlib import redirect_stdout\n", + "from collections import OrderedDict\n", + "from tempfile import TemporaryDirectory, NamedTemporaryFile\n", + "import hashlib\n", + "import shutil\n", + "\n", + "from conda.exceptions import EnvironmentLocationNotFound\n", + "from conda.gateways.disk.test import is_conda_environment\n", + "from conda.cli.main_list import print_packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2486d2e9-b9a7-4842-bcf6-24e42596ad67", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from astropy.time import Time, TimeDelta\n", + "from zoneinfo import ZoneInfo\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import hvplot.pandas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c2996af-34dd-461f-a352-e33a30aa1e9e", + "metadata": {}, + "outputs": [], + "source": [ + "import lsst.resources" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c83e3257-40bf-4d14-8375-cacc8cedbcba", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import rubin_scheduler\n", + "from rubin_scheduler.scheduler.example import example_scheduler\n", + "from rubin_scheduler.scheduler import sim_runner\n", + "from rubin_scheduler.scheduler.model_observatory import ModelObservatory\n", + "from rubin_scheduler.scheduler.utils import SchemaConverter\n", + "from rubin_scheduler.sim_archive import sim_archive, drive_sim" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "76e8c207-4673-41f6-ab64-086cfc4d8a96", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import schedview\n", + "import schedview.app.prenight\n", + "import schedview.compute.scheduler\n", + "import schedview.collect.opsim\n", + "from schedview.plot.visitmap import BAND_COLORS" + ] + }, + { + "cell_type": "markdown", + "id": "222a6d81-563f-4e33-9a40-fc5d3bc47968", + "metadata": {}, + "source": [ + "### Further preparation of the notebook" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "55637881-b52d-4333-85ff-b325fbc80c3d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pn.extension(\"terminal\")" + ] + }, + { + "cell_type": "markdown", + "id": "d97d5079-af2d-4707-a00f-2b9826524881", + "metadata": {}, + "source": [ + "### Filter warnings\n", + "\n", + "Several dependencies throw prodigious instances of (benign) warnings.\n", + "Suppress them to avoid polluting the executed notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c07dcb73-9ce9-413e-a9a2-c38512806410", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"astropy.time\",\n", + " message=\"Numerical value without unit or explicit format passed to TimeDelta, assuming days\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"pandas\",\n", + " message=\"In a future version of pandas, a length 1 tuple will be returned when iterating over a groupby with a grouper equal to a list of length 1. Don't supply a list with a single grouper to avoid this warning.\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"healpy\",\n", + " message=\"divide by zero encountered in divide\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"healpy\",\n", + " message=\"invalid value encountered in multiply\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"holoviews\",\n", + " message=\"Discarding nonzero nanoseconds in conversion.\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"rubin_sim\",\n", + " message=\"invalid value encountered in arcsin\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"rubin_sim\",\n", + " message=\"All-NaN slice encountered\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"rubin_sim.scheduler.utils\",\n", + " message=\"invalid value encountered in cast\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"rubin_scheduler.scheduler.utils\",\n", + " message=\"invalid value encountered in cast\",\n", + ")\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " module=\"rubin_scheduler.scheduler.surveys\",\n", + " message=\"All-NaN slice encountered\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "77dd3fc1-a84f-4d9c-8fad-5b91f3da4852", + "metadata": {}, + "source": [ + "## Configuration" + ] + }, + { + "cell_type": "markdown", + "id": "058c606c-2d15-4503-9514-0feaab312b15", + "metadata": {}, + "source": [ + "Setting `keep_rewards` to `True` stores rewards that can be plotted in the schedview dashboards, but makes the simulation slower."
 + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41ddf2e8-acb9-4af3-ba53-c88f92948313", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "keep_rewards = True" + ] + }, + { + "cell_type": "markdown", + "id": "db1e633d-45b9-4115-828e-c684c4670be2", + "metadata": {}, + "source": [ + "Set the date we are simulating:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f236eab9-0f4d-4214-a168-00396e327787", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "evening_iso8601 = \"2025-05-05\"" + ] + }, + { + "cell_type": "markdown", + "id": "b9db752d-2ff5-4066-96d6-f211722cdb3b", + "metadata": {}, + "source": [ + "If we just use this day as the start and make the simulation duration 1 day, the simulation will probably begin in the middle of one night and end in the middle of the next.\n", + "Instead, find the sunset and sunrise of the night we want using the almanac, and use these to determine our start time and duration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64e05c42-07c4-4495-a8a6-98031d4fd355", + "metadata": {}, + "outputs": [], + "source": [ + "observatory = ModelObservatory()\n", + "night_sunset_info = observatory.almanac.get_sunset_info(\n", + " evening_date=evening_iso8601, longitude=observatory.location.lon\n", + ")\n", + "\n", + "mjd_start = night_sunset_info[\"sun_n12_setting\"]\n", + "night_duration = night_sunset_info[\"sunrise\"] - mjd_start" + ] + }, + { + "cell_type": "markdown", + "id": "d8255dd4-0d23-4e60-9939-c65615529f31", + "metadata": {}, + "source": [ + "Now instantiate the `ModelObservatory` and scheduler we will actually use for the simulation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "283efe8e-7f9c-4978-af19-803416bb2423", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "observatory = ModelObservatory(mjd_start=mjd_start)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "605f0e05-8971-4e1e-8107-511ac043d3a6", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "scheduler = example_scheduler(mjd_start=mjd_start)\n", + "scheduler.keep_rewards = True" + ] + }, + { + "cell_type": "markdown", + "id": "aa162d69-df59-47d7-96c0-bd36f39d1380", + "metadata": {}, + "source": [ + "## Get a URI for the archive into which to save the simulation" + ] + }, + { + "cell_type": "markdown", + "id": "3baf6f23-7122-4acc-8a3c-dd829973ecb7", + "metadata": {}, + "source": [ + "For this sample, use a temporary directory:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34a47d12-b04e-4693-9b68-46f10d5e297b", + "metadata": {}, + "outputs": [], + "source": [ + "if True:\n", + " archive_dir = TemporaryDirectory()\n", + " archive_uri = lsst.resources.ResourcePath(\n", + " archive_dir.name, forceDirectory=True\n", + " ).geturl()" + ] + }, + { + "cell_type": "markdown", + "id": "18d462e3-faa0-46a6-a47d-b28d16b97ff6", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-04T16:42:37.279985Z", + "iopub.status.busy": "2024-01-04T16:42:37.279350Z", + "iopub.status.idle": "2024-01-04T16:42:37.367953Z", + "shell.execute_reply": "2024-01-04T16:42:37.367371Z", + "shell.execute_reply.started": "2024-01-04T16:42:37.279968Z" + } + }, + "source": [ + "If you have a local, non-temporary directory you want to use instead, do something like this:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddcf7b11-bfb7-4fa5-a4b4-6b8dba52b01b", + "metadata": {}, + "outputs": [], + "source": [ + "if False:\n", + " archive_uri = \"file:///my/directory/\"" + ] + }, + { + "cell_type": "markdown", + "id": "699de188-216e-4131-ac68-e5a01828b5f9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-01-04T16:43:40.866133Z", + "iopub.status.busy": "2024-01-04T16:43:40.865578Z", + "iopub.status.idle": "2024-01-04T16:43:40.872744Z", + "shell.execute_reply": "2024-01-04T16:43:40.872213Z", + "shell.execute_reply.started": "2024-01-04T16:43:40.866120Z" + } + }, + "source": [ + "To save into the S3 bucket used by `schedview`, set both the URI and the environment variables necessary for access to it:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a3fa8ec-66c3-4c70-ab36-8a628daed580", + "metadata": {}, + "outputs": [], + "source": [ + "if False:\n", + " os.environ[\"S3_ENDPOINT_URL\"] = \"https://s3dfrgw.slac.stanford.edu/\"\n", + " os.environ[\"AWS_PROFILE\"] = \"prenight\"\n", + " archive_uri = \"s3://rubin-scheduler-prenight/opsim/\"" + ] + }, + { + "cell_type": "markdown", + "id": "b8d1acd1-934b-4496-893e-71665a67ecf5", + "metadata": {}, + "source": [ + "In both of the above cases, be sure to include the trailing `/`." + ] + }, + { + "cell_type": "markdown", + "id": "5689f570-d36d-4bca-9a77-2c610a4b3bde", + "metadata": { + "execution": { + "iopub.execute_input": "2023-12-15T20:51:37.321181Z", + "iopub.status.busy": "2023-12-15T20:51:37.320911Z", + "iopub.status.idle": "2023-12-15T20:51:37.323806Z", + "shell.execute_reply": "2023-12-15T20:51:37.323445Z", + "shell.execute_reply.started": "2023-12-15T20:51:37.321168Z" + } + }, + "source": [ + "## Save the notebook in a temporary directory" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d68629f-e9b7-4305-b32a-e5daa5836bf6", + "metadata": {}, + "outputs": [], + "source": [ + "scratch_dir = TemporaryDirectory()\n", + "scratch_path = Path(scratch_dir.name)\n", + "notebook_fname = scratch_path.joinpath(\"notebook.ipynb\").as_posix()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3bce129-c4f9-481a-abb2-f66d5a095581", + "metadata": {}, + "outputs": [], + "source": [ + "%notebook $notebook_fname" + ] + }, + { + "cell_type": "markdown", + "id": "fcdb3253-41bb-49f6-b7eb-c85e9a4431df", + "metadata": {}, + "source": [ + "## Run the simulation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "854cf2bb-9d65-4083-a1fb-932a8c46fdc4", + "metadata": {}, + "outputs": [], + "source": [ + "exec_start_time = Time.now()\n", + "results = drive_sim(\n", + " observatory=observatory,\n", + " scheduler=scheduler,\n", + " archive_uri=archive_uri,\n", + " label=f\"Notebook test on {exec_start_time.iso}\",\n", + " notebook=notebook_fname,\n", + " tags=[\"notebook\", \"devel\"],\n", + " mjd_start=mjd_start,\n", + " survey_length=night_duration,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a370886c-125d-4b67-9c81-a08e865212c1", + "metadata": {}, + "outputs": [], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "375094a3-6b65-4f0d-8511-51bce23923a9", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ehn311", + "language": "python", + "name": "ehn311" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": 
"3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/schedview/app/prenight/prenight.py b/schedview/app/prenight/prenight.py index 53656f30..164b5295 100644 --- a/schedview/app/prenight/prenight.py +++ b/schedview/app/prenight/prenight.py @@ -3,7 +3,6 @@ import logging import os import sys -import urllib.parse from pathlib import Path import astropy.utils.iers @@ -21,6 +20,7 @@ import schedview.collect.footprint import schedview.collect.opsim import schedview.collect.resources +import schedview.collect.rewards import schedview.compute.astro import schedview.compute.scheduler import schedview.param @@ -184,6 +184,8 @@ class Prenight(param.Parameterized): ) _obs_rewards = schedview.param.Series() + _autoload_rewards = False + def __init__(self, **params): super().__init__(**params) self.config_logger() @@ -520,6 +522,28 @@ def visit_skymaps(self): return vmap @param.depends("rewards_fname", watch=True) + def _update_rewards(self): + if self.rewards_fname is None or len(self.rewards_fname) < 1: + return None + + reward_df, obs_rewards = schedview.collect.rewards.read_rewards( + self.rewards_fname, + Time(self._almanac_events.loc["sunset", "UTC"]), + Time(self._almanac_events.loc["sunrise", "UTC"]), + ) + + if reward_df is not None: + self._reward_df = reward_df + self.logger.info("Finished updating reward dataframe.") + else: + self.logger.warning("Could not update reward dataframe.") + + if obs_rewards is not None: + self._obs_rewards = obs_rewards + self.logger.info("Finished updating obs_rewards.") + else: + self.logger.warning("Could not update obs_rewards.") + def _update_reward_df(self): if self.rewards_fname is None or len(self.rewards_fname) < 1: return None @@ -532,8 +556,7 @@ def _update_reward_df(self): raise FileNotFoundError(f"Resource not found: {self.rewards_fname}") with reward_resource.as_local() as local_resource: - local_fname = Path(urllib.parse.urlparse(str(local_resource)).path) - reward_df = pd.read_hdf(local_fname, "reward_df") + reward_df = pd.read_hdf(local_resource.ospath, "reward_df") self.logger.info("Finished updating reward dataframe.") except Exception as e: self.logger.error(e) @@ -587,7 +610,6 @@ def _update_basis_function_selector(self): self.basis_function = "Total" self.logger.info("Finished updating basis function selector.") - @param.depends("rewards_fname", watch=True) def _update_obs_rewards(self): if self.rewards_fname is None: return None @@ -599,8 +621,7 @@ def _update_obs_rewards(self): raise FileNotFoundError(f"Resource not found: {self.rewards_fname}") with reward_resource.as_local() as local_resource: - local_fname = Path(urllib.parse.urlparse(str(local_resource)).path) - obs_rewards = pd.read_hdf(local_fname, "obs_rewards") + obs_rewards = pd.read_hdf(local_resource.ospath, "obs_rewards") self._obs_rewards = obs_rewards self.logger.info("Finished updating obs_rewards.") @@ -618,20 +639,28 @@ def reward_params(self): """ self.logger.info("Starting to update reward params.") if self._reward_df is None: - this_param_set = pn.Param( - self.param, - parameters=["rewards_fname"], - ) - return this_param_set + if self._autoload_rewards: + return "No rewards are loaded." 
 + else: + this_param_set = pn.Param( + self.param, + parameters=["rewards_fname"], + ) + return this_param_set if len(self.param["surveys"].objects) > 10: survey_widget = pn.widgets.CrossSelector else: survey_widget = pn.widgets.MultiSelect + if self._autoload_rewards: + settable_reward_params = ["tier", "basis_function", "surveys"] + else: + settable_reward_params = ["rewards_fname", "tier", "basis_function", "surveys"] + this_param_set = pn.Param( self.param, - parameters=["rewards_fname", "tier", "basis_function", "surveys"], + parameters=settable_reward_params, default_layout=pn.Row, name="", widgets={"surveys": survey_widget}, @@ -885,17 +914,54 @@ def __init__(self, resource_uri=DEFAULT_RESOURCE_URI, **kwargs): fname_params[arg_name].objects = matching_resources +class ArchiveInputPrenight(Prenight): + """A pre-night dashboard that reads data from an archive.""" + + opsim_output_fname = param.Selector( + objects={None: None}, label="OpSim output", default=None, allow_None=True + ) + + _autoload_rewards = True + + def __init__(self, resource_uri=DEFAULT_RESOURCE_URI, **kwargs): + super().__init__(**kwargs) + + matching_resources = schedview.collect.resources.find_archive_resources( + resource_uri, file_key="observations" + ) + self.param["opsim_output_fname"].objects.update(matching_resources) + + @param.depends("opsim_output_fname", watch=True) + def _update_rewards_fname(self): + # When loading a new opsim output file, automatically load the + # corresponding rewards. + self.rewards_fname = ResourcePath(self.opsim_output_fname).dirname().geturl() + + def prenight_app(*args, **kwargs): """Create the pre-night briefing app.""" try: + # Let the user write URLs for specific data files directly into + # the dashboard. data_from_urls = kwargs["data_from_urls"] del kwargs["data_from_urls"] except KeyError: data_from_urls = False + try: + # Provide a URI for an archive that contains the data from + # which a user may choose. + data_from_archive = kwargs["data_from_archive"] + del kwargs["data_from_archive"] + except KeyError: + data_from_archive = False + if data_from_urls: prenight = Prenight() + elif data_from_archive: + resource_uri = kwargs["resource_uri"] if "resource_uri" in kwargs else None + prenight = ArchiveInputPrenight(resource_uri=resource_uri) else: try: resource_uri = kwargs["resource_uri"] @@ -957,7 +1023,7 @@ def parse_prenight_args(): "-d", type=str, default=DEFAULT_RESOURCE_URI, - help="The base URI for data files.", + help="The base URI for the archive containing the data.", ) parser.add_argument( "--data_from_urls", action="store_true", help="Let the user specify URLs from which to load data. 
THIS IS NOT SECURE.", ) + parser.add_argument( + "--data_from_archive", + action="store_true", + help="Load data from a simulation archive using archive metadata.", + ) + parser.add_argument( "--yes", action="store_true", diff --git a/schedview/collect/opsim.py b/schedview/collect/opsim.py index 43dc1cf1..93e29cff 100644 --- a/schedview/collect/opsim.py +++ b/schedview/collect/opsim.py @@ -1,8 +1,8 @@ import sqlite3 -import urllib import numpy as np import pandas as pd +import yaml from astropy.time import Time from lsst.resources import ResourcePath @@ -28,10 +28,20 @@ def read_opsim(opsim_uri, start_time="2000-01-01", end_time="2100-01-01"): end_mjd = Time(end_time).mjd original_resource_path = ResourcePath(opsim_uri) - with original_resource_path.as_local() as local_resource_path: - filename = urllib.parse.urlparse(str(local_resource_path)).path - with sqlite3.connect(filename) as sim_connection: + if original_resource_path.isdir(): + # If we were given a directory, look for a metadata file in the + # directory, and look up in it what file to load observations from. + metadata_path = original_resource_path.join("sim_metadata.yaml") + sim_metadata = yaml.safe_load(metadata_path.read().decode("utf-8")) + obs_basename = sim_metadata["files"]["observations"]["name"] + obs_path = original_resource_path.join(obs_basename) + else: + # Otherwise, assume we were given the path to the observations file. + obs_path = original_resource_path + + with obs_path.as_local() as local_obs_path: + with sqlite3.connect(local_obs_path.ospath) as sim_connection: visits = pd.read_sql_query( f"SELECT * FROM observations WHERE observationStartMJD BETWEEN {start_mjd} AND {end_mjd}", sim_connection, diff --git a/schedview/collect/resources.py b/schedview/collect/resources.py index 55089f48..ec5f94f9 100644 --- a/schedview/collect/resources.py +++ b/schedview/collect/resources.py @@ -1,4 +1,5 @@ from lsst.resources import ResourcePath +from rubin_scheduler.sim_archive import read_archived_sim_metadata def find_file_resources(base_resource_uri, file_filter=None): @@ -25,3 +26,35 @@ def find_file_resources(base_resource_uri, file_filter=None): accumulated_files.append(qualified_file_name) return accumulated_files + + +def find_archive_resources(base_resource_uri, file_key=None, latest_date=None, num_nights=5): + """Find matching resources in a simulation archive. + + Parameters + ---------- + base_resource_uri : `str` + The URI of the archive to search. + file_key : `str`, optional + The file type, as keyed in the archive metadata files. + latest_date : `str`, optional + The date of the latest simulation to return. + num_nights : `int`, optional + The width of the date window, in nights, to search for simulations. + + Returns + ------- + files : `dict` + The matching resources, as a dictionary keyed by simulation label. 
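+
+    Examples
+    --------
+    A sketch of expected use; the archive URI, label, and returned
+    URI below are illustrative only:
+
+    >>> find_archive_resources(  # doctest: +SKIP
+    ...     "s3://rubin-scheduler-prenight/opsim/", file_key="observations"
+    ... )
+    {'my simulation label': 's3://rubin-scheduler-prenight/opsim/2025-05-05/1/'}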
 + """ + accumulated_files = {} + sim_metadata = read_archived_sim_metadata(base_resource_uri, latest=latest_date, num_nights=num_nights) + + for sim_uri, sim_info in sim_metadata.items(): + try: + label = sim_info["label"] + accumulated_files[label] = sim_uri + except KeyError: + pass + + return accumulated_files diff --git a/schedview/collect/rewards.py b/schedview/collect/rewards.py new file mode 100644 index 00000000..d0069f90 --- /dev/null +++ b/schedview/collect/rewards.py @@ -0,0 +1,64 @@ +import numpy as np +import pandas as pd +import yaml +from astropy.time import Time +from lsst.resources import ResourcePath + + +def read_rewards(rewards_uri, start_time="2000-01-01", end_time="2100-01-01"): + """Read rewards from a rewards table recorded by the scheduler. + + Parameters + ---------- + rewards_uri : `str` + The URI from which to read rewards. + start_time : `str`, `astropy.time.Time` + The start time for rewards to be loaded. + end_time : `str`, `astropy.time.Time` + The end time for rewards to be loaded. + + Returns + ------- + rewards_df, obs_rewards : `tuple` [`pandas.DataFrame`, `pandas.Series`] + The rewards data frame and the obs rewards series. + """ + start_mjd = Time(start_time).mjd + end_mjd = Time(end_time).mjd + + # Make sure we use a time window which includes the start and end times. + start_mjd = np.nextafter(start_mjd, start_mjd - 1) + end_mjd = np.nextafter(end_mjd, end_mjd + 1) + + original_resource_path = ResourcePath(rewards_uri) + + if original_resource_path.isdir(): + # If we were given a directory, look for a metadata file in the + # directory, and look up in it what file to load rewards from. + metadata_path = original_resource_path.join("sim_metadata.yaml") + sim_metadata = yaml.safe_load(metadata_path.read().decode("utf-8")) + try: + rewards_basename = sim_metadata["files"]["rewards"]["name"] + except KeyError: + rewards_df = None + obs_rewards = None + return rewards_df, obs_rewards + + rewards_path = original_resource_path.join(rewards_basename) + else: + # Otherwise, assume we were given the path to the rewards file. 
 + rewards_path = original_resource_path + + with rewards_path.as_local() as local_rewards_path: + try: + rewards_df = pd.read_hdf(local_rewards_path.ospath, key="reward_df") + rewards_df.query(f"{start_mjd} <= queue_start_mjd <= {end_mjd}", inplace=True) + except KeyError: + rewards_df = None + + try: + obs_rewards = pd.read_hdf(local_rewards_path.ospath, key="obs_rewards") + obs_rewards = obs_rewards.loc[start_mjd:end_mjd] + except KeyError: + obs_rewards = None + + return rewards_df, obs_rewards diff --git a/schedview/data/sim_metadata.yaml b/schedview/data/sim_metadata.yaml new file mode 100644 index 00000000..5b62e84f --- /dev/null +++ b/schedview/data/sim_metadata.yaml @@ -0,0 +1,5 @@ +files: + observations: + name: 'sample_opsim.db' + rewards: + name: 'sample_rewards.h5' diff --git a/tests/test_resources.py b/tests/test_resources.py index 0ed44585..79e78aa3 100644 --- a/tests/test_resources.py +++ b/tests/test_resources.py @@ -2,7 +2,12 @@ from pathlib import Path from tempfile import TemporaryDirectory +import pandas as pd +from lsst.resources import ResourcePath + +from schedview.collect.opsim import read_opsim from schedview.collect.resources import find_file_resources +from schedview.collect.rewards import read_rewards class TestResources(unittest.TestCase): @@ -23,3 +28,21 @@ def test_find_file_resources(self): found_files = find_file_resources(temp_dir) assert set(made_files) == set(found_files) + + +class TestCollectOpsim(unittest.TestCase): + def test_read_opsim(self): + resource_path = ResourcePath("resource://schedview/data/") + visits = read_opsim(resource_path) + self.assertTrue("airmass" in visits.columns) + self.assertGreater(len(visits), 0) + + +class TestCollectRewards(unittest.TestCase): + def test_read_rewards(self): + resource_path = ResourcePath("resource://schedview/data/") + rewards_df, obs_rewards = read_rewards(resource_path) + self.assertGreater(len(rewards_df), 0) + self.assertGreater(len(obs_rewards), 0) + self.assertTrue("survey_reward" in rewards_df.columns) + self.assertTrue(isinstance(obs_rewards, pd.Series))
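+
+    def test_read_rewards_time_window(self):
+        # Sketch of an additional check (the dates are arbitrary): narrowing
+        # the time window should never yield more rows than reading with the
+        # default, maximal window.
+        resource_path = ResourcePath("resource://schedview/data/")
+        all_rewards, _ = read_rewards(resource_path)
+        windowed_rewards, _ = read_rewards(resource_path, "2025-01-01", "2025-01-02")
+        self.assertLessEqual(len(windowed_rewards), len(all_rewards))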