Skip to content

Commit

Permalink
Merge pull request #1080 from lsst/tickets/DM-46339
Browse files Browse the repository at this point in the history
DM-46339: fix regression in glob detection in query-datasets
  • Loading branch information
TallJimbo authored Sep 16, 2024
2 parents 4a59ea5 + b9c8216 commit 910bec6
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-46339.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix a bug in `butler query-datasets` that incorrectly rejected a find-first query against a chain collection as having a glob.
3 changes: 2 additions & 1 deletion python/lsst/daf/butler/script/queryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

from .._butler import Butler
from ..cli.utils import sortAstropyTable
from ..utils import has_globs

if TYPE_CHECKING:
from lsst.daf.butler import DatasetRef
Expand Down Expand Up @@ -261,7 +262,7 @@ def getDatasets(self) -> Iterator[list[DatasetRef]]:
summary_datasets=dataset_types,
)
expanded_query_collections = [c.name for c in query_collections_info]
if self._find_first and set(query_collections) != set(expanded_query_collections):
if self._find_first and has_globs(query_collections):
raise RuntimeError("Can not use wildcards in collections when find_first=True")
query_collections = expanded_query_collections

Expand Down
15 changes: 12 additions & 3 deletions tests/test_cliCmdQueryDatasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
import unittest

from astropy.table import Table as AstropyTable
from lsst.daf.butler import StorageClassFactory, script
from lsst.daf.butler import CollectionType, StorageClassFactory, script
from lsst.daf.butler.tests import addDatasetType
from lsst.daf.butler.tests.utils import ButlerTestHelper, MetricTestRepo, makeTestTempDir, removeTestTempDir
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -370,8 +370,13 @@ def testFindFirstAndCollections(self):
# Add a new run, and add a dataset to shadow an existing dataset.
testRepo.addDataset(run="foo", dataId={"instrument": "DummyCamComp", "visit": 424})

# Add a CHAINED collection to include the two runs, to check that
# flattening works as well.
testRepo.butler.collections.register("chain", CollectionType.CHAINED)
testRepo.butler.collections.redefine_chain("chain", ["foo", "ingest/run"])

# Verify that without find-first, duplicate datasets are returned
tables = self._queryDatasets(repo=testRepo.butler, collections=["foo", "ingest/run"], show_uri=True)
tables = self._queryDatasets(repo=testRepo.butler, collections=["chain"], show_uri=True)

# The test should be running with a single FileDatastore.
roots = testRepo.butler.get_datastore_roots()
Expand Down Expand Up @@ -514,7 +519,7 @@ def testFindFirstAndCollections(self):
# Verify that with find first the duplicate dataset is eliminated and
# the more recent dataset is returned.
tables = self._queryDatasets(
repo=testRepo.butler, collections=["foo", "ingest/run"], show_uri=True, find_first=True
repo=testRepo.butler, collections=["chain"], show_uri=True, find_first=True
)

expectedTables = (
Expand Down Expand Up @@ -614,6 +619,10 @@ def testFindFirstAndCollections(self):

self.assertAstropyTablesEqual(tables, expectedTables, filterColumns=True)

# Verify that globs are not supported with find_first=True.
with self.assertRaises(RuntimeError):
self._queryDatasets(repo=testRepo.butler, collections=["*"], show_uri=True, find_first=True)


if __name__ == "__main__":
unittest.main()

0 comments on commit 910bec6

Please sign in to comment.