Skip to content

Commit

Permalink
Merge pull request #1079 from lsst/tickets/DM-46340
Browse files Browse the repository at this point in the history
DM-46340: Fix query datasets failing when run collection specified
  • Loading branch information
dhirving authored Sep 16, 2024
2 parents da89c37 + 1582b9e commit 4a59ea5
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/changes/DM-46340.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix an issue where `query_datasets` would sometimes fail when searching in a single run collection.
16 changes: 14 additions & 2 deletions python/lsst/daf/butler/registry/datasets/byDimensions/_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,13 @@ def _finish_query_builder(
only_collection_record = collections[0]
sql_projection.joiner.where(collection_col == only_collection_record.key)
if "collection" in fields:
fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name)
fields_provided["collection"] = sqlalchemy.literal(only_collection_record.name).cast(
# This cast is necessary to ensure that Postgres knows the
# type of this column if it is used in an aggregate
# function.
sqlalchemy.String
)

elif not collections:
sql_projection.joiner.where(sqlalchemy.literal(False))
if "collection" in fields:
Expand Down Expand Up @@ -710,7 +716,13 @@ def _finish_query_builder(
# know that if we find the dataset in that collection,
# then that's the datasets's run; we don't need to
# query for it.
fields_provided["run"] = sqlalchemy.literal(only_collection_record.name)
#
fields_provided["run"] = sqlalchemy.literal(only_collection_record.name).cast(
# This cast is necessary to ensure that Postgres knows the
# type of this column if it is used in an aggregate
# function.
sqlalchemy.String
)
elif run_collections_only:
# Once again we can avoid joining to the collection table by
# adding a CASE statement.
Expand Down
40 changes: 40 additions & 0 deletions python/lsst/daf/butler/tests/butler_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -1819,6 +1819,46 @@ def test_collection_query_info(self) -> None:
assert dataset_types is not None
self.assertCountEqual(dataset_types, ["flat"])

def test_dataset_queries(self) -> None:
    """Regression tests for DM-46340: dataset queries against a single
    run collection previously generated invalid SQL when region
    post-processing was involved.
    """
    butler = self.make_butler("base.yaml", "spatial.yaml")

    # Register a dataset type with visit+detector dimensions so that
    # spatial (region) post-processing — and hence the aggregate-value
    # code path — is exercised by the queries below.
    dataset_type = DatasetType("dt", ["visit", "detector"], "int", universe=butler.dimensions)
    butler.registry.registerDatasetType(dataset_type)
    butler.collections.register("run1")
    data_id = {"instrument": "Cam1", "visit": 1, "detector": 1}
    butler.registry.insertDatasets("dt", [data_id], "run1")

    spatial_where = "instrument='Cam1' and skymap='SkyMap1' and visit=1 and tract=0"

    # DM-46340 regression: a single run collection combined with
    # region-postprocessing used to produce invalid SQL because the
    # "run" dataset field carried no type information.
    found = butler.query_datasets(
        "dt",
        "run1",
        where=spatial_where,
        with_dimension_records=True,
    )
    self.assertEqual(found[0].dataId, {"instrument": "Cam1", "visit": 1, "detector": 1})
    self.assertEqual(found[0].run, "run1")

    # The 'collection' dataset field suffered from a similar missing-type
    # issue; exercise it via the general-result query interface.
    with butler.query() as q:
        search = q.join_dataset_search("dt", "run1").where(spatial_where)
        general_rows = list(
            search.general(
                dimensions=["visit", "detector"],
                dataset_fields={"dt": {"collection"}},
                find_first=True,
            )
        )
    self.assertEqual(len(general_rows), 1)
    self.assertEqual(general_rows[0]["visit"], 1)
    self.assertEqual(general_rows[0]["dt.collection"], "run1")


def _get_exposure_ids_from_dimension_records(dimension_records: Iterable[DimensionRecord]) -> list[int]:
output = []
Expand Down

0 comments on commit 4a59ea5

Please sign in to comment.