diff --git a/python/valis/db/queries.py b/python/valis/db/queries.py index 1209a33..0590682 100644 --- a/python/valis/db/queries.py +++ b/python/valis/db/queries.py @@ -4,8 +4,10 @@ # all resuable queries go here +from contextlib import contextmanager import itertools import packaging +import uuid from typing import Sequence, Union, Generator import astropy.units as u @@ -57,17 +59,20 @@ def append_pipes(query: peewee.ModelSelect, table: str = 'stacked', if table not in {'stacked', 'flat'}: raise ValueError('table must be either "stacked" or "flat"') - model = vizdb.SDSSidStacked if table == 'stacked' else vizdb.SDSSidFlat - qq = query.select_extend(vizdb.SDSSidToPipes.in_boss, - vizdb.SDSSidToPipes.in_apogee, - vizdb.SDSSidToPipes.in_bvs, - vizdb.SDSSidToPipes.in_astra, - vizdb.SDSSidToPipes.has_been_observed, - vizdb.SDSSidToPipes.release, - vizdb.SDSSidToPipes.obs, - vizdb.SDSSidToPipes.mjd).\ - join(vizdb.SDSSidToPipes, on=(model.sdss_id == vizdb.SDSSidToPipes.sdss_id), - attr='pipes').distinct(vizdb.SDSSidToPipes.sdss_id) + # Run initial query as a temporary table. 
+ temp = create_temporary_table(query, indices=['sdss_id']) + + qq = temp.select(temp.__star__, + vizdb.SDSSidToPipes.in_boss, + vizdb.SDSSidToPipes.in_apogee, + vizdb.SDSSidToPipes.in_bvs, + vizdb.SDSSidToPipes.in_astra, + vizdb.SDSSidToPipes.has_been_observed, + vizdb.SDSSidToPipes.release, + vizdb.SDSSidToPipes.obs, + vizdb.SDSSidToPipes.mjd).\ + join(vizdb.SDSSidToPipes, on=(temp.c.sdss_id == vizdb.SDSSidToPipes.sdss_id)).\ + distinct(temp.c.sdss_id) if observed: qq = qq.where(vizdb.SDSSidToPipes.has_been_observed == observed) @@ -264,7 +269,8 @@ def carton_program_map(key: str = 'program') -> dict: def carton_program_search(name: str, name_type: str, - query: peewee.ModelSelect | None = None) -> peewee.ModelSelect: + query: peewee.ModelSelect | None = None, + limit: int | None = None) -> peewee.ModelSelect: """ Perform a search on either carton or program Parameters @@ -276,6 +282,8 @@ def carton_program_search(name: str, query : ModelSelect An initial query to extend. If ``None``, a new query with all the unique ``sdss_id``s is created. + limit : int + Limit the number of results returned. Returns ------- @@ -286,6 +294,13 @@ def carton_program_search(name: str, if query is None: query = vizdb.SDSSidStacked.select(vizdb.SDSSidStacked).distinct() + # NOTE: These settings seem to help when querying some cartons or programs, mainly + # those with a small number of targets, and in some cases with these the query + # actually applies the LIMIT more efficiently, but it's not a perfect solution. 
+ vizdb.database.execute_sql('SET enable_gathermerge = off;') + vizdb.database.execute_sql('SET parallel_tuple_cost = 100;') + vizdb.database.execute_sql('SET enable_bitmapscan = off;') + query = (query.join( vizdb.SDSSidFlat, on=(vizdb.SDSSidFlat.sdss_id == vizdb.SDSSidStacked.sdss_id)) @@ -295,6 +310,9 @@ def carton_program_search(name: str, .join(targetdb.Carton) .where(getattr(targetdb.Carton, name_type) == name)) + if limit: + query = query.limit(limit) + return query def get_targets_obs(release: str, obs: str, spectrograph: str) -> peewee.ModelSelect: @@ -931,3 +949,23 @@ def get_target_by_altid(id: str | int, idtype: str = None) -> peewee.ModelSelect # get the sdss_id metadata info return get_targets_by_sdss_id(res.sdss_id) + + +def create_temporary_table(query: peewee.ModelSelect, + indices: list[str] | None = None) -> Generator[None, None, peewee.Table]: + """Create a temporary table from a query.""" + + table_name = uuid.uuid4().hex[0:8] + + table = peewee.Table(table_name) + table.bind(vizdb.database) + + query.create_table(table_name, temporary=True) + + if indices: + for index in indices: + vizdb.database.execute_sql(f'CREATE INDEX ON "{table_name}" ({index})') + + vizdb.database.execute_sql(f'ANALYZE "{table_name}"') + + return table diff --git a/python/valis/routes/query.py b/python/valis/routes/query.py index 5a7d25c..866430c 100644 --- a/python/valis/routes/query.py +++ b/python/valis/routes/query.py @@ -41,6 +41,7 @@ class SearchModel(BaseModel): program: Optional[str] = Field(None, description='The program name', example='bhm_rm') carton: Optional[str] = Field(None, description='The carton name', example='bhm_rm_core') observed: Optional[bool] = Field(True, description='Flag to only include targets that have been observed', example=True) + limit: Optional[int] = Field(None, description='Limit the number of returned targets', example=100) class MainResponse(SDSSModel): """ Combined model from all individual query models """ @@ -105,6 +106,13 @@ 
async def main_search(self, body: SearchModel): query = carton_program_search(body.program or body.carton, 'program' if body.program else 'carton', query=query) + + # DANGER!!! This limit applies *before* the append_pipes call. If the + # append_pipes call includes observed=True we may have limited things in + # such a way that only unobserved or very few targets are returned. + if body.limit: + query = query.limit(body.limit) + # append query to pipes if query: query = append_pipes(query, observed=body.observed) @@ -203,12 +211,17 @@ async def carton_program(self, Query(enum=['carton', 'program'], description='Specify search on carton or program', example='carton')] = 'carton', - observed: Annotated[bool, Query(description='Flag to only include targets that have been observed', example=True)] = True): + observed: Annotated[bool, Query(description='Flag to only include targets that have been observed', example=True)] = True, + limit: Annotated[int | None, Query(description='Limit the number of returned targets', example=100)] = None): """ Perform a search on carton or program """ with database.atomic(): - database.execute_sql('SET LOCAL enable_seqscan=false;') - query = carton_program_search(name, name_type) + if limit is False: + # This tweak seems to do more harm than good when limit is passed. + database.execute_sql('SET LOCAL enable_seqscan=false;') + + query = carton_program_search(name, name_type, limit=limit) query = append_pipes(query, observed=observed) + return query.dicts().iterator() @router.get('/obs', summary='Return targets with spectrum at observatory',