Skip to content

Commit

Permalink
Prerun input query in append_pipes (#63)
Browse files Browse the repository at this point in the history
* Prerun input query to append_pipes and add limit option

* Add an ANALYZE after creating the temporary table

* Tweak carton/program query with specific settings
  • Loading branch information
albireox authored Dec 9, 2024
1 parent 092f162 commit 93a500f
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 15 deletions.
62 changes: 50 additions & 12 deletions python/valis/db/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@

# all reusable queries go here

from contextlib import contextmanager
import itertools
import packaging
import uuid
from typing import Sequence, Union, Generator

import astropy.units as u
Expand Down Expand Up @@ -57,17 +59,20 @@ def append_pipes(query: peewee.ModelSelect, table: str = 'stacked',
if table not in {'stacked', 'flat'}:
raise ValueError('table must be either "stacked" or "flat"')

model = vizdb.SDSSidStacked if table == 'stacked' else vizdb.SDSSidFlat
qq = query.select_extend(vizdb.SDSSidToPipes.in_boss,
vizdb.SDSSidToPipes.in_apogee,
vizdb.SDSSidToPipes.in_bvs,
vizdb.SDSSidToPipes.in_astra,
vizdb.SDSSidToPipes.has_been_observed,
vizdb.SDSSidToPipes.release,
vizdb.SDSSidToPipes.obs,
vizdb.SDSSidToPipes.mjd).\
join(vizdb.SDSSidToPipes, on=(model.sdss_id == vizdb.SDSSidToPipes.sdss_id),
attr='pipes').distinct(vizdb.SDSSidToPipes.sdss_id)
# Run initial query as a temporary table.
temp = create_temporary_table(query, indices=['sdss_id'])

qq = temp.select(temp.__star__,
vizdb.SDSSidToPipes.in_boss,
vizdb.SDSSidToPipes.in_apogee,
vizdb.SDSSidToPipes.in_bvs,
vizdb.SDSSidToPipes.in_astra,
vizdb.SDSSidToPipes.has_been_observed,
vizdb.SDSSidToPipes.release,
vizdb.SDSSidToPipes.obs,
vizdb.SDSSidToPipes.mjd).\
join(vizdb.SDSSidToPipes, on=(temp.c.sdss_id == vizdb.SDSSidToPipes.sdss_id)).\
distinct(temp.c.sdss_id)

if observed:
qq = qq.where(vizdb.SDSSidToPipes.has_been_observed == observed)
Expand Down Expand Up @@ -264,7 +269,8 @@ def carton_program_map(key: str = 'program') -> dict:

def carton_program_search(name: str,
name_type: str,
query: peewee.ModelSelect | None = None) -> peewee.ModelSelect:
query: peewee.ModelSelect | None = None,
limit: int | None = None) -> peewee.ModelSelect:
""" Perform a search on either carton or program
Parameters
Expand All @@ -276,6 +282,8 @@ def carton_program_search(name: str,
query : ModelSelect
An initial query to extend. If ``None``, a new query with all the unique
``sdss_id``s is created.
limit : int
Limit the number of results returned.
Returns
-------
Expand All @@ -286,6 +294,13 @@ def carton_program_search(name: str,
if query is None:
query = vizdb.SDSSidStacked.select(vizdb.SDSSidStacked).distinct()

# NOTE: These settings seem to help when querying some cartons or programs, mainly
# those with a small number of targets. In some cases they also let the query
# apply the LIMIT more efficiently, but this is not a perfect solution.
vizdb.database.execute_sql('SET enable_gathermerge = off;')
vizdb.database.execute_sql('SET parallel_tuple_cost = 100;')
vizdb.database.execute_sql('SET enable_bitmapscan = off;')

query = (query.join(
vizdb.SDSSidFlat,
on=(vizdb.SDSSidFlat.sdss_id == vizdb.SDSSidStacked.sdss_id))
Expand All @@ -295,6 +310,9 @@ def carton_program_search(name: str,
.join(targetdb.Carton)
.where(getattr(targetdb.Carton, name_type) == name))

if limit:
query = query.limit(limit)

return query

def get_targets_obs(release: str, obs: str, spectrograph: str) -> peewee.ModelSelect:
Expand Down Expand Up @@ -931,3 +949,23 @@ def get_target_by_altid(id: str | int, idtype: str = None) -> peewee.ModelSelect

# get the sdss_id metadata info
return get_targets_by_sdss_id(res.sdss_id)


def create_temporary_table(query: peewee.ModelSelect,
                           indices: list[str] | None = None) -> peewee.Table:
    """ Create a temporary table from a query

    Runs ``query`` and stores its results in a temporary table with a
    randomly generated name, optionally indexes it, and runs ``ANALYZE``
    on it.

    Parameters
    ----------
    query : ModelSelect
        The query whose results populate the temporary table.
    indices : list of str, optional
        Columns to index in the new table. One ``CREATE INDEX`` statement
        is issued per entry. Entries are interpolated into the SQL as given
        (unquoted), so they must come from trusted code, not user input.

    Returns
    -------
    peewee.Table
        A table object, bound to ``vizdb.database``, that refers to the
        newly created temporary table.
    """

    # Random 8-character hex name. It can start with a digit, which is why
    # it is always double-quoted in the raw SQL statements below.
    table_name = uuid.uuid4().hex[0:8]

    table = peewee.Table(table_name)
    table.bind(vizdb.database)

    query.create_table(table_name, temporary=True)

    for index in indices or []:
        vizdb.database.execute_sql(f'CREATE INDEX ON "{table_name}" ({index})')

    # Run ANALYZE on the freshly populated (and indexed) table before it is
    # used in further queries.
    vizdb.database.execute_sql(f'ANALYZE "{table_name}"')

    return table
19 changes: 16 additions & 3 deletions python/valis/routes/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class SearchModel(BaseModel):
program: Optional[str] = Field(None, description='The program name', example='bhm_rm')
carton: Optional[str] = Field(None, description='The carton name', example='bhm_rm_core')
observed: Optional[bool] = Field(True, description='Flag to only include targets that have been observed', example=True)
limit: Optional[int] = Field(None, description='Limit the number of returned targets', example=100)

class MainResponse(SDSSModel):
""" Combined model from all individual query models """
Expand Down Expand Up @@ -105,6 +106,13 @@ async def main_search(self, body: SearchModel):
query = carton_program_search(body.program or body.carton,
'program' if body.program else 'carton',
query=query)

# DANGER!!! This limit applies *before* the append_pipes call. If the
# append_pipes call includes observed=True we may have limited things in
# such a way that only unobserved or very few targets are returned.
if body.limit:
query = query.limit(body.limit)

# append query to pipes
if query:
query = append_pipes(query, observed=body.observed)
Expand Down Expand Up @@ -203,12 +211,17 @@ async def carton_program(self,
Query(enum=['carton', 'program'],
description='Specify search on carton or program',
example='carton')] = 'carton',
observed: Annotated[bool, Query(description='Flag to only include targets that have been observed', example=True)] = True):
observed: Annotated[bool, Query(description='Flag to only include targets that have been observed', example=True)] = True,
limit: Annotated[int | None, Query(description='Limit the number of returned targets', example=100)] = None):
""" Perform a search on carton or program """
with database.atomic():
database.execute_sql('SET LOCAL enable_seqscan=false;')
query = carton_program_search(name, name_type)
if limit is False:
# This tweak seems to do more harm than good when limit is passed.
database.execute_sql('SET LOCAL enable_seqscan=false;')

query = carton_program_search(name, name_type, limit=limit)
query = append_pipes(query, observed=observed)

return query.dicts().iterator()

@router.get('/obs', summary='Return targets with spectrum at observatory',
Expand Down

0 comments on commit 93a500f

Please sign in to comment.