Skip to content

Commit

Permalink
Merge pull request #599 from msdemlei/fix-bug-543
Browse files Browse the repository at this point in the history
Fix bug #543
  • Loading branch information
andamian authored Sep 26, 2024
2 parents c9e0d04 + 0fcf7e7 commit 4e86bf3
Show file tree
Hide file tree
Showing 5 changed files with 277 additions and 49 deletions.
8 changes: 6 additions & 2 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,16 @@ Enhancements and Fixes
- Tables returned by RegistryResource.get_tables() now have a utype
attribute [#576]

- Registry Spatial constraint now supports Astropy Quantities for the radius argument [#594]
- Registry Spatial constraint now supports Astropy Quantities for the
radius argument [#594]

- iter_metadata() no longer crashes on tables with a datalink RESOURCE
and without obscore attributes [#599]


Deprecations and Removals
-------------------------


- SodaRecordMixin no longer will use a datalink#links endpoint for soda [#580]

- Deprecating the use of "image" and "spectrum" in registry Servicetype
Expand Down
158 changes: 113 additions & 45 deletions pyvo/dal/adhoc.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _get_accessurl_from_params(params):

class AdhocServiceResultsMixin:
"""
Mixin for adhoc:service functionallity for results classes.
Mixin for adhoc:service functionality for results classes.
"""

def __init__(self, votable, *, url=None, session=None):
Expand Down Expand Up @@ -169,6 +169,113 @@ class DatalinkResultsMixin(AdhocServiceResultsMixin):
"""
Mixin for datalink functionallity for results classes.
"""
def _iter_datalinks_from_dlblock(self, datalink_service):
"""yields datalinks from the current rows using a datalink
service RESOURCE.
"""
remaining_ids = [] # remaining IDs to processed
current_batch = None # retrieved but not returned yet
current_ids = [] # retrieved but not returned
processed_ids = [] # retrived and returned IDs
batch_size = None # size of the batch

for row in self:
if not current_ids:
if batch_size is None:
# first call.
self.query = DatalinkQuery.from_resource(
[_ for _ in self],
self._datalink,
session=self._session,
original_row=row)
remaining_ids = self.query['ID']
if not remaining_ids:
# we are done
return
if batch_size:
# subsequent calls are limitted to batch size
self.query['ID'] = remaining_ids[:batch_size]
current_batch = self.query.execute(post=True)
current_ids = list(OrderedDict.fromkeys(
[_ for _ in current_batch.to_table()['ID']]))
if not current_ids:
raise DALServiceError(
'Could not retrieve datalinks for: {}'.format(
', '.join([_ for _ in remaining_ids])))
batch_size = len(current_ids)
id1 = current_ids.pop(0)
processed_ids.append(id1)
remaining_ids.remove(id1)
yield current_batch.clone_byid(
id1,
original_row=row)

@staticmethod
def _guess_access_format(row):
"""returns a guess for the format of what __guess_access_url will
return.
This tries a few heuristics based on how obscore or SIA records might
be marked up. If will return None if row does not look as if
it contained an access format. Note that the heuristics are
tried in sequence; for now, we do not define the sequence of
heuristics.
"""
if hasattr(row, "access_format"):
return row.access_format

if "access_format" in row:
return row["access_format"]

access_format = row.getbyutype("obscore:access.format"
) or row.getbyutype("ssa:Access.Format")
if access_format:
return access_format

access_format = row.getbyucd("meta.code.mime"
) or row.getbyucd("VOX:Image_Format")
if access_format:
return access_format

@staticmethod
def _guess_access_url(row):
"""returns a guess for a URI to a data product in row.
This tries a few heuristics based on how obscore or SIA records might
be marked up. If will return None if row does not look as if
it contained a product access url. Note that the heuristics are
tried in sequence; for now, we do not define the sequence of
heuristics.
"""
if hasattr(row, "access_url"):
return row.access_url

if "access_url" in row:
return row["access_url"]

access_url = row.getbyutype("obscore:access.reference"
) or row.getbyutype("ssa:Access.Reference")
if access_url:
return access_url

access_url = row.getbyucd("meta.ref.url"
) or row.getbyucd("VOX:Image_AccessReference")
if access_url:
return access_url

def _iter_datalinks_from_product_rows(self):
"""yield datalinks from self's rows if they describe datalink-valued
products.
"""
for row in self:
# TODO: we should be more careful about whitespace, case
# and perhaps more parameters in the following comparison
if self._guess_access_format(row) == DATALINK_MIME_TYPE:
access_url = self._guess_access_url(row)
if access_url is not None:
yield DatalinkResults.from_result_url(
access_url,
original_row=row)

def iter_datalinks(self):
"""
Expand All @@ -186,51 +293,12 @@ def iter_datalinks(self):
self._datalink = self.get_adhocservice_by_ivoid(DATALINK_IVOID)
except DALServiceError:
self._datalink = None
remaining_ids = [] # remaining IDs to processed
current_batch = None # retrieved but not returned yet
current_ids = [] # retrieved but not returned
processed_ids = [] # retrived and returned IDs
batch_size = None # size of the batch

for row in self:
if self._datalink:
if not current_ids:
if batch_size is None:
# first call.
self.query = DatalinkQuery.from_resource(
[_ for _ in self],
self._datalink,
session=self._session,
original_row=row)
remaining_ids = self.query['ID']
if not remaining_ids:
# we are done
return
if batch_size:
# subsequent calls are limitted to batch size
self.query['ID'] = remaining_ids[:batch_size]
current_batch = self.query.execute(post=True)
current_ids = list(OrderedDict.fromkeys(
[_ for _ in current_batch.to_table()['ID']]))
if not current_ids:
raise DALServiceError(
'Could not retrieve datalinks for: {}'.format(
', '.join([_ for _ in remaining_ids])))
batch_size = len(current_ids)
id1 = current_ids.pop(0)
processed_ids.append(id1)
remaining_ids.remove(id1)
yield current_batch.clone_byid(
id1,
original_row=row)
elif getattr(row, 'access_format', None) == DATALINK_MIME_TYPE:
yield DatalinkResults.from_result_url(row.getdataurl())
else:
# Fall back to row-specific handling
try:
yield row.getdatalink()
except AttributeError:
yield None
if self._datalink is None:
yield from self._iter_datalinks_from_product_rows()

else:
yield from self._iter_datalinks_from_dlblock(self._datalink)


class DatalinkRecordMixin:
Expand Down
3 changes: 2 additions & 1 deletion pyvo/dal/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,10 +530,11 @@ def fieldname_with_utype(self, utype):
return the field name that has a given UType value or None if the UType
is not found.
"""
utype = utype.lower()
try:
iterchain = (
self.getdesc(fieldname) for fieldname in self.fieldnames)
iterchain = (field for field in iterchain if field.utype == utype)
iterchain = (field for field in iterchain if (field.utype or "").lower() == utype)
return next(iterchain).name
except StopIteration:
return None
Expand Down
111 changes: 110 additions & 1 deletion pyvo/dal/tests/test_datalink.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

import pyvo as vo
from pyvo.dal.adhoc import DatalinkResults
from pyvo.utils import vocabularies
from pyvo.dal.sia2 import SIA2Results
from pyvo.dal.tap import TAPResults
from pyvo.utils import testing, vocabularies

from astropy.utils.data import get_pkg_data_contents, get_pkg_data_filename

Expand Down Expand Up @@ -39,6 +41,17 @@ def callback(request, context):
yield matcher


@pytest.fixture()
def datalink_product(mocker):
def callback(request, context):
return get_pkg_data_contents('data/datalink/datalink.xml')

with mocker.register_uri(
'GET', 'http://example.com/datalink.xml', content=callback
) as matcher:
yield matcher


@pytest.fixture()
def obscore_datalink(mocker):
def callback(request, context):
Expand Down Expand Up @@ -198,3 +211,99 @@ def test_all_mixed(self):
assert res[1].endswith("comb_avg.0001.fits.fz?preview=True")
assert res[2].endswith("http://dc.zah.uni-heidelberg.de/wider.dat")
assert res[3].endswith("when-will-it-be-back")


@pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02")
@pytest.mark.usefixtures('datalink_product', 'datalink_vocabulary')
class TestIterDatalinksProducts:
"""Tests for producing datalinks from tables containing links to
datalink documents.
"""
def test_no_access_format(self):
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"}],
[("http://foo.bar/baz.jpeg",)],
resultsClass=TAPResults)
assert list(res.iter_datalinks()) == []

def test_obscore_utype(self):
res = testing.create_dalresults([
{"name": "data_product", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"},
{"name": "content_type", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_sia2_record(self):
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.reference"},
{"name": "access_format", "datatype": "char", "arraysize": "*",
"utype": "obscore:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=SIA2Results)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_sia1_record(self):
res = testing.create_dalresults([
{"name": "product", "datatype": "char", "arraysize": "*",
"ucd": "VOX:Image_AccessReference"},
{"name": "mime", "datatype": "char", "arraysize": "*",
"ucd": "VOX:Image_Format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_ssap_record(self):
res = testing.create_dalresults([
{"name": "product", "datatype": "char", "arraysize": "*",
"utype": "ssa:access.reference"},
{"name": "mime", "datatype": "char", "arraysize": "*",
"utype": "ssa:access.format"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink")],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")

def test_generic_record(self):
# The meta.code.mime and meta.ref.url UCDs are perhaps too
# generic. To ensure a somewhat predictable behaviour,
# we at least make sure we pick the first of possibly multiple
# pairs (not that this would preclude arbitrary amounts of
# chaos).
res = testing.create_dalresults([
{"name": "access_url", "datatype": "char", "arraysize": "*",
"ucd": "meta.ref.url"},
{"name": "access_format", "datatype": "char", "arraysize": "*",
"utype": "meta.code.mime"},
{"name": "alt_access_url", "datatype": "char", "arraysize": "*",
"ucd": "meta.ref.url"},
{"name": "alt_access_format", "datatype": "char", "arraysize": "*",
"utype": "meta.code.mime"},],
[("http://example.com/datalink.xml",
"application/x-votable+xml;content=datalink",
"http://example.com/bad-pick.xml",
"application/x-votable+xml;content=datalink",)],
resultsClass=TAPResults)
links = list(res.iter_datalinks())
assert len(links) == 1
assert (next(links[0].bysemantics("#this"))["access_url"]
== "http://dc.zah.uni-heidelberg.de/getproduct/flashheros/data/ca90/f0011.mt")
46 changes: 46 additions & 0 deletions pyvo/utils/testing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Miscellenaneous utilities for writing tests.
"""

from astropy.io.votable import tree
from pyvo.dal import query as dalquery


try:
TABLE_ELEMENT = tree.TableElement
except AttributeError:
TABLE_ELEMENT = tree.Table


def create_votable(field_descs, records):
"""returns a VOTableFile with a a single table containing records,
described by field_descs.
"""
votable = tree.VOTableFile()
resource = tree.Resource(type="results")
votable.resources.append(resource)
table = TABLE_ELEMENT(votable)
resource.tables.append(table)
table.fields.extend(
tree.Field(votable, **desc) for desc in field_descs)
table.create_arrays(len(records))

for index, rec in enumerate(records):
table.array[index] = rec

return votable


def create_dalresults(
field_descs,
records,
*,
resultsClass=dalquery.DALResults):
"""returns a DALResults instance for a query returning records
described by field_descs.
The arguments are as for create_votable.
"""
return resultsClass(
create_votable(field_descs, records),
url="http://testing.pyvo/test-url")

0 comments on commit 4e86bf3

Please sign in to comment.