diff --git a/bindings/python/dlite-collection-python.i b/bindings/python/dlite-collection-python.i index de53c2309..2a28c9e55 100644 --- a/bindings/python/dlite-collection-python.i +++ b/bindings/python/dlite-collection-python.i @@ -64,6 +64,7 @@ class Collection(Instance): Relations are (s, p, o, d=None)-triples with an optional fourth field `d`, specifying the datatype of the object. The datatype may have the following values: + - None: object is an IRI. - Starts with '@': object is a language-tagged plain literal. The language identifier follows the '@'-sign. @@ -85,7 +86,7 @@ class Collection(Instance): Arguments: src: Storage instance | url | driver - location: str + location: File path to load from when `src` is a driver. options: str Options passed to the storage plugin when `src` is a driver. diff --git a/bindings/python/dlite-entity-python.i b/bindings/python/dlite-entity-python.i index 7a572c2ef..d8465c6a0 100644 --- a/bindings/python/dlite-entity-python.i +++ b/bindings/python/dlite-entity-python.i @@ -404,6 +404,7 @@ def get_instance( # Allow metaid to be an Instance if isinstance(metaid, Instance): metaid = metaid.uri + errclr() # Clear internal error before calling Instance() inst = Instance( metaid=metaid, dims=dimensions, id=id, dimensions=(), properties=() # arrays must not be None @@ -459,7 +460,7 @@ def get_instance( protocol+driver://location?options#id protocol://location?driver=;options#id - where `protocol`, `driver`, `location`, `options` and `id are + where `protocol`, `driver`, `location`, `options` and `id` are documented in the load() method. If `metaid` is provided, the instance is tried mapped to this @@ -488,6 +489,7 @@ def get_instance( metaid=metaid ) else: + errclr() # Clear internal error before calling Instance() inst = Instance( url=url, metaid=metaid, dims=(), dimensions=(), properties=() # arrays @@ -502,6 +504,7 @@ def get_instance( If `metaid` is provided, the instance is tried mapped to this metadata before it is returned. """ + errclr() # Clear internal error before calling Instance() inst = Instance( storage=storage, id=id, metaid=metaid, dims=(), dimensions=(), properties=() # arrays @@ -519,6 +522,7 @@ def get_instance( from dlite.options import make_query if options and not isinstance(options, str): options = make_query(options) + errclr() # Clear internal error before calling Instance() inst = Instance( driver=driver, location=str(location), options=options, id=id, metaid=metaid, @@ -529,6 +533,7 @@ def get_instance( @classmethod def from_json(cls, jsoninput, id=None, metaid=None): """Load the instance from json input.""" + errclr() # Clear internal error before calling Instance() inst = Instance( jsoninput=jsoninput, id=id, metaid=metaid, dims=(), dimensions=(), properties=() # arrays @@ -538,6 +543,7 @@ def get_instance( @classmethod def from_bson(cls, bsoninput): """Load the instance from bson input.""" + errclr() # Clear internal error before calling Instance() inst = Instance( bsoninput=bsoninput, dims=(), dimensions=(), properties=() # arrays @@ -593,6 +599,7 @@ def get_instance( """Create a new metadata entity (instance of entity schema) casted to an instance. 
""" + errclr() # Clear internal error before calling Instance() inst = Instance( uri=uri, dimensions=dimensions, properties=properties, description=description, @@ -614,6 +621,7 @@ def get_instance( meta = get_instance(metaid) dimensions = [dimensions[dim.name] for dim in meta.properties['dimensions']] + errclr() # Clear internal error before calling Instance() inst = Instance( metaid=metaid, dims=dimensions, id=id, dimensions=(), properties=() # arrays must not be None @@ -630,10 +638,12 @@ def get_instance( warnings.warn( "create_from_url() is deprecated, use from_url() instead.", DeprecationWarning, stacklevel=2) - return Instance( + errclr() # Clear internal error before calling Instance() + inst = Instance( url=url, metaid=metaid, dims=(), dimensions=(), properties=() # arrays ) + return instance_cast(inst) @classmethod def create_from_storage(cls, storage, id=None, metaid=None): @@ -646,6 +656,7 @@ def get_instance( warnings.warn( "create_from_storage() is deprecated, use from_storage() instead.", DeprecationWarning, stacklevel=2) + errclr() # Clear internal error before calling Instance() inst = Instance( storage=storage, id=id, metaid=metaid, dims=(), dimensions=(), properties=() # arrays @@ -664,12 +675,30 @@ def get_instance( from dlite.options import make_query if options and not isinstance(options, str): options = make_query(options) + errclr() # Clear internal error before calling Instance() inst = Instance( driver=driver, location=str(location), options=options, id=id, dims=(), dimensions=(), properties=() # arrays ) return instance_cast(inst) + @classmethod + def get_uuids(cls, driver, location, options=None, pattern=None): + """Returns a iterator over matching UUIDs in storage. + + Arguments: + driver: Name of storage plugin for data parsing. + location: Location of resource. Typically a URL or file path. + options: Options passed to the protocol and driver plugins. + pattern: A glob pattern matching metadata UUIDs. If given, + only matching UUIDs will be returned. + + Return: + Iterator over all matching UUIDs in storage. + """ + with Storage(driver, location, options=options) as s: + return s.get_uuids(pattern=pattern) + def save(self, *dest, location=None, options=None): """Saves this instance to url or storage. diff --git a/bindings/python/dlite-misc-python.i b/bindings/python/dlite-misc-python.i index 36b2e2508..544989b0c 100644 --- a/bindings/python/dlite-misc-python.i +++ b/bindings/python/dlite-misc-python.i @@ -28,9 +28,9 @@ class errctl(): shown/hidden. filename: Filename to redirect errors to. The following values are handled specially: - - "None" or empty: No output is written. - - "": Write errors to stderr (default). - - "": Write errors to stdout. + - "None" or empty: No output is written. + - "": Write errors to stderr (default). + - "": Write errors to stdout. """ def __init__(self, hide=(), show=(), filename=""): diff --git a/bindings/python/quantity.py b/bindings/python/quantity.py index 8ce31cd1c..fc9a4dc73 100644 --- a/bindings/python/quantity.py +++ b/bindings/python/quantity.py @@ -1,15 +1,13 @@ - -""" Define the singleton QuantityHelper to work with pint Quantity and dlite - instance properties. +"""Define the singleton QuantityHelper to work with pint Quantity and dlite +instance properties. 
""" from typing import Any import numpy as np -HAS_PINT = True -try: - import pint -except Exception: - HAS_PINT = False +from dlite.testutils import importcheck + +pint = importcheck("pint") + DLITE_QUANTITY_TYPES = [ 'int', 'float', 'double', 'uint', @@ -21,8 +19,16 @@ class QuantityHelper: + """Singleton class for working with pint Quantity and dlite instance + properties. + """ def __init__(self): + if not pint: + raise RuntimeError( + 'you must install "pint" to work with quantities, ' + 'try: pip install pint' + ) self.__dict__['_instance'] = None self.__dict__['_registry'] = None @@ -46,7 +52,7 @@ def _get_unit_as_string(self, unit: Any) -> str: unit_string = str(unit) return unit_string.replace('%', 'percent') - def __getitem__(self, name: str) -> pint.Quantity: + def __getitem__(self, name: str) -> "pint.Quantity": p = self._get_property(name) u = self._get_unit_as_string(p.unit) return self.quantity(self._instance[name], u) @@ -105,15 +111,15 @@ def get(self, *names): return None @property - def unit_registry(self) -> pint.UnitRegistry: + def unit_registry(self) -> "pint.UnitRegistry": """ Returns the current pint UnitRegistry object """ return self._registry.get() - def quantity(self, magnitude, unit) -> pint.Quantity: + def quantity(self, magnitude, unit) -> "pint.Quantity": """ Return a pint.Quantity object """ return self._registry.Quantity(magnitude, unit) - def parse(self, value: Any) -> pint.Quantity: + def parse(self, value: Any) -> "pint.Quantity": """ Parse the given value and return a pint.Quantity object """ if isinstance(value, (pint.Quantity, self._registry.Quantity)): return value @@ -129,8 +135,8 @@ def parse(self, value: Any) -> pint.Quantity: else: return self.quantity(value, '') - def parse_expression(self, value: str) -> pint.Quantity: - """ Parse an expression (str) and return a pint.Quantity object """ + def parse_expression(self, value: str) -> "pint.Quantity": + """Parse an expression (str) and return a pint.Quantity object """ result = None if value: value_str = self._get_unit_as_string(value) @@ -171,12 +177,6 @@ def to_dict(self, names=None, value_type='quantity', fmt=''): def get_quantity_helper(instance): global quantity_helper - if HAS_PINT: - if quantity_helper is None: - quantity_helper = QuantityHelper() - return quantity_helper(instance) - else: - raise RuntimeError( - 'you must install "pint" to work with quantities, ' - 'try: pip install pint' - ) + if quantity_helper is None: + quantity_helper = QuantityHelper() + return quantity_helper(instance) diff --git a/bindings/python/scripts/CMakeLists.txt b/bindings/python/scripts/CMakeLists.txt index bb560f977..95dc9d7ea 100644 --- a/bindings/python/scripts/CMakeLists.txt +++ b/bindings/python/scripts/CMakeLists.txt @@ -79,3 +79,13 @@ test_success( dlite-validate-empty dlite-validate Empty.json ) +# Test valid yaml +test_success( + dlite-validate-valid1-yaml + dlite-validate ../tests/entities/ValidYaml1.yaml +) +# Test invalid yaml +test_failure( + dlite-validate-invalid1-yaml + dlite-validate ../tests/entities/InvalidYaml1.yaml +) diff --git a/bindings/python/scripts/dlite-validate b/bindings/python/scripts/dlite-validate index 0cc8f763a..5b0819df9 100644 --- a/bindings/python/scripts/dlite-validate +++ b/bindings/python/scripts/dlite-validate @@ -11,7 +11,7 @@ DLITE_BEHAVIOR = True # Turn off warnings about behavior changes import dlite -def parse(url, driver=None, options=None, id=None): +def parse(url, driver=None, options="mode=r", id=None): """Loads an instance from storage. 
Arguments: diff --git a/bindings/python/tests/entities/InvalidYaml1.yaml b/bindings/python/tests/entities/InvalidYaml1.yaml new file mode 100644 index 000000000..875a15bf2 --- /dev/null +++ b/bindings/python/tests/entities/InvalidYaml1.yaml @@ -0,0 +1,14 @@ +# An invalid version of datamodel ValidYaml1.yaml +uri: http://onto-ns.com/meta/0.1/InvalidYaml1 +description: A datamodel for an item with float type and an invalid shape name. +dimensions: + nf: Number of eigen-frequencies. +properties: + name: + type: str + description: Name of the item. + f: + type: float64 + shape: [nf_MISPELLED] + unit: Hz + description: The magic eigen-frequencies of the item. diff --git a/bindings/python/tests/entities/ValidYaml1.yaml b/bindings/python/tests/entities/ValidYaml1.yaml new file mode 100644 index 000000000..382cd6efa --- /dev/null +++ b/bindings/python/tests/entities/ValidYaml1.yaml @@ -0,0 +1,14 @@ +# A valid version of datamodel InvalidYaml1.yaml +uri: http://onto-ns.com/meta/0.1/ValidYaml1 +description: A datamodel for an item with float type and a valid shape name. +dimensions: + nf: Number of eigen-frequencies. +properties: + name: + type: str + description: Name of the item. + f: + type: float64 + shape: [nf] + unit: Hz + description: The magic eigen-frequencies of the item. diff --git a/bindings/python/tests/input/test_ref_type_middle.yaml b/bindings/python/tests/input/test_ref_type_middle.yaml new file mode 100644 index 000000000..beaa596c5 --- /dev/null +++ b/bindings/python/tests/input/test_ref_type_middle.yaml @@ -0,0 +1,11 @@ +http://onto-ns.com/meta/0.2/Middle: + description: Middle-level nested data structure. + dimensions: [] + properties: + - name: name + type: string + description: Value of this structure. + - name: leaf + type: ref + $ref: http://onto-ns.com/meta/0.1/Leaf + description: Reference to low-level structure.
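Reviewer note on the new YAML datamodels above: `test_ref_type_middle.yaml` defines a ref-type property directly in YAML, which is what this PR adds support for (issue #982). A minimal usage sketch, not part of the diff — the `indir` path and instance names are illustrative, and it assumes PyYAML is installed and that the `Leaf` datamodel in `test_ref_type.json` has no dimensions:

```python
from pathlib import Path

import dlite

# Hypothetical path; mirrors bindings/python/tests/test_ref_type.py
indir = Path("bindings/python/tests/input")
dlite.storage_path.append(indir)                                # test_ref_type.json
dlite.storage_path.append(indir / "test_ref_type_middle.yaml")  # Middle v0.2

Middle = dlite.get_instance("http://onto-ns.com/meta/0.2/Middle")
Leaf = dlite.get_instance("http://onto-ns.com/meta/0.1/Leaf")

middle = Middle()      # Middle v0.2 declares no dimensions
middle.name = "example"
middle.leaf = Leaf()   # a ref-type property holds another instance (assumes Leaf has no dimensions)

# The property object exposes the URI of the referenced datamodel,
# exactly as asserted in the updated test_ref_type.py below:
assert Middle.getprop("leaf").ref == "http://onto-ns.com/meta/0.1/Leaf"
```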
diff --git a/bindings/python/tests/test_entity.py b/bindings/python/tests/test_entity.py index 297b0e336..fe3ad2a6d 100644 --- a/bindings/python/tests/test_entity.py +++ b/bindings/python/tests/test_entity.py @@ -343,10 +343,37 @@ assert item.q.f.m.tolist() == [0., 1., 0.001] assert item.q.f.to("1/hour").m.tolist() == [0, 3600, 3.6] - + # For issue #750 - test instance_cast() with raises(dlite.DLiteTypeError): dlite.instance_cast(inst, dlite.Metadata) castinst = dlite.instance_cast(inst.meta, dlite.Instance) assert type(castinst) == dlite.Instance assert type(dlite.instance_cast(castinst)) == dlite.Metadata + + +# Test storage query +uuids = { + '850637b9-1d21-573c-91b6-477530e4bf58', + '020e411b-f349-5689-8657-f82b709369c3', + '570611f5-96b3-5b0d-90ad-f3a4c19a78b2', + '5e378ac7-83c9-5d77-ab20-b5bb32c695da', + 'e5efe084-27f2-5fec-9b1c-fa1a692e1434', +} +with dlite.Storage("json", indir / "test_ref_type.json") as s: + assert set(s.get_uuids()) == uuids + assert set(s.get_uuids("http://onto-ns.com/meta/0.3/EntitySchema")) == uuids + assert s.get_uuids("xxx") == [] +assert set( + dlite.Instance.get_uuids("json", indir / "test_ref_type.json") +) == uuids +assert set( + dlite.Instance.get_uuids( + "json", indir / "test_ref_type.json", + pattern="http://onto-ns.com/meta/0.3/EntitySchema", + ) +) == uuids +assert dlite.Instance.get_uuids( + "json", indir / "test_ref_type.json", + pattern="xxx", +) == [] diff --git a/bindings/python/tests/test_ref_type.py b/bindings/python/tests/test_ref_type.py index 5d7e62c5b..763198bf3 100644 --- a/bindings/python/tests/test_ref_type.py +++ b/bindings/python/tests/test_ref_type.py @@ -1,15 +1,24 @@ from pathlib import Path import dlite +from dlite.testutils import importcheck + +yaml = importcheck("yaml") thisdir = Path(__file__).resolve().parent indir = thisdir / "input" -dlite.storage_path.append(indir / "test_ref_type.json") +dlite.storage_path.append(indir) +dlite.storage_path.append(indir / "test_ref_type_middle.yaml") + +# If yaml is available, we read Middle v0.2, which is defined in +# `test_ref_type_middle.yaml`. Otherwise, we read Middle v0.1, which +# is defined together with the other datamodels in `test_ref_type.json`. +version = "0.2" if yaml else "0.1" Top = dlite.get_instance("http://onto-ns.com/meta/0.1/Top") -Middle = dlite.get_instance("http://onto-ns.com/meta/0.1/Middle") +Middle = dlite.get_instance(f"http://onto-ns.com/meta/{version}/Middle") Leaf = dlite.get_instance("http://onto-ns.com/meta/0.1/Leaf") Linked = dlite.get_instance("http://onto-ns.com/meta/0.1/Linked") Tree = dlite.get_instance("http://onto-ns.com/meta/0.1/Tree") @@ -78,6 +87,8 @@ assert cyclic.subtree[0].subtree[0] == cyclic assert cyclic.subtree[0].subtree[0].subtree[0] == cyclic -# Instantiate nested from dict -# For issue #515 -# middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}}) +# For issue #982: ref-type in yaml +assert Middle.getprop("leaf").ref == "http://onto-ns.com/meta/0.1/Leaf" + +# For issue #515: Instantiate nested from dict +#middle = Middle(properties={"name": "nested", "leaf": {"a": 1, "b": True}})
- - `w`: Truncate existing file or create new file. + - `a`: Open for writing, add to existing `location` (default). + - `r`: Open existing `location` for reading. + - `w`: Open for writing. If `location` exists, it is truncated. - `soft7`: Whether to save using SOFT7 format. - `single`: Whether the input is assumed to be in single-entity form. If "auto" (default) the form will be inferred automatically. @@ -244,6 +244,7 @@ # Test plugin that only defines to_bytes() and from_bytes() -#print("===================================") -#dlite.Storage.plugin_help("testbuff") -#buf = inst.to_bytes("bufftest") +txt = dlite.Storage.plugin_help("bufftest") +assert txt == "Test plugin that represents instances as byte-encoded json." +buf = inst.to_bytes("bufftest") +assert buf == str(inst).encode() diff --git a/bindings/python/triplestore/README.md b/bindings/python/triplestore/README.md deleted file mode 100644 index 53d3baf25..000000000 --- a/bindings/python/triplestore/README.md +++ /dev/null @@ -1,148 +0,0 @@ -Triplestore -=========== -> A Python package encapsulating different triplestores using the strategy -> design pattern. - -This package has by itself no dependencies outside the standard library, -but the triplestore backends may have. - -The main class is Triplestore, who's `__init__()` method takes the name of the -backend to encapsulate as first argument. Its interface is strongly inspired -by rdflib.Graph, but simplified when possible to make it easy to use. Some -important differences: -- all IRIs are represented by Python strings -- blank nodes are strings starting with "_:" -- literals are constructed with `Literal()` - -```python -from triplestore import Triplestore -ts = Triplestore(backend="rdflib") -``` - -The module already provides a set of pre-defined namespaces that simplifies -writing IRIs. For example: - -```python -from triplestore import RDFS, OWL -RDFS.subClassOf -# -> 'http://www.w3.org/2000/01/rdf-schema#subClassOf' -``` - -New namespaces can be created using the Namespace class, but are usually -added with the `bind()` method: - -```python -ONTO = ts.bind("onto", "http://example.com/onto#") -ONTO.MyConcept -# -> 'http://example.com/onto#MyConcept' -``` - -Namespace also support access by label and IRI checking. Both of these features -requires loading an ontology. The following example shows how to create an EMMO -namespace with IRI checking. The keyword argument `label_annotations=True` enables -access by `skos:prefLabel`, `rdfs:label` or `skos:altLabel`. The `check=True` -enables checking for existing IRIs. The `triplestore_url=...` is a resolvable URL -that can be read by the 'rdflib' backend. It is needed, because the 'rdflib' -backend is currently not able to load EMMO from the "http://emmo.info/emmo#" -namespace. - -```python -EMMO = ts.bind( - "emmo", "http://emmo.info/emmo#", - label_annotations=True, - check=True, - triplestore_url="https://emmo-repo.github.io/versions/1.0.0-beta4/emmo-inferred.ttl", -) -EMMO.Atom -# -> 'http://emmo.info/emmo#EMMO_eb77076b_a104_42ac_a065_798b2d2809ad' -EMMO.invalid_name -# -> NoSuchIRIError: http://emmo.info/emmo#invalid_name -``` - -New triples can be added either with the `parse()` method (for -backends that support it) or the `add()` and `add_triples()` methods: - -```python -# en(msg) is a convenient function for adding english literals. -# It is equivalent to ``triplestore.Literal(msg, lang="en")``. 
-from triplestore import en -ts.parse("onto.ttl", format="turtle") -ts.add_triples([ - (ONTO.MyConcept, RDFS.subClassOf, OWL.Thing), - (ONTO.MyConcept, RDFS.label, en("My briliant ontological concept.")), -]) -``` - -For backends that support it the triplestore can be serialised using -`serialize()`: - -```python -ts.serialize("onto2.ttl") -``` - -A set of convenient functions exists for simple queries, including -`triples()`, `subjects()`, `predicates()`, `objects()`, `subject_predicates()`, -`subject_objects()`, `predicate_objects()` and `value()`. Except for `value()`, -they return the result as generators. For example: - -```python -ts.objects(subject=ONTO.MyConcept, predicate=RDFS.subClassOf) -# -> -list(ts.objects(subject=ONTO.MyConcept, predicate=RDFS.subClassOf)) -# -> ['http://www.w3.org/2002/07/owl#Thing'] -``` - -The `query()` and `update()` methods can be used to query and update the -triplestore using SPARQL. - -Finally Triplestore has two specialised methods `add_mapsTo()` and -`add_function()` that simplify working with mappings. `add_mapsTo()` is -convinient for defining new mappings: - -```python -from triplestore import Namespace -META = Namespace("http://onto-ns.com/meta/0.1/MyEntity#") -ts.add_mapsTo(ONTO.MyConcept, META.my_property) -``` - -It can also be used with DLite and SOFT7 data models. Here we repeat -the above with DLite: - -```python -import dlite -meta = dlite.get_instance("http://onto-ns.com/meta/0.1/MyEntity") -ts.add_mapsTo(ONTO.MyConcept, meta, "my_property") -``` - -The `add_function()` describes a function and adds mappings for its -arguments and return value(s). Currently it only supports the [Function -Ontology (FnO)](https://fno.io/). - -```python -def mean(x, y): - """Returns the mean value of `x` and `y`.""" - return (x + y)/2 - -ts.add_function( - mean, - expects=(ONTO.RightArmLength, ONTO.LeftArmLength), - returns=ONTO.AverageArmLength, -) -``` - - -Further development -------------------- -* Update the `query()` method to return the SPARQL result in a backend- - independent way. -* Add additional backends. Candidates include: - - list of tuples - - owlready2/EMMOntoPy - - OntoRec/OntoFlowKB - - Stardog - - DLite triplestore (based on Redland librdf) - - Redland librdf - - Apache Jena Fuseki - - Allegrograph - - Wikidata -* Add ontological validation of physical dimension to Triplestore.mapsTo(). diff --git a/bindings/python/triplestore/__init__.py b/bindings/python/triplestore/__init__.py deleted file mode 100644 index cb99656fe..000000000 --- a/bindings/python/triplestore/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -"""A package encapsulating different triplestores using the strategy design -pattern. - -See the README.md file for a description for how to use this package. -""" -import warnings - -warnings.warn( - "dlite.triplestore is deprecated.\n" - "Use tripper (https://github.com/EMMC-ASBL/tripper) instead.", - DeprecationWarning, - stacklevel=2, -) diff --git a/bindings/python/triplestore/test_units.py b/bindings/python/triplestore/test_units.py deleted file mode 100644 index 59857446f..000000000 --- a/bindings/python/triplestore/test_units.py +++ /dev/null @@ -1,31 +0,0 @@ -from units import get_pint_registry - - -ureg = get_pint_registry(force_recreate=True) - -# Test the registry. 
-test_quantity1 = 1234 * ureg.M -print(str(test_quantity1.to_base_units())) -assert str(test_quantity1) == "1234 m" - -test_quantity2 = 2345.6 * ureg.Watt_per_Kelvin -print(str(test_quantity2)) -assert str(test_quantity2) == "2345.6 w/K" - -test_quantity3 = test_quantity1 * test_quantity2 -print("".join([str(test_quantity3), " = ", - str(test_quantity3.to_base_units()), - " = ", - "{:~}".format(test_quantity3.to_base_units())])) - -test_quantity4 = 16 * ureg.S -print(f'{str(test_quantity4)} = {test_quantity4.to_base_units()}') -assert str(test_quantity4) == "16 S" - -test_quantity5 = 25 * ureg.s -print(f'{str(test_quantity5)} = {test_quantity5.to_base_units()}') -assert str(test_quantity5) == "25 s" - -test_quantity6 = 36 * ureg.exaAmpere -print(f'{str(test_quantity6)} = {test_quantity6.to_base_units()}') -assert str(test_quantity6) == "36 EA" \ No newline at end of file diff --git a/bindings/python/triplestore/units.py b/bindings/python/triplestore/units.py deleted file mode 100644 index dc8d97af5..000000000 --- a/bindings/python/triplestore/units.py +++ /dev/null @@ -1,364 +0,0 @@ -"""Populates a pint unit registry from an ontology. -""" -import os -import re -import logging -from pint import UnitRegistry -from tripper import Triplestore, RDFS -from appdirs import user_cache_dir - - -def load_qudt(): - """Returns a Triplestore instance with QUDT pre-loaded.""" - ts = Triplestore(backend="rdflib") - ts.parse(source="http://qudt.org/2.1/vocab/unit") - ts.parse(source="http://qudt.org/2.1/schema/qudt") - return ts - - -def parse_qudt_dimension_vector(dimension_vector: str) -> dict: - """Split the dimension vector string into separate dimensions.""" - dimensions = re.findall(r'[AELIMHTD]-?[0-9]+', dimension_vector) - - result = {} - for dimension in dimensions: - result[dimension[0]] = dimension[1:] - - for letter in "AELIMHTD": - if letter not in result.keys(): - raise Exception( - f'Missing dimension "{letter}" in dimension vector ' - f'"{dimension_vector}"' - ) - return result - - -def pint_definition_string(dimension_dict: dict) -> str: - # Base units defined by: - # https://qudt.org/schema/qudt/QuantityKindDimensionVector - # https://qudt.org/vocab/sou/SI - base_units = { - "A": "mol", - "E": "A", - "L": "m", - "I": "cd", - "M": "kg", - "H": "K", - "T": "s", - "D": "1", - } - - # Build the unit definition, dimension by dimension. - result = [] - for letter, unit in base_units.items(): - exponent = dimension_dict[letter] - if int(dimension_dict[letter]) < 0: - result.append(f"/ {unit}**{exponent[1:]} ") - elif int(dimension_dict[letter]) > 0: - result.append(f"* {unit}**{exponent} ") - return "".join(result) - - -def prepare_cache_file_path(filename: str) -> str: - """Return cache file name.""" - cache_directory = user_cache_dir("dlite") - if not os.path.exists(cache_directory): - os.makedirs(cache_directory) - return os.path.join(cache_directory, filename) - - -def get_pint_registry(sources=('qudt', ), force_recreate=False) -> UnitRegistry: - """Load units from one or more unit sources into a Pint unit registry. - - Arguments: - sources: Sequence of unit sources to load. The sources are loaded - in the provided order. In case of conflicts, the source listed - first has precedence. - force_recreate: Whether to recreate the unit registry cache. - - Returns: - Pint unit registry. 
- """ - registry_file_path = prepare_cache_file_path("pint_unit_registry.txt") - if force_recreate or not os.path.exists(registry_file_path): - with open(registry_file_path, "w", encoding="utf8") as f: - f.write("\n".join(pint_prefix_lines()) + "\n") - for source in sources: - pint_registry_lines = pint_registry_lines_from_qudt() - with open(registry_file_path, "a", encoding="utf8") as f: - f.write("\n".join(pint_registry_lines) + "\n") - - ureg = UnitRegistry(registry_file_path) - #ureg.default_format = "~P" #symbols, pretty print - ureg.default_format = "~" #symbols, standard print (preferred) - #ureg.default_format = "~C" #symbols, compact print - return ureg - - -def pint_prefix_lines(): - # Decimal prefixes from pint's default_en.txt registry. - prefixes = ['quecto- = 1e-30 = q-', - 'ronto- = 1e-27 = r-', - 'yocto- = 1e-24 = y-', - 'zepto- = 1e-21 = z-', - 'atto- = 1e-18 = a-', - 'femto- = 1e-15 = f-', - 'pico- = 1e-12 = p-', - 'nano- = 1e-9 = n-', - 'micro- = 1e-6 = µ- = μ- = u-', - 'milli- = 1e-3 = m-', - 'centi- = 1e-2 = c-', - 'deci- = 1e-1 = d-', - 'deca- = 1e+1 = da- = deka-', - 'hecto- = 1e2 = h-', - 'kilo- = 1e3 = k-', - 'mega- = 1e6 = M-', - 'giga- = 1e9 = G-', - 'tera- = 1e12 = T-', - 'peta- = 1e15 = P-', - 'exa- = 1e18 = E-', - 'zetta- = 1e21 = Z-', - 'yotta- = 1e24 = Y-', - 'ronna- = 1e27 = R-', - 'quetta- = 1e30 = Q-', - ] - return prefixes - - -def prefix_names(): - lines = pint_prefix_lines() - for i in range(len(lines)): - lines[i] = lines[i].split(" ")[0].replace("-", "") - return lines - - -def pint_registry_lines_from_qudt(): - ts = load_qudt() - - QUDTU = ts.bind("unit", "http://qudt.org/vocab/unit/", check=True) - QUDT = ts.bind("unit", "http://qudt.org/schema/qudt/", check=True) - DCTERMS = ts.bind("dcterms", "http://purl.org/dc/terms/") - - pint_registry_lines = [] - pint_definitions = {} - identifiers = PintIdentifiers() - - # Explicit definition of which QUDT units that will serve as base units for - # the pint unit registry. (i.e. the QUDT names for the SI units and the - # name of their physical dimension) - #base_unit_dimensions ={ - # "M": "length", - # "SEC": "time", - # "A": "current", - # "CD": "luminosity", - # "KiloGM": "mass", - # "MOL": "substance", - # "K": "temperature", - #} - - # Base units defined by rdfs:label (instead of QUDT name): - base_unit_dimensions ={ - "Meter": "length", - "Second": "time", - "Ampere": "current", - "Candela": "luminosity", - "Kilogram": "mass", - "Mole": "substance", - "Kelvin": "temperature", - } - - # Read info from all units. - for s, p, o in ts.triples([None, QUDT.hasDimensionVector, None]): - - # Check if this unit has been replaced; then skip it. - replaced_by = next( - ts.objects(subject=s, predicate=DCTERMS.isReplacedBy), None) - if replaced_by is not None: - continue - - unit = s.split("/")[-1] - unit_name = unit.replace("-", "_") - - omit_prefixed_unit = False - for prefix in prefix_names(): - if unit_name.startswith('KiloGM'): - pass - elif unit_name.lower().startswith(prefix): - omit_prefixed_unit = True - break - - if omit_prefixed_unit: - continue - - # Extract and parse the dimension vector. - dimension_vector = o.split("/")[-1] - pint_definition = pint_definition_string(parse_qudt_dimension_vector( - dimension_vector)) - - # Extract multiplier and offset. 
- multiplier = next( - ts.objects(subject=s, predicate=QUDT.conversionMultiplier), "1") - offset = next(ts.objects(subject=s, predicate=QUDT.conversionOffset), - None) - - pint_definitions[s] = { - "unit_in_SI": pint_definition, - "multiplier": multiplier, - "offset": offset, - } - - # Extract identifiers. - pint_name_is_set = False - prio_downgrade = 2 - for label in ts.objects(subject=s, predicate=RDFS.label): - label = label.replace(" ", "_") - label = label.replace("-", "_") - if pint_name_is_set: - identifiers.add_identifier( - URI=s, label_name="label", prio=5, identifier=label) - else: - if label in base_unit_dimensions.keys(): - prio_downgrade = 0 - identifiers.add_identifier( - URI=s, label_name="unit_name", prio=1+prio_downgrade, identifier=label) - pint_name_is_set = True - - identifiers.add_identifier( - URI=s, label_name="label", prio=6, identifier=unit_name) - # Can there be more than one symbol in QUDT? - symbol = next(ts.objects(subject=s, predicate=QUDT.symbol), None) - if symbol is not None: - symbol = symbol.replace(" ", "_") - identifiers.add_identifier( - URI=s, label_name="symbol", prio=2+prio_downgrade, identifier=symbol) - udunits_code = next( - ts.objects(subject=s, predicate=QUDT.udunitsCode), None) - if udunits_code is not None: - udunits_code = udunits_code.replace(" ", "_") - identifiers.add_identifier( - URI=s, label_name="udunits_code", prio=7, identifier=udunits_code) - - identifiers.remove_ambiguities() - - # Build the pint unit registry lines. - for URIb, definition in pint_definitions.items(): - - unit_identifiers = identifiers.get_identifiers(URI=URIb) - - # Start constructing the pint definition line. - unit_name = unit_identifiers["unit_name"] - if unit_name is None: - logging.warning(f'Omitting UNIT {URIb} due to name conflict.') - continue - if unit_name in base_unit_dimensions.keys(): - pint_definition_line = ( - f'{unit_name} = [{base_unit_dimensions[unit_name]}]' - ) - else: - pint_definition_line = ( - f'{unit_name} = {definition["multiplier"]} ' - f'{definition["unit_in_SI"]}' - ) - - # Add offset. - if definition["offset"] is not None: - pint_definition_line += f'; offset: {definition["offset"]}' - - # Add symbol. - symbol = unit_identifiers["symbol"] - if symbol is None: - symbol = "_" - pint_definition_line += f' = {symbol}' - - # Add any labels. - for label in unit_identifiers["labels"]: - if label is not None: - pint_definition_line += f' = {label}' - - # Add URI. - pint_definition_line += f' = {URIb}' - - # Add udunits code. - udunits_code = unit_identifiers["udunits_code"] - if udunits_code is not None: - pint_definition_line += f' = {udunits_code}' - - pint_registry_lines.append(pint_definition_line) - return pint_registry_lines - - -class PintIdentifiers: - """Class for handling the various identifiers, with the functionality - to remove any ambiguous definitions. - """ - def __init__(self): - self.URIs = [] - self.label_names = [] - self.prios = [] - self.identifiers = [] - - def add_identifier(self, URI: str, label_name: str, prio: int, - identifier:str): - self.URIs.append(URI) - self.label_names.append(label_name) - self.prios.append(prio) - self.identifiers.append(identifier) - - def remove_ambiguities(self): - """Remove ambiguities. - - Set ambiguous identifiers to None. - Keep the first occurence within each priority level. - """ - # Store used identifiers along with their URI. - used_identifiers = {} - - # For each priority level, remove any ambiguities. 
- for prio in sorted(list(set(self.prios))): - inds_prio = [i for i,value in enumerate(self.prios) if value==prio] - for i in inds_prio: - if self.identifiers[i] is not None: - # Check if the identifier has already been used. - if self.identifiers[i] in used_identifiers.keys(): - # Warn if this identifier belongs to another URI. - URI_of_identifier = used_identifiers[self.identifiers[i]] - if self.URIs[i] is not URI_of_identifier: - logging.warning( - f'Omitting {self.label_names[i]} ' - f'"{self.identifiers[i]}" from {self.URIs[i]} ' - f'(the identifier is used for ' - f'{URI_of_identifier})' - ) - self.identifiers[i] = None - else: - used_identifiers[self.identifiers[i]] = self.URIs[i] - - - def is_valid_identifier(self, identifier: str, URI: str, - label_name: str) -> bool: - """Check if an identifier is valid for use as a particular - label_name for a particular unit. - """ - identifier_index = self.identifiers.index(identifier) - return (self.URIs[identifier_index] == URI and - self.label_names[identifier_index] == label_name) - - def get_identifiers(self, URI:str) -> dict: - """Returns a dict containing all identifiers for a given URI.""" - identifiers = {} - identifiers["labels"] = [] - - inds = [i for i,value in enumerate(self.URIs) if value==URI] - for i in inds: - label_name = self.label_names[i] - identifier = self.identifiers[i] - if label_name == "unit_name": - identifiers["unit_name"] = identifier - elif label_name == "symbol": - identifiers["symbol"] = identifier - elif label_name == "label": - identifiers["labels"].append(identifier) - elif label_name == "udunits_code": - identifiers["udunits_code"] = identifier - - return identifiers diff --git a/bindings/python/triplestore/units_README.md b/bindings/python/triplestore/units_README.md deleted file mode 100644 index 8680a8e54..000000000 --- a/bindings/python/triplestore/units_README.md +++ /dev/null @@ -1,61 +0,0 @@ -Pint unit registry generator -============================ - -Introduction and usage ----------------------- - -The units.py file contains the get_pint_registry() function, which downloads -the [QUDT UNITS](https://www.qudt.org/doc/DOC_VOCAB-UNITS.html) vocabulary -and uses its contents to generate a unit registry file for -[Pint](https://pint.readthedocs.io). The function then uses the generated -registry file to generate and return a Pint UnitRegistry object. - -The unit registry file is cached, and the default behavior is to not -re-recreate it if it already exists in the cache directory. - -Any unit identifiers in QUDT UNITS that use the "-" or " " characters will -have these replaced with "_" (in order to be Pint compatible). - -The usage of the get_pint_registry() is demonstrated in test_units.py. - - -Technical details ------------------ -* Unit registry filename: `pint_unit_registry.txt` -* Cache directory in Unix/Linux: typically `~/.cache/dlite` -* Cache directory in Windows 10: typically `C:\Users\\AppData\Local\ -Packages\PythonSoftwareFoundation.Python.\LocalCache\Local\dlite\ -dlite\Cache` -* Cache directory in Mac OS X: presumably `~/Library/Caches/dlite` (not tested) - -For the units, all identifiers, alternative labels and definitions are -read from QUDT. The program resolves naming conflicts by omitting the -conflicting labels, following a certain prioritization. Highest priority is -given to the rdfs:label identifiers, which are used as the primary -identifier ("canonical name") in the Pint unit registry. - -Prefix definitions are hard-coded and not read from QUDT. 
Units in QUDT UNITS -that start with a prefix are omitted, since Pint performs reasoning based on -the prefix definitions in its unit registry. The "KiloGM" SI unit is -excepted from this rule. - - -Known problems --------------- -* The program does not yet work on Windows, see -[issue #497](https://github.com/SINTEF/dlite/issues/497). - -* The program provides warnings (at registry creation time) for **omitted -units and/or labels**, with details about any label conflicts. This output -can be used to identify duplications and inconsistencies within QUDT UNITS. - -* Since the QUDT UNITS vocabulary is subject to change, so are the omitted -units and labels. When a unit is assigned a new label in QUDT UNITS, this -may take precedence over currently existing labels on other units (depending -on the prioritization of the various label types). Since QUDT UNITS seems -not to be consistency-checked before release, this can result in (possibly -undetected) changed references to the units of interest. - -* Units that contain a prefix inside its name are currently included in the -generated Pint unit registry. - diff --git a/bindings/python/utils.py b/bindings/python/utils.py index 6f7047f7a..87eebc359 100644 --- a/bindings/python/utils.py +++ b/bindings/python/utils.py @@ -136,8 +136,16 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): if meta.is_metameta: if "uri" in d: uri = d["uri"] - else: + elif "identity" in d: + uri = d["identity"] + elif "name" in d and "version" in d and "namespace" in d: uri = dlite.join_meta_uri(d["name"], d["version"], d["namespace"]) + elif id and dlite.urlparse(id).scheme: + uri = id + else: + raise TypeError( + "`id` required for metadata when the URI is not in the dict" + ) if check_storages: try: @@ -169,6 +177,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): dlite.Property( name=p["name"], type=p["type"], + ref=p.get("$ref", p.get("ref")), shape=p.get("shape", p.get("dims")), unit=p.get("unit"), description=p.get("description"), @@ -180,6 +189,7 @@ def instance_from_dict(d, id=None, single=None, check_storages=True): dlite.Property( name=k, type=v["type"], + ref=v.get("$ref", v.get("ref")), shape=v.get("shape", v.get("dims")), unit=v.get("unit"), description=v.get("description"), diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index 4c1b4b934..9e5667cdd 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -14,7 +14,14 @@ set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html") set(SPHINX_INDEX_FILE "${SPHINX_HTML_DIR}/index.html") # Doxygen intput source directory -set(DOXYGEN_INPUT_DIR ${dlite_SOURCE_DIR}/src) +set(DOXYGEN_INPUT_DIRS + ${dlite_SOURCE_DIR}/src/utils + ${dlite_SOURCE_DIR}/src + ${dlite_SOURCE_DIR}/src/pyembed + #${dlite_BINARY_DIR}/bindings/python/dlite +) +string(REPLACE ";" " " DOXYGEN_INPUT "${DOXYGEN_INPUT_DIRS}") + # Doxygen output directory set(DOXYGEN_OUTPUT_XML_DIR "${CMAKE_CURRENT_BINARY_DIR}/xml") @@ -53,8 +60,8 @@ configure_file( # Doxygen command add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE} DEPENDS ${DLITE_PUBLIC_HEADERS} - COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_IN} ${DOXYFILE_OUT} - MAIN_DEPENDENCY ${DOXYFILE_OUT} ${DOXYFILE_IN} + COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT} + MAIN_DEPENDENCY ${DOXYFILE_OUT} WORKING_DIRECTORY ${BINARY_BUILD_DIR} COMMENT "Generating docs" VERBATIM) diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in index 202ecaf1e..0a9715e4a 100644 --- a/doc/Doxyfile.in +++ b/doc/Doxyfile.in @@ -775,7 +775,7 @@ WARN_LOGFILE = # spaces. 
See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. -INPUT = "@DOXYGEN_INPUT_DIR@" +INPUT = @DOXYGEN_INPUT@ # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses @@ -831,7 +831,8 @@ EXCLUDE_SYMLINKS = NO # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* -EXCLUDE_PATTERNS = +EXCLUDE_PATTERNS = */tests/* \ + */old/* # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the @@ -848,7 +849,8 @@ EXCLUDE_SYMBOLS = # that contain example code fragments that are included (see the \include # command). -EXAMPLE_PATH = @dlite_SOURCE_DIR@/src/utils/tests +EXAMPLE_PATH = @dlite_SOURCE_DIR@/examples \ + @dlite_SOURCE_DIR@/src/utils/tests/tgen_example.c # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and diff --git a/doc/api.rst b/doc/api.rst index 0ebc29681..36c804900 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -2,8 +2,8 @@ API Reference ============= .. toctree:: - :maxdepth: 4 + :maxdepth: 2 :caption: API Reference - Python API + Python API C-API diff --git a/doc/conf.py.in b/doc/conf.py.in index f9bada93e..8f7ef6bc4 100644 --- a/doc/conf.py.in +++ b/doc/conf.py.in @@ -51,15 +51,7 @@ exclude_patterns = [ ] extensions = [ - # Temporary disable autoapi since it leads to the following error during - # build of HTML documentation - # - # Extension error (autoapi.extension): - # Handler for event - # 'builder-inited' threw an exception (exception: 'Module' object has no - # attribute 'doc') - # - #"autoapi.extension", + "autoapi.extension", "breathe", # Doxygen bridge "myst_nb", # markdown source support & support for Jupyter notebooks "sphinx.ext.graphviz", # Graphviz @@ -84,12 +76,13 @@ dlite_share_plugins = [ if plugin_dir.is_dir() ] -autoapi_dirs = [ - "@CMAKE_BINARY_DIR@/bindings/python/dlite" -] + [ - f"@CMAKE_BINARY_DIR@/bindings/python/dlite/share/dlite/{plugin_dir}" - for plugin_dir in dlite_share_plugins -] +#autoapi_dirs = [ +# "@CMAKE_BINARY_DIR@/bindings/python/dlite" +#] + [ +# f"@CMAKE_BINARY_DIR@/bindings/python/dlite/share/dlite/{plugin_dir}" +# for plugin_dir in dlite_share_plugins +#] +autoapi_dirs = ["@CMAKE_BINARY_DIR@/bindings/python/dlite"] autoapi_type = "python" autoapi_file_patterns = ["*.py", "*.pyi"] autoapi_template_dir = "_templates/autoapi" @@ -103,7 +96,10 @@ autoapi_options = [ "imported-members", ] autoapi_keep_files = True # Should be False in production +#autoapi_keep_files = False # Should be False in production autoapi_python_use_implicit_namespaces = True # True to avoid namespace being `python.dlite` +#autoapi_ignore = ["@CMAKE_BINARY_DIR@/doc/_build/autoapi/dlite/dlite"] + autodoc_typehints = "description" autodoc_typehints_format = "short" @@ -127,7 +123,7 @@ html_theme_options = { "use_issues_button": True, "use_fullscreen_button": True, "use_repository_button": True, - "logo_only": True, + #"logo_only": True, "show_navbar_depth": 1, "announcement": "This documentation is under development!", } diff --git a/doc/contributors_guide/documentation_contributors.md b/doc/contributors_guide/documentation_contributors.md index 95f6c74d4..b85b45115 100644 --- a/doc/contributors_guide/documentation_contributors.md +++ 
b/doc/contributors_guide/documentation_contributors.md @@ -4,7 +4,45 @@ Guideline for contributing documentation The DLite documentation is written in [Markdown]. This include both the README files and documentation found in the `doc/` subdirectory. + +Generate documentation locally +------------------------------ +When writing documentation it is practical to build and check the documentation locally before submitting a pull request. + +The following steps are needed for building the documentation: + +1. Install dependencies. + + First you need [doxygen]. In Ubuntu it can be installed with + + sudo apt install doxygen + + Python requirements can be installed with + + pip install --upgrade -r requirements_doc.txt + +2. Ask cmake to build the documentation + + ``` + cd <build-dir> + cmake -DWITH_DOC=YES . + ``` + + If you haven't built dlite before, you should replace the final dot with the + path to the root of the DLite source directory. + +3. Build the documentation + + cmake --build . + + Check and fix possible error and warning messages from doxygen and sphinx. + The generated documentation can be found in `<build-dir>/doc/html/index.html`. + + +Style recommendations and guidelines +------------------------------------ Common to both is that the text should be as easy and natural as possible to read and write both from the terminal, in an editor and rendered in a web browser. + Hence, the following recommendations: * Write one sentence per line, in order to get an easier to read output from `git diff`. @@ -97,6 +135,7 @@ If you click that button, it will toggle the prompt and output on or off, making +[doxygen]: https://www.doxygen.nl/ [Markdown]: https://en.wikipedia.org/wiki/Markdown [setext]: https://github.com/DavidAnson/markdownlint/blob/main/doc/md003.md [CommonMark]: https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet diff --git a/doc/getting_started/build/patch_activate.md b/doc/getting_started/build/patch_activate.md index 1186ad031..668113733 100644 --- a/doc/getting_started/build/patch_activate.md +++ b/doc/getting_started/build/patch_activate.md @@ -1,3 +1,5 @@ +Patch activate +============== By default, [virtualenv] does not set `LD_LIBRARY_PATH`. This will result in errors when running applications that links to libdlite, like for example, `dlite-codegen`. To fix this, `$VIRTUAL_ENV/lib/` needs to be appended/prepended to `LD_LIBRARY_PATH`. diff --git a/doc/index.rst b/doc/index.rst index 5ad7fe4df..ca1a43d17 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -49,12 +49,12 @@ DLite .. toctree:: - :maxdepth: 3 + :maxdepth: 2 :caption: API Reference :glob: :hidden: - Python API + Python API C-API diff --git a/doc/user_guide/concepts.md b/doc/user_guide/concepts.md index 8d77f5185..944e02494 100644 --- a/doc/user_guide/concepts.md +++ b/doc/user_guide/concepts.md @@ -269,83 +269,6 @@ Relations are currently not explored in metadata, but are included because of their generality. However, relations are heavily used in [collections]. - -### Representing an entity -Lets start to make a "Person" entity, where we want to describe his/her name, age and skills. - -```json -{ - "uri": "http://onto-ns.com/meta/0.1/Person", - "meta": "http://onto-ns.com/meta/0.3/EntitySchema", - "description": "A person.", - "dimensions": [ - { - "name": "N", - "description": "Number of skills." - } - ], - "properties": [ - { - "name": "name", - "type": "string", - "description": "Full name." - }, - { - "name": "age", - "type": "float", - "unit": "years", - "description": "Age of person."
- }, - { - "name": "skills", - "type": "string", - "shape": ["N"], - "description": "List of skills." - } - ] -} -``` - -First we have "uri" identifying the entity, "meta" telling that this is an instance of the entity schema (hence an entity) and a human description. -Then comes "dimensions". -In this case one dimension named "N", which is the number of skills the person has. -Finally we have the properties; "name", "age" and "skills". -We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill. - - -### SOFT7 representation -Based on input from [SOFT7], DLite also supports a slightly shortened representation of entities. -The "Person" entity from the above example will in this representation, look like: - -```json -{ - "uri": "http://onto-ns.com/meta/0.1/Person", - "description": "A person.", - "dimensions": { - "N": "Number of skills." - }, - "properties": { - "name": { - "type": "string", - "description": "Full name." - }, - "age": { - "type": "float", - "unit": "years", - "description": "Age of person." - }, - "skills": { - "type": "string", - "shape": ["N"], - "description": "List of skills." - } - } -} -``` - -In this representation defaults the `meta` field to the entity schema if it is left out. -Dimensions and Properties are dictionaries (JSON objects) instead of arrays with the dimension or property name as key. references ---------- diff --git a/doc/user_guide/datamodels.md b/doc/user_guide/datamodels.md new file mode 100644 index 000000000..76c2c455e --- /dev/null +++ b/doc/user_guide/datamodels.md @@ -0,0 +1,73 @@ +Representing a datamodel (entity) +---------------------------------- + +The underlying structure of DLite datamodels is described under [concepts]. + +Here, a set of rules on how to create a datamodel is presented. + +Note that several other possibilities are available, as can be seen in the +examples and tests present in the repository. + +We choose here to present only one method, as mixing representation methods might +be confusing. Note, however, that YAML and JSON representations are interchangeable. + +A generic example with some comments for clarity can be seen below. + +```yaml +uri: http://namespace/version/name +description: A description of what this datamodel represents. +dimensions: # Named dimensions referred to in the property shapes. Simplest to represent as a dict; set to {} if there are no dimensions + name_of_dimension: description of dimension +properties: + name_of_property1: + description: A description of this property. + type: ref # Can be any of string, float, double, int, ref, ... + unit: unit # Can be omitted if the property has no unit + shape: [name_of_dimension] # Can be omitted if the property is a scalar + $ref: http://namespace/version/name_of_referenced_datamodel # only if type is ref +``` + +The keywords in the datamodel have the following meaning: +* `uri`: A URI that uniquely identifies the datamodel. +* `description`: A human-readable description of what this datamodel represents. +* `dimensions`: Dimensions of the properties (referred to by the property shape). Properties may share dimensions, but do not have to. Each dimension is described by: + - the name of the dimension + - a human description of the dimension + In the "Person" example below there is one dimension with name "N" and description "Number of skills." +* `properties`: Sub-parts of the datamodel that describe the individual data fields.
A property has a name and is further specified by the following keywords: + - `description`: Human description of the property. + - `type`: Data type of the property. Ex: "blob5", "boolean", "uint", "int32", "string", "string10", "ref", ... + - `$ref`: Optional. URI of a sub-datamodel. Only used if type is "ref". + - `unit`: Optional. The unit. Ex: "kg", "km/h", ... Can be omitted if the property has no unit. + - `shape`: Optional. Describes the dimensionality of the property as a list of dimension names. Ex: `[N]`. Can be omitted if the property has no shape, i.e. the instance always has only one value. This is equivalent to a 0-dimensional array, i.e. shape=[]. + The datamodel below has three properties: "name", "age" and "skills". We see that "name" is represented as a string, "age" as a floating point number with unit years and "skills" as an array of strings, one for each skill. + + +A slightly more realistic example is the "Person" entity, where we want to describe his/her name, age and skills: + +```yaml +uri: http://onto-ns.com/meta/0.1/Person +description: A person. +dimensions: + N: Number of skills. +properties: + name: + description: Full name. + type: string + age: + description: Age of person. + type: float + unit: years + skills: + description: List of skills. + type: string + shape: [N] +``` + + +dlite-validate +============== +The [dlite-validate tool](./tools.md#dlite-validate) can be used to check whether a specific representation (in a file) is a valid DLite datamodel. + + +[concepts]: https://sintef.github.io/dlite/user_guide/concepts.html diff --git a/doc/user_guide/index.rst b/doc/user_guide/index.rst index c6ef982c6..35a4fdef2 100644 --- a/doc/user_guide/index.rst +++ b/doc/user_guide/index.rst @@ -6,6 +6,7 @@ User Guide :caption: Contents concepts + datamodels type-system exceptions collections diff --git a/doc/user_guide/storage_plugins.md b/doc/user_guide/storage_plugins.md index 1734fadf1..c45d147f0 100644 --- a/doc/user_guide/storage_plugins.md +++ b/doc/user_guide/storage_plugins.md @@ -1,5 +1,5 @@ -Storage plugins -=============== +Storage plugins / Drivers +========================= Content ------- @@ -28,6 +28,36 @@ It also comes with a specific `Blob` and `Image` storage plugin, that can load a Storage plugins can be written in either C or Python. +How to make storage plugins available +------------------------------------- + +As described below, it is possible (and most often advisable) to create specific drivers (storage plugins) for your data. +Additional storage plugin drivers can be made available by setting the environment variables +`DLITE_STORAGE_PLUGIN_DIRS` or `DLITE_PYTHON_STORAGE_PLUGIN_DIRS`, e.g.: +```bash +export DLITE_STORAGE_PLUGIN_DIRS=/path/to/new/folder:$DLITE_STORAGE_PLUGIN_DIRS +``` + +Within Python, the path to the directory containing plugins can be added as follows: + +```python +import dlite +dlite.python_storage_plugin_path.append("/path/to/plugins/dir") +``` + +Often drivers are connected to very specific datamodels (entities). +DLite will find these datamodels if the path to their directory is set with the +environment variable `DLITE_STORAGES` or added within Python with `dlite.storage_path.append`, similarly to what is described above for drivers. + + +```{attention} +Often, during development, dlite will fail unexpectedly. This is typically either because of an error in the +datamodel or the driver. +The variable DLITE_PYDEBUG can be set as `export DLITE_PYDEBUG=` to get Python debugging information.
+This will give information about the driver. +It is advisable to first check that the datamodel is valid with the command `dlite-validate datamodelfilename`. +``` + Using storages implicitly from Python ------------------------------------- For convenience DLite also has an interface for creating storages implicitly. diff --git a/doc/user_guide/tools.md b/doc/user_guide/tools.md index c0ad039d2..708c8a257 100644 --- a/doc/user_guide/tools.md +++ b/doc/user_guide/tools.md @@ -3,6 +3,18 @@ Tools DLite comes with a small set of tools. +dlite-validate +-------------- +The dlite-validate tool can be used to check if a specific representation (in a file) is a valid DLite datamodel. + +This can be run as follows +```bash +dlite-validate filename.yaml # or json +``` + +It will then return a list of errors if it is not a valid datamodel. + + dlite-getuuid ------------- This is a handy small tool for generating a random UUID or getting the UUID corresonding to an URI. diff --git a/python/setup.py b/python/setup.py index 00a3f6cbd..91e04c267 100644 --- a/python/setup.py +++ b/python/setup.py @@ -202,7 +202,6 @@ def run(self): "Operating System :: POSIX :: Linux", "Operating System :: Microsoft :: Windows", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", diff --git a/requirements_doc.txt b/requirements_doc.txt index 638e6329c..1c047492d 100644 --- a/requirements_doc.txt +++ b/requirements_doc.txt @@ -1,17 +1,22 @@ importlib-metadata==6.8.0; python_version<'3.8' -beautifulsoup4==4.12.3 -lxml==5.2.2 - -Sphinx==7.3.7 -sphinx-autoapi==3.1.2 -sphinx-autobuild==2024.4.16 -sphinx-book-theme==1.1.3 -sphinx-design==0.6.0 -sphinxcontrib-plantuml==0.30 -sphinx-toggleprompt==0.5.2 -sphinx-copybutton==0.5.2 # does not work well with toggleprompt +beautifulsoup4==4.12.3 breathe==4.35.0 -myst-nb==1.1.1 -nbclient==0.10.0 docutils==0.21.2 +lxml==5.3.0 +myst-nb==1.1.2 +nbclient==0.10.0 +Sphinx==7.4.7 +sphinx-autoapi==3.3.3 +sphinx-autobuild==2024.10.3 +sphinx-book-theme==1.1.3 +sphinx-copybutton==0.5.2 +sphinx-toggleprompt==0.5.2 +sphinx_design==0.6.1 +sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-devhelp==2.0.0 +sphinxcontrib-htmlhelp==2.1.0 +sphinxcontrib-jsmath==1.0.1 +sphinxcontrib-plantuml==0.30 +sphinxcontrib-qthelp==2.0.0 +sphinxcontrib-serializinghtml==2.0.0 diff --git a/requirements_full.txt b/requirements_full.txt index 7899f2803..a448265d6 100644 --- a/requirements_full.txt +++ b/requirements_full.txt @@ -13,7 +13,7 @@ pyarrow>=14.0,<18.0 tables>=3.8,<5.0 openpyxl>=3.0.9,<3.2 jinja2>=3.0,<4 -paramiko>=3.0.0,<3.4.1 +paramiko>=3.0.0,<3.5.1 requests>=2.10,<3 redis>=5.0,<6 minio>=6.0,<8 diff --git a/src/dlite-json.c b/src/dlite-json.c index 3993f2b75..f906524ae 100644 --- a/src/dlite-json.c +++ b/src/dlite-json.c @@ -1099,7 +1099,7 @@ static const jsmntok_t *nexttok(DLiteJsonIter *iter, int *length) DLiteJsonIter *dlite_json_iter_create(const char *src, int length, const char *metaid) { - int r, ok=0; + int r; DLiteJsonIter *iter=NULL; jsmn_parser parser; @@ -1116,10 +1116,10 @@ DLiteJsonIter *dlite_json_iter_create(const char *src, int length, iter->size = iter->tokens->size; if (metaid && dlite_get_uuid(iter->metauuid, metaid) < 0) goto fail; - ok=1; - fail: - if (!ok) dlite_json_iter_free(iter); return iter; + fail: + if (iter) dlite_json_iter_free(iter); + return NULL; } /* @@ -1308,11 +1308,15 @@ const char *dlite_jstore_iter_next(DLiteJStoreIter 
+
+```{attention}
+Often during development, DLite will fail unexpectedly. This is typically caused by an error in either the datamodel or the driver.
+The environment variable `DLITE_PYDEBUG` can be set as `export DLITE_PYDEBUG=` to get Python debugging information, which will give information about the driver.
+It is advisable to first check that the datamodel is valid with the command `dlite-validate datamodelfilename`.
+```
+
 Using storages implicitly from Python
 -------------------------------------
 For convenience DLite also has an interface for creating storages implicitly.
diff --git a/doc/user_guide/tools.md b/doc/user_guide/tools.md
index c0ad039d2..708c8a257 100644
--- a/doc/user_guide/tools.md
+++ b/doc/user_guide/tools.md
@@ -3,6 +3,18 @@ Tools
 
 DLite comes with a small set of tools.
 
+dlite-validate
+--------------
+The dlite-validate tool can be used to check if a specific representation (in a file) is a valid DLite datamodel.
+
+This can be run as follows:
+```bash
+dlite-validate filename.yaml  # or json
+```
+
+It will then return a list of errors if it is not a valid datamodel.
+
+
 dlite-getuuid
 -------------
 This is a handy small tool for generating a random UUID or getting the UUID corresponding to a URI.
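+
+The same functionality is also available from Python through `dlite.get_uuid` (a small sketch; the URI below is just an example):
+
+```python
+import dlite
+
+# A new random UUID
+print(dlite.get_uuid())
+
+# The UUID corresponding to a URI
+print(dlite.get_uuid("http://onto-ns.com/meta/0.1/Person"))
+```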
diff --git a/python/setup.py b/python/setup.py
index 00a3f6cbd..91e04c267 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -202,7 +202,6 @@ def run(self):
         "Operating System :: POSIX :: Linux",
         "Operating System :: Microsoft :: Windows",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
diff --git a/requirements_doc.txt b/requirements_doc.txt
index 638e6329c..1c047492d 100644
--- a/requirements_doc.txt
+++ b/requirements_doc.txt
@@ -1,17 +1,22 @@
 importlib-metadata==6.8.0; python_version<'3.8'
-beautifulsoup4==4.12.3
-lxml==5.2.2
-
-Sphinx==7.3.7
-sphinx-autoapi==3.1.2
-sphinx-autobuild==2024.4.16
-sphinx-book-theme==1.1.3
-sphinx-design==0.6.0
-sphinxcontrib-plantuml==0.30
-sphinx-toggleprompt==0.5.2
-sphinx-copybutton==0.5.2  # does not work well with toggleprompt
+beautifulsoup4==4.12.3
 breathe==4.35.0
-myst-nb==1.1.1
-nbclient==0.10.0
 docutils==0.21.2
+lxml==5.3.0
+myst-nb==1.1.2
+nbclient==0.10.0
+Sphinx==7.4.7
+sphinx-autoapi==3.3.3
+sphinx-autobuild==2024.10.3
+sphinx-book-theme==1.1.3
+sphinx-copybutton==0.5.2
+sphinx-toggleprompt==0.5.2
+sphinx_design==0.6.1
+sphinxcontrib-applehelp==2.0.0
+sphinxcontrib-devhelp==2.0.0
+sphinxcontrib-htmlhelp==2.1.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-plantuml==0.30
+sphinxcontrib-qthelp==2.0.0
+sphinxcontrib-serializinghtml==2.0.0
diff --git a/requirements_full.txt b/requirements_full.txt
index 7899f2803..a448265d6 100644
--- a/requirements_full.txt
+++ b/requirements_full.txt
@@ -13,7 +13,7 @@ pyarrow>=14.0,<18.0
 tables>=3.8,<5.0
 openpyxl>=3.0.9,<3.2
 jinja2>=3.0,<4
-paramiko>=3.0.0,<3.4.1
+paramiko>=3.0.0,<3.5.1
 requests>=2.10,<3
 redis>=5.0,<6
 minio>=6.0,<8
diff --git a/src/dlite-json.c b/src/dlite-json.c
index 3993f2b75..f906524ae 100644
--- a/src/dlite-json.c
+++ b/src/dlite-json.c
@@ -1099,7 +1099,7 @@ static const jsmntok_t *nexttok(DLiteJsonIter *iter, int *length)
 DLiteJsonIter *dlite_json_iter_create(const char *src, int length,
                                       const char *metaid)
 {
-  int r, ok=0;
+  int r;
   DLiteJsonIter *iter=NULL;
   jsmn_parser parser;
@@ -1116,10 +1116,10 @@
   iter->size = iter->tokens->size;
   if (metaid && dlite_get_uuid(iter->metauuid, metaid) < 0) goto fail;
-  ok=1;
- fail:
-  if (!ok) dlite_json_iter_free(iter);
   return iter;
+ fail:
+  if (iter) dlite_json_iter_free(iter);
+  return NULL;
 }
 
 /*
@@ -1308,11 +1308,15 @@ const char *dlite_jstore_iter_next(DLiteJStoreIter *iter)
   if (iter->metauuid[0]) {
     char metauuid[DLITE_UUID_LENGTH+1];
     const char *val = jstore_get(js, iid);
+    int r;
     jsmn_init(&parser);
-    if (jsmn_parse_alloc(&parser, val, strlen(val),
-                         &iter->tokens, &iter->ntokens) < 0) {
-      err(dliteParseError, "invalid json input: \"%s\"", val);
+    if ((r = jsmn_parse_alloc(&parser, val, strlen(val),
+                              &iter->tokens, &iter->ntokens)) < 0) {
+      if (r == JSMN_ERROR_INVAL)
+        err(dliteParseError, "invalid json input: \"%s\"", val);
+      else
+        err(dliteParseError, "json parse error: \"%s\"", jsmn_strerror(r));
       continue;
     }
     if (get_meta_uuid(metauuid, val, iter->tokens)) {
diff --git a/src/getuuid.h b/src/getuuid.h
index f9365bd54..cd1eb61d8 100644
--- a/src/getuuid.h
+++ b/src/getuuid.h
@@ -55,7 +55,7 @@ int isuuid(const char *s);
 
 /**
-  Returns non-zero if `s` matches /. `len` is the length of `s`.
+  Returns non-zero if `s` matches `[URI]/[UUID]`. `len` is the length of `s`.
   An optional final hash or slash will be ignored.
 */
 int isinstanceuri(const char *s, int len);
diff --git a/src/utils/jsmn.h b/src/utils/jsmn.h
index dca2bb592..a3dbbabdf 100644
--- a/src/utils/jsmn.h
+++ b/src/utils/jsmn.h
@@ -87,7 +87,7 @@ typedef struct jsmn_parser {
 } jsmn_parser;
 
 /**
- * Create JSON parser over an array of tokens
+ * Initializes a JSON parser.
  */
 JSMN_API void jsmn_init(jsmn_parser *parser);
diff --git a/src/utils/jsmnx.c b/src/utils/jsmnx.c
index 67a5e490d..aa3831a0c 100644
--- a/src/utils/jsmnx.c
+++ b/src/utils/jsmnx.c
@@ -17,59 +17,70 @@
 #define JSMN_STRICT
 #define JSMN_PARENT_LINKS
 #include "jsmn.h"
-
+#include "jsmnx.h"
 
 /*
   Like jsmn_parse(), but realloc's the buffer pointed to by `tokens_ptr`
   if it is too small.  `num_tokens_ptr` should point to the number of
   allocated tokens.
 
-  Returns JSMN_ERROR_NOMEM on allocation error.
+  Returns number of tokens used by the parser or one of the following error
+  codes on error:
+  - JSMN_ERROR_NOMEM on allocation error.
+  - JSMN_ERROR_INVAL on invalid character inside json string.
 */
 int jsmn_parse_alloc(jsmn_parser *parser, const char *js, const size_t len,
                      jsmntok_t **tokens_ptr, unsigned int *num_tokens_ptr)
 {
-  int n, n_save;
-  unsigned int saved_pos;
-  jsmntok_t *t=NULL;
-  (void) n_save;  // avoid unused parameter error when assert is turned off
+  int ntokens;
+  jsmntok_t *tokens=NULL;
   assert(tokens_ptr);
   assert(num_tokens_ptr);
-  if (!*num_tokens_ptr) *tokens_ptr = NULL;
-  if (!*tokens_ptr) *num_tokens_ptr = 0;
-
-  saved_pos = parser->pos;
+  assert(!((*tokens_ptr == NULL) ^ (*num_tokens_ptr == 0)));
 
   if (!*tokens_ptr) {
-    if ((n = jsmn_parse(parser, js, len, NULL, 0)) < 0) goto fail;
+    if ((ntokens = jsmn_required_tokens(js, len)) < 0) return ntokens;
+
     /* FIXME: there seems to be an issue with the dlite_json_check() that
        looks post the last allocated token.  Allocating `n+1` tokens is a
       workaround to avoid memory issues. */
-    if (!(t = calloc(n+1, sizeof(jsmntok_t)))) return JSMN_ERROR_NOMEM;
+    if (!(tokens = calloc(ntokens+1, sizeof(jsmntok_t))))
+      return JSMN_ERROR_NOMEM;
   } else {
-    n = jsmn_parse(parser, js, len, *tokens_ptr, *num_tokens_ptr);
-    if (n >= 0) return n;
-    if (n != JSMN_ERROR_NOMEM) goto fail;
-    if (!(t = realloc(*tokens_ptr, n*sizeof(jsmntok_t))))
+    jsmn_parser saved_parser;
+    memcpy(&saved_parser, parser, sizeof(saved_parser));
+    ntokens = jsmn_parse(parser, js, len, *tokens_ptr, *num_tokens_ptr);
+    if (ntokens != JSMN_ERROR_NOMEM) return ntokens;
+
+    // Try to handle JSMN_ERROR_NOMEM by reallocating
+    if ((ntokens = jsmn_required_tokens(js, len)) < 0) return ntokens;
+    if (!(tokens = realloc(*tokens_ptr, (ntokens+1)*sizeof(jsmntok_t))))
       return JSMN_ERROR_NOMEM;
+
+    // Resetting parser - is this really needed?
+    memcpy(parser, &saved_parser, sizeof(saved_parser));
   }
-  *tokens_ptr = t;
-  *num_tokens_ptr = n;
-  n_save = n;
-
-  /* TODO: Instead of resetting the parser, we should continue after
-     reallocation */
-  parser->pos = saved_pos;
-  if ((n = jsmn_parse(parser, js, len, t, n)) < 0) goto fail;
-  assert(n == n_save);
-  return n;
- fail:
-  switch (n) {
-  case JSMN_ERROR_NOMEM: abort();  // this should never happen
-  case JSMN_ERROR_INVAL: return JSMN_ERROR_INVAL;
-  case JSMN_ERROR_PART:  return JSMN_ERROR_INVAL;
-  }
-  abort();  // should never be reached
+  *tokens_ptr = tokens;
+  *num_tokens_ptr = ntokens;
+
+  ntokens = jsmn_parse(parser, js, len, tokens, ntokens);
+  assert(ntokens != JSMN_ERROR_NOMEM);
+  return ntokens;
+}
+
+
+/*
+  Returns number of tokens required to parse JSON string `js` of length `len`.
+  On error, JSMN_ERROR_INVAL or JSMN_ERROR_PART is returned.
+ */
+int jsmn_required_tokens(const char *js, size_t len)
+{
+  int ntokens;
+  jsmn_parser parser;
+  jsmn_init(&parser);
+  ntokens = jsmn_parse(&parser, js, len, NULL, 0);
+  assert(ntokens != JSMN_ERROR_NOMEM);
+  return ntokens;
 }
diff --git a/src/utils/jsmnx.h b/src/utils/jsmnx.h
index 077e605c8..ce494d165 100644
--- a/src/utils/jsmnx.h
+++ b/src/utils/jsmnx.h
@@ -43,9 +43,7 @@ void jsmn_init(jsmn_parser *parser);
 
 /**
- * Run JSON parser.
- *
- * It parses a JSON data string into and array of tokens, each
+ * Parse a JSON data string into an array of tokens, each
  * describing a single JSON object.
  *
  * Arguments
@@ -60,8 +58,9 @@ void jsmn_init(jsmn_parser *parser);
  * Returns
  * -------
  * On success, it returns the number of tokens actually used by the parser.
- * On error, one of the following (negative) codes is returned:
+ * If `tokens` is NULL, the number of tokens needed will be returned.
  *
+ * On error, one of the following (negative) codes is returned:
  * - JSMN_ERROR_INVAL: bad token, JSON string is corrupted
 * - JSMN_ERROR_NOMEM: not enough tokens, JSON string is too large
 * - JSMN_ERROR_PART: JSON string is too short, expecting more JSON data
@@ -82,6 +81,13 @@ int jsmn_parse_alloc(jsmn_parser *parser, const char *js,
                      unsigned int *num_tokens_ptr);
 
 
+/**
+  Returns number of tokens required to parse JSON string `js` of length `len`.
+  On error, JSMN_ERROR_INVAL or JSMN_ERROR_PART is returned.
+ */
+int jsmn_required_tokens(const char *js, size_t len);
+
+
 /**
  * Returns number of sub-tokens contained in `t` or -1 on error.
 */
diff --git a/storages/python/python-storage-plugins/postgresql.py b/storages/python/python-storage-plugins/postgresql.py
index 74aa2a8f4..1053d7d87 100644
--- a/storages/python/python-storage-plugins/postgresql.py
+++ b/storages/python/python-storage-plugins/postgresql.py
@@ -129,8 +129,7 @@ def load(self, uuid):
 
         # The uuid will be wrong for data instances, so override it
         if not inst.is_metameta:
-            d = inst.asdict()
-            d["uuid"] = uuid
+            d = inst.asdict(single=True, uuid=True)
             inst = instance_from_dict(d)
         return inst
diff --git a/storages/python/python-storage-plugins/yaml.py b/storages/python/python-storage-plugins/yaml.py
index f8cd7fa7a..22e8cb6b6 100644
--- a/storages/python/python-storage-plugins/yaml.py
+++ b/storages/python/python-storage-plugins/yaml.py
@@ -24,9 +24,9 @@ def open(self, location: str, options=None):
             location: Path to YAML file.
             options: Supported options:
             - `mode`: Mode for opening.  Valid values are:
-                - `a`: Append to existing file or create new file (default).
-                - `r`: Open existing file for read-only.
-                - `w`: Truncate existing file or create new file.
+                - `a`: Open for writing, add to existing `location` (default).
+                - `r`: Open existing `location` for reading.
+                - `w`: Open for writing.  If `location` exists, it is truncated.
             - `soft7`: Whether to save using SOFT7 format.
             - `single`: Whether the input is assumed to be in single-entity form.
               If "auto" (default) the form will be inferred automatically.
@@ -35,13 +35,13 @@ def open(self, location: str, options=None):
         self.options = Options(
             options, defaults="mode=a;soft7=true;single=auto;with_uuid=false"
         )
-        self.readable = "r" in self.options.mode
-        self.writable = "r" != self.options.mode
+        mode = self.options.mode
+        self.writable = "w" in mode or "a" in mode
         self.generic = True
         self.location = location
-        self.flushed = False  # whether buffered data has been written to file
+        self.flushed = True  # whether buffered data has been written to file
         self._data = {}  # data buffer
-        if self.options.mode in ("r", "a", "append"):
+        if "r" in mode or "a" in mode:
             with open(location, "r") as f:
                 data = pyyaml.safe_load(f)
             if data:
diff --git a/storages/python/tests-c/test_postgresql_storage.c b/storages/python/tests-c/test_postgresql_storage.c
index 99172f5c1..13bff2571 100644
--- a/storages/python/tests-c/test_postgresql_storage.c
+++ b/storages/python/tests-c/test_postgresql_storage.c
@@ -44,7 +44,7 @@ MU_TEST(test_save)
   double age = 42.;
   const char *skills[] = {"jumping", "hopping"};
   int n, i;
-  char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests/*.json";
+  char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json";
 
   mu_check(dlite_storage_plugin_path_append(paths) >= 0);
   mu_check((meta = dlite_instance_load_url("json://Person.json?mode=r")));
diff --git a/storages/python/tests-c/test_postgresql_storage2.c b/storages/python/tests-c/test_postgresql_storage2.c
index 1ce1044a1..578435d0f 100644
--- a/storages/python/tests-c/test_postgresql_storage2.c
+++ b/storages/python/tests-c/test_postgresql_storage2.c
@@ -32,6 +32,10 @@ MU_TEST(test_load_meta)
 {
   DLiteInstance *meta;
   char url[256], *id="http://onto-ns.com/meta/0.1/Person";
+  char *paths = STRINGIFY(dlite_SOURCE_DIR) "/storage/python/tests-c/*.json";
+
+  mu_check(dlite_storage_plugin_path_append(paths) >= 0);
+
   snprintf(url, sizeof(url), "postgresql://%s?%s#%s", HOST, options, id);
   mu_check((meta = dlite_instance_load_url(url)));