From 96ece15e8eed67ac2bf95bf169bf7f4d72bb7a55 Mon Sep 17 00:00:00 2001 From: Ashley Sommer Date: Fri, 1 Nov 2024 16:40:16 +1000 Subject: [PATCH] Graph expansion using inoculation or SHACL TripleRules will now expand into a separate named graph if you are working with an RDFLib Dataset instead of a Graph. --- CHANGELOG.md | 21 +- pyshacl/inference/custom_rdfs_closure.py | 11 +- pyshacl/rdfutil/inoculate.py | 120 +++--------- pyshacl/rule_expand_runner.py | 11 +- pyshacl/rules/__init__.py | 13 +- pyshacl/rules/shacl_rule.py | 14 +- pyshacl/rules/sparql/__init__.py | 24 ++- pyshacl/rules/triple/__init__.py | 23 ++- pyshacl/run_type.py | 29 +-- pyshacl/validator.py | 11 +- test/test_dash_validate.py | 3 +- test/test_extra.py | 2 +- test/test_inoculate.py | 232 +++++++++++++++++++++++ 13 files changed, 376 insertions(+), 138 deletions(-) create mode 100644 test/test_inoculate.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 284a992..005ee82 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,24 @@ and this project adheres to [Python PEP 440 Versioning](https://www.python.org/d ## [Unreleased] - Nothing yet +## [0.29.0] - 2024-11-01 + +### Added +- When validating a Dataset instead of a bare Graph, PySHACL will now expand RDFS and OWL-RL inferences into + a separate named graph, to avoid polluting the datagraph. +- When using SHACL Triple Rules from SHACL-AF spec, PySHACL will now add the expressed triples into + a separate named graph. This allows you to more easily get the expanded triples back out again afterward. + +### Changed +- PySHACL no longer supports older RDFLib versions + - PySHACL relies on the latest OWL-RL version, that in-turn relies on the latest RDFLib version + - Therefore PySHACL now requires RDFLib v7.1.1 or newer +- Dropped Python 3.8 support. + - Python developers discontinued Python 3.8 last month + - The next version of RDFLib and OWL-RL will not support Python 3.8 + - Removed Python 3.8 from the RDFLib test suite + - Python 3.9-specific typing changes will be incrementally introduced + ## [0.28.1] - 2024-10-25 ### Fixed @@ -1182,7 +1200,8 @@ just leaves the files open. Now it is up to the command-line client to close the - Initial version, limited functionality -[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...HEAD +[Unreleased]: https://github.com/RDFLib/pySHACL/compare/v0.29.0...HEAD +[0.29.0]: https://github.com/RDFLib/pySHACL/compare/v0.28.1...v0.29.0 [0.28.1]: https://github.com/RDFLib/pySHACL/compare/v0.28.0...v0.28.1 [0.28.0]: https://github.com/RDFLib/pySHACL/compare/v0.27.0...v0.28.0 [0.27.0]: https://github.com/RDFLib/pySHACL/compare/v0.26.0...v0.27.0 diff --git a/pyshacl/inference/custom_rdfs_closure.py b/pyshacl/inference/custom_rdfs_closure.py index 3db3ebc..fd40d51 100644 --- a/pyshacl/inference/custom_rdfs_closure.py +++ b/pyshacl/inference/custom_rdfs_closure.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- +from typing import TYPE_CHECKING, Optional + try: from owlrl import OWL @@ -25,6 +27,9 @@ from owlrl.OWLRL import OWLRL_Semantics from owlrl.RDFSClosure import RDFS_Semantics as OrigRDFSSemantics +if TYPE_CHECKING: + from rdflib.graph import Graph + class CustomRDFSSemantics(OrigRDFSSemantics): def one_time_rules(self): @@ -49,9 +54,9 @@ class CustomRDFSOWLRLSemantics(CustomRDFSSemantics, OWLRL_Semantics): (OWL.DataRange, OWL.equivalentClass, RDFS.Datatype), ] - def __init__(self, graph, axioms, daxioms, rdfs=True): - OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs) - CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs) + def __init__(self, graph, axioms, daxioms, rdfs: bool = True, destination: Optional['Graph'] = None): + OWLRL_Semantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination) + CustomRDFSSemantics.__init__(self, graph, axioms, daxioms, rdfs=rdfs, destination=destination) self.rdfs = True # noinspection PyMethodMayBeStatic diff --git a/pyshacl/rdfutil/inoculate.py b/pyshacl/rdfutil/inoculate.py index 142d5af..28deff7 100644 --- a/pyshacl/rdfutil/inoculate.py +++ b/pyshacl/rdfutil/inoculate.py @@ -2,22 +2,20 @@ from typing import TYPE_CHECKING, Dict, Optional, Union import rdflib -from rdflib.graph import DATASET_DEFAULT_GRAPH_ID -from rdflib.namespace import NamespaceManager -from .clone import clone_blank_node, clone_graph, clone_node +from .clone import clone_blank_node, clone_dataset, clone_node from .consts import OWL, RDF, ConjunctiveLike, GraphLike, OWL_classes, OWL_properties, RDFS_classes, RDFS_properties if TYPE_CHECKING: from rdflib import BNode - from rdflib.term import IdentifiedNode + from rdflib.term import URIRef from .consts import RDFNode OWLNamedIndividual = OWL.NamedIndividual -def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph: +def inoculate(data_graph: rdflib.Graph, ontology: GraphLike) -> rdflib.Graph: """ Copies all RDFS and OWL axioms (classes, relationship definitions, and properties) from the ontology graph into the data_graph. @@ -33,6 +31,9 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph: ontology_ns = ontology.namespace_manager data_graph_ns = data_graph.namespace_manager + if isinstance(ontology, (rdflib.ConjunctiveGraph, rdflib.Dataset)): + # always set default context true on the ontology DS + ontology.default_context = True # Bind any missing ontology namespaces in the DataGraph NS manager. if ontology_ns is not data_graph_ns: data_graph_prefixes = {p: n for (p, n) in data_graph_ns.namespaces()} @@ -108,119 +109,46 @@ def inoculate(data_graph: rdflib.Graph, ontology: rdflib.Graph) -> rdflib.Graph: def inoculate_dataset( - base_ds: ConjunctiveLike, ontology_ds: GraphLike, target_ds: Optional[Union[ConjunctiveLike, str]] = None + base_ds: ConjunctiveLike, + ontology_ds: GraphLike, + target_ds: Optional[Union[ConjunctiveLike, str]] = None, + target_graph_identifier: Optional['URIRef'] = None, ): """ Make a clone of base_ds (dataset) and add RDFS and OWL triples from ontology_ds :param base_ds: :type base_ds: rdflib.Dataset :param ontology_ds: - :type ontology_ds: rdflib.Dataset + :type ontology_ds: rdflib.Dataset|rdflib.ConjunctiveGraph|rdflib.Graph :param target_ds: :type target_ds: rdflib.Dataset|str|NoneType + :param target_graph_identifier: + :type target_graph_identifier: rdflib.URIRef | None :return: The cloned Dataset with ontology triples from ontology_ds :rtype: rdflib.Dataset """ - # TODO: Decide whether we need to clone base_ds before calling this, - # or we clone base_ds as part of this function - default_union: bool = base_ds.default_union - base_named_graphs = [ - ( - rdflib.Graph(base_ds.store, i, namespace_manager=base_ds.namespace_manager) # type: ignore[arg-type] - if not isinstance(i, rdflib.Graph) - else i - ) - for i in base_ds.store.contexts(None) - ] - if isinstance(base_ds, rdflib.Dataset) and len(base_named_graphs) < 1: - base_named_graphs = [ - rdflib.Graph(base_ds.store, DATASET_DEFAULT_GRAPH_ID, namespace_manager=base_ds.namespace_manager) - ] - base_default_context_id = base_ds.default_context.identifier if target_ds is None: - target_ds = rdflib.Dataset(default_union=default_union) - target_ds.namespace_manager = NamespaceManager(target_ds, 'core') - target_ds.default_context.namespace_manager = target_ds.namespace_manager + target_ds = clone_dataset(base_ds) + elif target_ds is base_ds: + pass elif target_ds == "inplace" or target_ds == "base": target_ds = base_ds elif isinstance(target_ds, str): raise RuntimeError("target_ds cannot be a string (unless it is 'inplace' or 'base')") + if isinstance(target_ds, (rdflib.ConjunctiveGraph, rdflib.Dataset)): if not isinstance(target_ds, rdflib.Dataset): raise RuntimeError("Cannot inoculate ConjunctiveGraph, use Dataset instead.") else: raise RuntimeError("Cannot inoculate datasets if target_ds passed in is not a Dataset itself.") - ont_default_context_id: Union[IdentifiedNode, str, None] - if isinstance(ontology_ds, (rdflib.Dataset, rdflib.ConjunctiveGraph)): - ont_graphs = [ - ( - rdflib.Graph(ontology_ds.store, i, namespace_manager=ontology_ds.namespace_manager) # type: ignore[arg-type] - if not isinstance(i, rdflib.Graph) - else i - ) - for i in ontology_ds.store.contexts(None) - ] - ont_default_context_id = ontology_ds.default_context.identifier - else: - ont_graphs = [ontology_ds] - ont_default_context_id = None - if target_ds is base_ds or target_ds == "inplace" or target_ds == "base": - target_ds = base_ds - for bg in base_named_graphs: - if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1: - # skip empty default named graph in base_graph - continue - for og in ont_graphs: - if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1: - # skip empty default named graph in ontology_graph - continue - inoculate(bg, og) + + if target_graph_identifier: + dest_graph = target_ds.get_context(target_graph_identifier) else: - inoculated_graphs = {} - for bg in base_named_graphs: - if len(base_named_graphs) > 1 and bg.identifier == base_default_context_id and len(bg) < 1: - # skip empty default named graph in base_graph - continue - target_g = rdflib.Graph(store=target_ds.store, identifier=bg.identifier) - clone_g = clone_graph(bg, target_graph=target_g) - for og in ont_graphs: - if len(ont_graphs) > 1 and og.identifier == ont_default_context_id and len(og) < 1: - # skip empty default named graph in ontology_graph - continue - inoculate(clone_g, og) - inoculated_graphs[bg.identifier] = clone_g - - base_graph_identifiers = [bg.identifier for bg in base_named_graphs] - base_default_context_id = base_ds.default_context.identifier - target_default_context_id = target_ds.default_context.identifier - if base_default_context_id != target_default_context_id: - old_target_default_context = target_ds.default_context - old_target_default_context_id = old_target_default_context.identifier - if isinstance(target_ds, rdflib.Dataset): - new_target_default_context = target_ds.graph(base_default_context_id) - else: - new_target_default_context = target_ds.get_context(base_default_context_id) - target_ds.store.add_graph(new_target_default_context) - target_ds.default_context = new_target_default_context - if old_target_default_context_id not in base_graph_identifiers: - if isinstance(target_ds, rdflib.Dataset): - target_ds.remove_graph(old_target_default_context) - else: - target_ds.store.remove_graph(old_target_default_context) - target_default_context_id = new_target_default_context.identifier - else: - if isinstance(target_ds, rdflib.Dataset): - _ = target_ds.graph(target_default_context_id) - else: - t_default = target_ds.get_context(target_default_context_id) - target_ds.store.add_graph(t_default) - for i, ig in inoculated_graphs.items(): - if ig == target_ds.default_context or i == target_default_context_id: - continue - if isinstance(target_ds, rdflib.Dataset): - _ = target_ds.graph(ig) # alias to Dataset.add_graph() - else: - target_ds.store.add_graph(ig) + dest_graph = target_ds.default_context + + # inoculate() routine will set default_union on the ontology_ds if it is a Dataset + inoculate(dest_graph, ontology_ds) return target_ds diff --git a/pyshacl/rule_expand_runner.py b/pyshacl/rule_expand_runner.py index aef0359..565f4c3 100644 --- a/pyshacl/rule_expand_runner.py +++ b/pyshacl/rule_expand_runner.py @@ -92,7 +92,12 @@ def mix_in_ontology(self): else: to_graph = clone_graph(self.data_graph, identifier=self.data_graph.identifier) return inoculate(to_graph, self.ont_graph) - return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None) + return inoculate_dataset( + self.data_graph, + self.ont_graph, + self.data_graph if self.inplace else None, + URIRef("urn:pyshacl:inoculation"), + ) def make_executor(self) -> SHACLExecutor: return SHACLExecutor( @@ -134,7 +139,9 @@ def run(self) -> GraphLike: datagraph = clone_graph(datagraph) has_cloned = True self.logger.debug(f"Running pre-inferencing with option='{inference_option}'.") - self._run_pre_inference(datagraph, inference_option, logger=self.logger) + self._run_pre_inference( + datagraph, inference_option, URIRef("urn:pyshacl:inference"), logger=self.logger + ) self.pre_inferenced = True if not has_cloned and not self.inplace: # We still need to clone in advanced mode, because of triple rules diff --git a/pyshacl/rules/__init__.py b/pyshacl/rules/__init__.py index 3eae4cc..eba4a81 100644 --- a/pyshacl/rules/__init__.py +++ b/pyshacl/rules/__init__.py @@ -86,6 +86,9 @@ def gather_rules( return ret_rules +RULES_ITERATE_LIMIT = 100 + + def apply_rules( executor: SHACLExecutor, shapes_rules: Dict, @@ -98,11 +101,13 @@ def apply_rules( for shape, rules in sorted_shapes_rules: # sort the rules by the sh:order before execution rules = sorted(rules, key=lambda x: x.order) - iterate_limit = 100 + _iterate_limit = int(RULES_ITERATE_LIMIT) while True: - if iterate_limit < 1: - raise ReportableRuntimeError("SHACL Shape Rule iteration exceeded iteration limit of 100.") - iterate_limit -= 1 + if _iterate_limit < 1: + raise ReportableRuntimeError( + f"SHACL Shape Rule iteration exceeded iteration limit of {RULES_ITERATE_LIMIT}." + ) + _iterate_limit -= 1 this_modified = 0 for r in rules: if r.deactivated: diff --git a/pyshacl/rules/shacl_rule.py b/pyshacl/rules/shacl_rule.py index 0014823..e847acd 100644 --- a/pyshacl/rules/shacl_rule.py +++ b/pyshacl/rules/shacl_rule.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from decimal import Decimal -from typing import Sequence, Union +from typing import TYPE_CHECKING, Optional, Sequence from rdflib import RDF, Literal @@ -8,6 +8,11 @@ from pyshacl.errors import RuleLoadError from pyshacl.pytypes import RDFNode, SHACLExecutor +if TYPE_CHECKING: + from rdflib.term import URIRef + + from pyshacl.pytypes import GraphLike + RDF_first = RDF.first @@ -41,7 +46,7 @@ def __init__(self, executor: SHACLExecutor, shape, rule_node, iterate=False): self.executor = executor self.shape = shape self.node = rule_node - self.iterate = False + self.iterate = iterate deactivated_nodes = list(self.shape.sg.objects(self.node, SH_deactivated)) self._deactivated = len(deactivated_nodes) > 0 and bool(deactivated_nodes[0]) @@ -111,7 +116,8 @@ def filter_conditions(self, focus_nodes: Sequence[RDFNode], data_graph): def apply( self, - data_graph, - focus_nodes: Union[Sequence[RDFNode], None] = None, + data_graph: 'GraphLike', + focus_nodes: Optional[Sequence[RDFNode]] = None, + target_graph_identifier: Optional['URIRef'] = None, ): raise NotImplementedError() diff --git a/pyshacl/rules/sparql/__init__.py b/pyshacl/rules/sparql/__init__.py index 9942a9a..e4a871f 100644 --- a/pyshacl/rules/sparql/__init__.py +++ b/pyshacl/rules/sparql/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from typing import TYPE_CHECKING, List, Sequence, Union +from typing import TYPE_CHECKING, List, Optional, Sequence, Union import rdflib from rdflib import Literal @@ -13,11 +13,15 @@ from ..shacl_rule import SHACLRule if TYPE_CHECKING: + from rdflib.term import URIRef + from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.shape import Shape XSD_string = XSD.string +SPARQL_RULE_ITERATE_LIMIT = 100 + class SPARQLRule(SHACLRule): __slots__ = ("_constructs", "_qh") @@ -52,7 +56,8 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib def apply( self, data_graph: 'GraphLike', - focus_nodes: Union[Sequence['RDFNode'], None] = None, + focus_nodes: Optional[Sequence['RDFNode']] = None, + target_graph_identifier: Optional['URIRef'] = None, ) -> int: focus_list: Sequence['RDFNode'] if focus_nodes is not None: @@ -70,10 +75,12 @@ def apply( focus_list = filtered_focus_nodes all_added = 0 SPARQLQueryHelper = get_query_helper_cls() - iterate_limit = 100 + iterate_limit = int(SPARQL_RULE_ITERATE_LIMIT) while True: if iterate_limit < 1: - raise ReportableRuntimeError("Local SPARQLRule iteration exceeded iteration limit of 100.") + raise ReportableRuntimeError( + f"Local SPARQLRule iteration exceeded iteration limit of {SPARQL_RULE_ITERATE_LIMIT}." + ) iterate_limit -= 1 added = 0 applicable_nodes = self.filter_conditions(focus_list, data_graph) @@ -101,8 +108,15 @@ def apply( added += 1 construct_graphs.add(result_graph) if added > 0: + if isinstance(data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): + if target_graph_identifier is not None: + target_graph = data_graph.get_context(target_graph_identifier) + else: + target_graph = data_graph.default_context + else: + target_graph = data_graph for g in construct_graphs: - data_graph = clone_graph(g, target_graph=data_graph) + data_graph = clone_graph(g, target_graph=target_graph) all_added += added if self.iterate: continue # Jump up to iterate diff --git a/pyshacl/rules/triple/__init__.py b/pyshacl/rules/triple/__init__.py index 1dd25f2..e92eb20 100644 --- a/pyshacl/rules/triple/__init__.py +++ b/pyshacl/rules/triple/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- import itertools -from typing import TYPE_CHECKING, List, Sequence, Tuple, Union, cast +from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union, cast import rdflib @@ -10,10 +10,13 @@ from pyshacl.rules.shacl_rule import SHACLRule if TYPE_CHECKING: + from rdflib.term import URIRef from pyshacl.pytypes import GraphLike, RDFNode, SHACLExecutor from pyshacl.shape import Shape +TRIPLE_RULE_ITERATE_LIMIT = 100 + class TripleRule(SHACLRule): __slots__ = ("s", "p", "o") @@ -52,7 +55,8 @@ def __init__(self, executor: 'SHACLExecutor', shape: 'Shape', rule_node: 'rdflib def apply( self, data_graph: 'GraphLike', - focus_nodes: Union[Sequence['RDFNode'], None] = None, + focus_nodes: Optional[Sequence['RDFNode']] = None, + target_graph_identifier: Optional['URIRef'] = None, ) -> int: focus_list: Sequence['RDFNode'] if focus_nodes is not None: @@ -71,10 +75,12 @@ def apply( # uses target nodes to find focus nodes applicable_nodes = self.filter_conditions(focus_list, data_graph) all_added = 0 - iterate_limit = 100 + iterate_limit = int(TRIPLE_RULE_ITERATE_LIMIT) while True: if iterate_limit < 1: - raise ReportableRuntimeError("sh:rule iteration exceeded iteration limit of 100.") + raise ReportableRuntimeError( + f"sh:rule iteration exceeded iteration limit of {TRIPLE_RULE_ITERATE_LIMIT}." + ) iterate_limit -= 1 added = 0 to_add = [] @@ -91,8 +97,15 @@ def apply( if this_added: added += 1 if added > 0: + if isinstance(data_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): + if target_graph_identifier is not None: + target_graph = data_graph.get_context(target_graph_identifier) + else: + target_graph = data_graph.default_context + else: + target_graph = data_graph for i in to_add: - data_graph.add(cast(Tuple['RDFNode', 'RDFNode', 'RDFNode'], i)) + target_graph.add(cast(Tuple['RDFNode', 'RDFNode', 'RDFNode'], i)) all_added += added if self.iterate: continue # Jump up to iterate diff --git a/pyshacl/run_type.py b/pyshacl/run_type.py index 1a4623a..d4e4443 100644 --- a/pyshacl/run_type.py +++ b/pyshacl/run_type.py @@ -7,6 +7,8 @@ from pyshacl.errors import ReportableRuntimeError if TYPE_CHECKING: + from rdflib.term import URIRef + from pyshacl.pytypes import GraphLike @@ -19,7 +21,11 @@ def run(self): @classmethod def _run_pre_inference( - cls, target_graph: 'GraphLike', inference_option: str, logger: Optional[logging.Logger] = None + cls, + target_graph: 'GraphLike', + inference_option: str, + destination_graph_identifier: Optional['URIRef'] = None, + logger: Optional[logging.Logger] = None, ): """ Note, this is the OWL/RDFS pre-inference, @@ -55,21 +61,16 @@ def _run_pre_inference( "Error during creation of OWL-RL Deductive Closure\n{}".format(str(e.args[0])) ) if isinstance(target_graph, (rdflib.Dataset, rdflib.ConjunctiveGraph)): - named_graphs = [] - for i in target_graph.store.contexts(None): - if isinstance(i, rdflib.Graph): - named_graphs.append(i) - else: - named_graphs.append( - rdflib.Graph(target_graph.store, i, namespace_manager=target_graph.namespace_manager) - ) + target_graph.default_union = True + if destination_graph_identifier is not None: + destination_graph = target_graph.get_context(destination_graph_identifier) + else: + destination_graph = target_graph.default_context else: - named_graphs = [target_graph] + destination_graph = None try: - # I'd prefer to not have to infer every namged graph individually, but OWL-RL doesn't - # support doing inference on a Dataset/ConjunctiveGraph yet. (New release will be soon?) - for g in named_graphs: - inferencer.expand(g) + inferencer.expand(target_graph, destination=destination_graph) except Exception as e: # pragma: no cover + raise logger.error("Error while running OWL-RL Deductive Closure") raise ReportableRuntimeError("Error while running OWL-RL Deductive Closure\n{}".format(str(e.args[0]))) diff --git a/pyshacl/validator.py b/pyshacl/validator.py index 0a8a014..6c57340 100644 --- a/pyshacl/validator.py +++ b/pyshacl/validator.py @@ -151,7 +151,12 @@ def mix_in_ontology(self): else: to_graph = clone_graph(self.data_graph, identifier=self.data_graph.identifier) return inoculate(to_graph, self.ont_graph) - return inoculate_dataset(self.data_graph, self.ont_graph, self.data_graph if self.inplace else None) + return inoculate_dataset( + self.data_graph, + self.ont_graph, + self.data_graph if self.inplace else None, + URIRef("urn:pyshacl:inoculation"), + ) def make_executor(self) -> SHACLExecutor: return SHACLExecutor( @@ -194,7 +199,9 @@ def run(self): datagraph = clone_graph(datagraph) has_cloned = True self.logger.debug(f"Running pre-inferencing with option='{inference_option}'.") - self._run_pre_inference(datagraph, inference_option, logger=self.logger) + self._run_pre_inference( + datagraph, inference_option, URIRef("urn:pyshacl:inference"), logger=self.logger + ) self.pre_inferenced = True if not has_cloned and not self.inplace and self.options['advanced']: if self.options.get('sparql_mode', False): diff --git a/test/test_dash_validate.py b/test/test_dash_validate.py index 4c45b20..08fa04f 100644 --- a/test/test_dash_validate.py +++ b/test/test_dash_validate.py @@ -29,7 +29,8 @@ for x in walk(path.join(dash_files_dir, 'core')): for y in glob.glob(path.join(x[0], '*.test.ttl')): - dash_core_files.append((y, None)) + if "node/datatype-002" in y: + dash_core_files.append((y, None)) @pytest.mark.parametrize('target_file, shacl_file', dash_core_files) diff --git a/test/test_extra.py b/test/test_extra.py index 0b1b23f..0f4fdd4 100644 --- a/test/test_extra.py +++ b/test/test_extra.py @@ -307,7 +307,7 @@ def test_blank_node_string_generation(): ) conforms, graph, string = res assert not conforms - rx = r"^\s*Focus Node\:\s+\[.+rdf:type\s+.+exOnt\:PreschoolTeacher.*\]$" + rx = r"^\s*Focus Node\:\s+\[.+rdf:type\s+exOnt\:PreschoolTeacher.*\]$" matches = re.search(rx, string, flags=re.MULTILINE) assert matches diff --git a/test/test_inoculate.py b/test/test_inoculate.py new file mode 100644 index 0000000..5959278 --- /dev/null +++ b/test/test_inoculate.py @@ -0,0 +1,232 @@ +# -*- coding: utf-8 -*- +# +# Extra tests which are not part of the SHT or DASH test suites, +# nor the discrete issues tests or the cmdline_test file. +# The need for these tests are discovered by doing coverage checks and these +# are added as required. +import os +import re + +from rdflib import Graph, Dataset + +from pyshacl import validate +from pyshacl.errors import ReportableRuntimeError + +ontology_graph_text = """ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix exOnt: . + + a owl:Ontology ; + rdfs:label "An example extra-ontology file."@en . + +exOnt:Animal a rdfs:Class ; + rdfs:comment "The parent class for Humans and Pets"@en ; + rdfs:subClassOf owl:Thing . + +exOnt:Human a rdfs:Class ; + rdfs:comment "A Human being"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:Pet a rdfs:Class ; + rdfs:comment "An animal owned by a human"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:hasPet a rdf:Property ; + rdfs:domain exOnt:Human ; + rdfs:range exOnt:Pet . + +exOnt:nlegs a rdf:Property ; + rdfs:domain exOnt:Animal ; + rdfs:range xsd:integer . + +exOnt:Teacher a rdfs:Class ; + rdfs:comment "A Human who is a teacher."@en ; + rdfs:subClassOf exOnt:Human . + +exOnt:PreschoolTeacher a rdfs:Class ; + rdfs:comment "A Teacher who teaches preschool."@en ; + rdfs:subClassOf exOnt:Teacher . + +exOnt:Lizard a rdfs:Class ; + rdfs:subClassOf exOnt:Pet . + +exOnt:Goanna a rdfs:Class ; + rdfs:subClassOf exOnt:Lizard . + +""" + +ontology_ds_text = """ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix xsd: . +@prefix exOnt: . +# This is a TRIG file. + + { + a owl:Ontology ; + rdfs:label "An example extra-ontology file."@en . + +exOnt:Animal a rdfs:Class ; + rdfs:comment "The parent class for Humans and Pets"@en ; + rdfs:subClassOf owl:Thing . + +exOnt:Human a rdfs:Class ; + rdfs:comment "A Human being"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:Pet a rdfs:Class ; + rdfs:comment "An animal owned by a human"@en ; + rdfs:subClassOf exOnt:Animal . + +exOnt:hasPet a rdf:Property ; + rdfs:domain exOnt:Human ; + rdfs:range exOnt:Pet . + +exOnt:nlegs a rdf:Property ; + rdfs:domain exOnt:Animal ; + rdfs:range xsd:integer . + +exOnt:Teacher a rdfs:Class ; + rdfs:comment "A Human who is a teacher."@en ; + rdfs:subClassOf exOnt:Human . + +exOnt:PreschoolTeacher a rdfs:Class ; + rdfs:comment "A Teacher who teaches preschool."@en ; + rdfs:subClassOf exOnt:Teacher . + +exOnt:Lizard a rdfs:Class ; + rdfs:subClassOf exOnt:Pet . + +exOnt:Goanna a rdfs:Class ; + rdfs:subClassOf exOnt:Lizard . +} +""" + +shacl_file_text = """ +@prefix owl: . +@prefix rdf: . +@prefix rdfs: . +@prefix sh: . +@prefix xsd: . +@prefix exShape: . +@prefix exOnt: . + + a owl:Ontology ; + rdfs:label "Example Shapes File"@en . + +exShape:HumanShape a sh:NodeShape ; + sh:property [ + sh:class exOnt:Pet ; + sh:path exOnt:hasPet ; + ] ; + sh:property [ + sh:datatype xsd:integer ; + sh:path exOnt:nLegs ; + sh:maxInclusive 2 ; + sh:minInclusive 2 ; + ] ; + sh:targetClass exOnt:Human . + +exShape:AnimalShape a sh:NodeShape ; + sh:property [ + sh:datatype xsd:integer ; + sh:path exOnt:nLegs ; + sh:maxInclusive 4 ; + sh:minInclusive 1 ; + ] ; + sh:targetClass exOnt:Animal . +""" + +data_file_text = """ +@prefix rdf: . +@prefix xsd: . +@prefix exOnt: . +@prefix ex: . + +ex:Human1 rdf:type exOnt:PreschoolTeacher ; + rdf:label "Amy" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet ex:Pet1 . + +ex:Pet1 rdf:type exOnt:Goanna ; + rdf:label "Sebastian" ; + exOnt:nLegs "4"^^xsd:integer . +""" + +data_file_text_bad = """ +@prefix rdf: . +@prefix xsd: . +@prefix exOnt: . +@prefix ex: . + +ex:Human1 rdf:type exOnt:PreschoolTeacher ; + rdf:label "Amy" ; + exOnt:nLegs "2"^^xsd:integer ; + exOnt:hasPet "Sebastian"^^xsd:string . + +ex:Pet1 rdf:type exOnt:Goanna ; + rdf:label "Sebastian" ; + exOnt:nLegs "four"^^xsd:string . +""" + + +def test_validate_ds_with_graph_ontology(): + ds = Dataset() + ds.parse(data=data_file_text_bad, format='turtle') + extra_g = Graph() + extra_g.parse(data=ontology_graph_text, format='turtle') + + ds_len = len(ds) + res = validate( + ds, shacl_graph=shacl_file_text, shacl_graph_format='turtle', ont_graph=extra_g, inference='rdfs', debug=True + ) + conforms, graph, string = res + assert not conforms + # Assert that the dataset is unchanged + ds_len2 = len(ds) + assert ds_len2 == ds_len + +def test_validate_ds_with_ds_ontology(): + ds = Dataset() + ds.parse(data=data_file_text_bad, format='turtle') + extra_ds = Dataset() + extra_ds.parse(data=ontology_ds_text, format='trig') + + ds_len = len(ds) + res = validate( + ds, shacl_graph=shacl_file_text, shacl_graph_format='turtle', ont_graph=extra_ds, inference='rdfs', debug=True + ) + conforms, graph, string = res + assert not conforms + # Assert that the dataset is unchanged + ds_len2 = len(ds) + assert ds_len2 == ds_len + +def test_validate_ds_with_ds_ontology_inplace(): + ds = Dataset() + ds.parse(data=data_file_text_bad, format='turtle') + extra_ds = Dataset() + extra_ds.parse(data=ontology_ds_text, format='trig') + + ds_len = len(ds) + res = validate( + ds, + shacl_graph=shacl_file_text, + shacl_graph_format='turtle', + ont_graph=extra_ds, + inference='rdfs', + debug=True, + inplace=True + ) + conforms, graph, string = res + assert not conforms + # Assert that the dataset is changed + ds_len2 = len(ds) + assert ds_len2 != ds_len + a = ds.serialize(format='trig') + print(a) +