From c2b25b299677ad15eb127235b931db387ff9255d Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Fri, 7 Jul 2023 08:28:13 -0400 Subject: [PATCH] Draft S3Object as artifact accessible from URL This patch defines an `observable:File`-like object to separate `observable:URL`s from `observable:File`s, reflecting a design decision from the UCO and CASE Ontology Committees related to UCO Issue 536. To reflect new usage, the Digital Corpora supplemental-graph script is adapted to now separate `Facet` assignment between `drafting:S3Object`s and `observable:URL`s. The Android 10 CASE supplemental graph has also been updated to reflect the new `Facet` IRIs. A follow-on patch will regenerate Make-managed files. References: * https://github.com/ucoProject/UCO/issues/536 Signed-off-by: Alex Nelson --- .../supplemental.ttl | 67 ++++++++++++------- ontology/case-corpora.ttl | 6 ++ src/digital_corpora_supplement_ttl.py | 43 +++++++----- 3 files changed, 75 insertions(+), 41 deletions(-) diff --git a/catalog/datasets/digitalcorpora-android-10/supplemental.ttl b/catalog/datasets/digitalcorpora-android-10/supplemental.ttl index 1ba2c48..a1e4982 100644 --- a/catalog/datasets/digitalcorpora-android-10/supplemental.ttl +++ b/catalog/datasets/digitalcorpora-android-10/supplemental.ttl @@ -1,3 +1,4 @@ +@prefix drafting: . @prefix kb: . @prefix owl: . @prefix rdf: . @@ -16,7 +17,11 @@ kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b uco-observable:archiveType "ZIP" ; . -kb:ContentDataFacet-61c34042-2f3e-51ab-8e03-149b56a9e31b +kb:ContentDataFacet-5cb9a74f-b21d-5cd7-8c09-80b1ee9f9089 + uco-observable:mimeType "application/pdf" ; + . + +kb:ContentDataFacet-7e5b597a-dac8-5cdf-9737-84068812c4da rdfs:seeAlso ; uco-observable:mimeType "application/vnd.ms-excel" , @@ -24,37 +29,53 @@ kb:ContentDataFacet-61c34042-2f3e-51ab-8e03-149b56a9e31b ; . 
-kb:ContentDataFacet-647e0c0d-ea3c-5198-b35a-03b8b6b55ce9 - uco-observable:mimeType "text/plain" ; +kb:Relationship-4dda53b3-102d-4427-887d-0e7a49ba0cb1 + a uco-observable:ObservableRelationship ; + uco-core:isDirectional "true"^^xsd:boolean ; + uco-core:kindOfRelationship "Downloadable_From" ; + uco-core:source ; + uco-core:target ; . -kb:ContentDataFacet-7b1aad72-df7f-561b-a807-2e2f1cd896ef - uco-observable:mimeType "application/pdf" ; +kb:Relationship-7a0eda2d-598e-4e40-961f-01583226628d + a uco-observable:ObservableRelationship ; + uco-core:isDirectional "true"^^xsd:boolean ; + uco-core:kindOfRelationship "Downloadable_From" ; + uco-core:source ; + uco-core:target ; . -kb:ContentDataFacet-cc26df85-d96b-5c0c-9aa3-10e3f9d45751 - uco-observable:mimeType "application/zip" ; +kb:Relationship-daf100bc-16b3-422f-9796-272edf7424cf + a uco-observable:ObservableRelationship ; + uco-core:isDirectional "true"^^xsd:boolean ; + uco-core:kindOfRelationship "Downloadable_From" ; + uco-core:source ; + uco-core:target ; . -kb:ContentDataFacet-e4062d00-0d77-5f6b-8822-07b519008dea - uco-observable:mimeType "text/plain" ; - . - -kb:ContentDataFacet-fada9f8b-4c79-5d24-ae23-c3dc1c032b8e - uco-observable:mimeType "application/zip" ; - . - - - a uco-observable:PDFFile ; + + a + drafting:S3Object , + uco-observable:PDFFile + ; + uco-core:hasFacet kb:ContentDataFacet-5cb9a74f-b21d-5cd7-8c09-80b1ee9f9089 ; . - - a uco-observable:ArchiveFile ; + + a + drafting:S3Object , + uco-observable:ArchiveFile + ; uco-core:hasFacet kb:ArchiveFileFacet-020a38b1-f370-4117-86b8-3aed63114f91 ; . - - a uco-observable:ArchiveFile ; - uco-core:hasFacet kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b ; + + a + drafting:S3Object , + uco-observable:ArchiveFile + ; + uco-core:hasFacet + kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b , + kb:ContentDataFacet-7e5b597a-dac8-5cdf-9737-84068812c4da + ; . 
- diff --git a/ontology/case-corpora.ttl b/ontology/case-corpora.ttl index e5c8b85..ea65e8c 100644 --- a/ontology/case-corpora.ttl +++ b/ontology/case-corpora.ttl @@ -173,6 +173,12 @@ drafting:Quality ; . +drafting:S3Object + a owl:Class ; + rdfs:subClassOf uco-observable:ObservableObject ; + rdfs:seeAlso ; + . + drafting:hasQuality a owl:ObjectProperty , diff --git a/src/digital_corpora_supplement_ttl.py b/src/digital_corpora_supplement_ttl.py index 962920e..f27839b 100644 --- a/src/digital_corpora_supplement_ttl.py +++ b/src/digital_corpora_supplement_ttl.py @@ -37,6 +37,7 @@ from rdflib import Graph, Literal, Namespace, URIRef NS_CASE_CORPORA = Namespace("http://example.org/ontology/case-corpora/") +NS_DRAFTING = Namespace("http://example.org/ontology/drafting/") RX_HEXBINARY = re.compile("^[0-9a-f]+$", re.IGNORECASE) @@ -73,6 +74,7 @@ def main() -> None: graph = Graph() graph.bind("case-corpora", NS_CASE_CORPORA) + graph.bind("drafting", NS_DRAFTING) graph.bind("kb", NS_KB) graph.bind("uco-core", NS_UCO_CORE) graph.bind("uco-observable", NS_UCO_OBSERVABLE) @@ -94,20 +96,27 @@ def main() -> None: continue if row["s3key"].endswith("~"): continue - n_downloadable_file = URIRef( - "https://digitalcorpora.s3.amazonaws.com/" - + urllib.parse.quote(row["s3key"]) + s3key_quoted = urllib.parse.quote(row["s3key"]) + n_s3_object = URIRef("s3://digitalcorpora/" + s3key_quoted) + n_download_url = URIRef( + "https://digitalcorpora.s3.amazonaws.com/" + s3key_quoted ) - if n_downloadable_file not in n_subjects: + if n_download_url not in n_subjects: continue - n_subjects.remove(n_downloadable_file) + n_subjects.remove(n_download_url) + graph.add((n_s3_object, NS_RDF.type, NS_DRAFTING.S3Object)) + graph.add((n_download_url, NS_RDF.type, NS_UCO_OBSERVABLE.URL)) graph.add( - (n_downloadable_file, NS_RDF.type, NS_CASE_CORPORA.DownloadableFile) + ( + n_s3_object, + NS_UCO_CORE.createdBy, + NS_KB["organization-72ec45c9-ea94-4503-9428-ad73300056f5"], + ) ) graph.add( ( - 
n_downloadable_file, + n_download_url, NS_UCO_CORE.createdBy, NS_KB["organization-72ec45c9-ea94-4503-9428-ad73300056f5"], ) @@ -117,41 +126,39 @@ def main() -> None: l_object_mtime = Literal( row["modified"].replace(" ", "T") + "Z", datatype=NS_XSD.dateTime ) - graph.add( - (n_downloadable_file, NS_UCO_CORE.modifiedTime, l_object_mtime) - ) + graph.add((n_s3_object, NS_UCO_CORE.modifiedTime, l_object_mtime)) n_content_data_facet = n_inherent_facet_for_node( - n_downloadable_file, NS_UCO_OBSERVABLE.ContentDataFacet, NS_KB + n_s3_object, NS_UCO_OBSERVABLE.ContentDataFacet, NS_KB ) graph.add( (n_content_data_facet, NS_RDF.type, NS_UCO_OBSERVABLE.ContentDataFacet) ) - graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_content_data_facet)) + graph.add((n_s3_object, NS_UCO_CORE.hasFacet, n_content_data_facet)) graph.add( ( n_content_data_facet, NS_UCO_OBSERVABLE.dataPayloadReferenceURL, - n_downloadable_file, + n_download_url, ) ) n_file_facet = n_inherent_facet_for_node( - n_downloadable_file, NS_UCO_OBSERVABLE.FileFacet, NS_KB + n_s3_object, NS_UCO_OBSERVABLE.FileFacet, NS_KB ) graph.add((n_file_facet, NS_RDF.type, NS_UCO_OBSERVABLE.FileFacet)) - graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_file_facet)) + graph.add((n_s3_object, NS_UCO_CORE.hasFacet, n_file_facet)) n_url_facet = n_inherent_facet_for_node( - n_downloadable_file, NS_UCO_OBSERVABLE.URLFacet, NS_KB + n_download_url, NS_UCO_OBSERVABLE.URLFacet, NS_KB ) graph.add((n_url_facet, NS_RDF.type, NS_UCO_OBSERVABLE.URLFacet)) - graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_url_facet)) + graph.add((n_download_url, NS_UCO_CORE.hasFacet, n_url_facet)) graph.add( ( n_url_facet, NS_UCO_OBSERVABLE.fullValue, - Literal(str(n_downloadable_file)), + Literal(str(n_download_url)), ) )