Skip to content

Commit

Permalink
Draft S3Object as artifact accessible from URL
Browse files Browse the repository at this point in the history
This patch defines an `observable:File`-like object to separate
`observable:URL`s from `observable:File`s, reflecting a design decision
from the UCO and CASE Ontology Committees related to UCO Issue 536.

To reflect new usage, the Digital Corpora supplemental-graph script is
adapted to now separate `Facet` assignment between `drafting:S3Object`s
and `observable:URL`s.  The Android 10 CASE supplemental graph has also
been updated to reflect the new `Facet` IRIs.

A follow-on patch will regenerate Make-managed files.

References:
* ucoProject/UCO#536

Signed-off-by: Alex Nelson <[email protected]>
  • Loading branch information
ajnelson-nist committed Jul 7, 2023
1 parent 668bad6 commit c2b25b2
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 41 deletions.
67 changes: 44 additions & 23 deletions catalog/datasets/digitalcorpora-android-10/supplemental.ttl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
@prefix drafting: <http://example.org/ontology/drafting/> .
@prefix kb: <http://example.org/kb/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
Expand All @@ -16,45 +17,65 @@ kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b
uco-observable:archiveType "ZIP" ;
.

kb:ContentDataFacet-61c34042-2f3e-51ab-8e03-149b56a9e31b
kb:ContentDataFacet-5cb9a74f-b21d-5cd7-8c09-80b1ee9f9089
uco-observable:mimeType "application/pdf" ;
.

kb:ContentDataFacet-7e5b597a-dac8-5cdf-9737-84068812c4da
rdfs:seeAlso <https://unifiedcyberontology.atlassian.net/browse/OC-116> ;
uco-observable:mimeType
"application/vnd.ms-excel" ,
"application/zip"
;
.

kb:ContentDataFacet-647e0c0d-ea3c-5198-b35a-03b8b6b55ce9
uco-observable:mimeType "text/plain" ;
kb:Relationship-4dda53b3-102d-4427-887d-0e7a49ba0cb1
a uco-observable:ObservableRelationship ;
uco-core:isDirectional "true"^^xsd:boolean ;
uco-core:kindOfRelationship "Downloadable_From" ;
uco-core:source <s3://digitalcorpora/corpora/mobile/android_10/Android10-ImageCreation.pdf> ;
uco-core:target <https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/Android10-ImageCreation.pdf> ;
.

kb:ContentDataFacet-7b1aad72-df7f-561b-a807-2e2f1cd896ef
uco-observable:mimeType "application/pdf" ;
kb:Relationship-7a0eda2d-598e-4e40-961f-01583226628d
a uco-observable:ObservableRelationship ;
uco-core:isDirectional "true"^^xsd:boolean ;
uco-core:kindOfRelationship "Downloadable_From" ;
uco-core:source <s3://digitalcorpora/corpora/mobile/android_10/Android_10.zip> ;
uco-core:target <https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/Android_10.zip> ;
.

kb:ContentDataFacet-cc26df85-d96b-5c0c-9aa3-10e3f9d45751
uco-observable:mimeType "application/zip" ;
kb:Relationship-daf100bc-16b3-422f-9796-272edf7424cf
a uco-observable:ObservableRelationship ;
uco-core:isDirectional "true"^^xsd:boolean ;
uco-core:kindOfRelationship "Downloadable_From" ;
uco-core:source <s3://digitalcorpora/corpora/mobile/android_10/SMS-Messages.xlsx> ;
uco-core:target <https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/SMS-Messages.xlsx> ;
.

kb:ContentDataFacet-e4062d00-0d77-5f6b-8822-07b519008dea
uco-observable:mimeType "text/plain" ;
.

kb:ContentDataFacet-fada9f8b-4c79-5d24-ae23-c3dc1c032b8e
uco-observable:mimeType "application/zip" ;
.

<https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/Android10-ImageCreation.pdf>
a uco-observable:PDFFile ;
<s3://digitalcorpora/corpora/mobile/android_10/Android10-ImageCreation.pdf>
a
drafting:S3Object ,
uco-observable:PDFFile
;
uco-core:hasFacet kb:ContentDataFacet-5cb9a74f-b21d-5cd7-8c09-80b1ee9f9089 ;
.

<https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/Android_10.zip>
a uco-observable:ArchiveFile ;
<s3://digitalcorpora/corpora/mobile/android_10/Android_10.zip>
a
drafting:S3Object ,
uco-observable:ArchiveFile
;
uco-core:hasFacet kb:ArchiveFileFacet-020a38b1-f370-4117-86b8-3aed63114f91 ;
.

<https://digitalcorpora.s3.amazonaws.com/corpora/mobile/android_10/SMS-Messages.xlsx>
a uco-observable:ArchiveFile ;
uco-core:hasFacet kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b ;
<s3://digitalcorpora/corpora/mobile/android_10/SMS-Messages.xlsx>
a
drafting:S3Object ,
uco-observable:ArchiveFile
;
uco-core:hasFacet
kb:ArchiveFileFacet-54f6d4e1-58de-4888-888b-0992d881a20b ,
kb:ContentDataFacet-7e5b597a-dac8-5cdf-9737-84068812c4da
;
.

6 changes: 6 additions & 0 deletions ontology/case-corpora.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ drafting:Quality
;
.

drafting:S3Object
a owl:Class ;
rdfs:subClassOf uco-observable:ObservableObject ;
rdfs:seeAlso <https://docs.aws.amazon.com/general/latest/gr/glos-chap.html#O> ;
.

drafting:hasQuality
a
owl:ObjectProperty ,
Expand Down
43 changes: 25 additions & 18 deletions src/digital_corpora_supplement_ttl.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from rdflib import Graph, Literal, Namespace, URIRef

NS_CASE_CORPORA = Namespace("http://example.org/ontology/case-corpora/")
NS_DRAFTING = Namespace("http://example.org/ontology/drafting/")

RX_HEXBINARY = re.compile("^[0-9a-f]+$", re.IGNORECASE)

Expand Down Expand Up @@ -73,6 +74,7 @@ def main() -> None:

graph = Graph()
graph.bind("case-corpora", NS_CASE_CORPORA)
graph.bind("drafting", NS_DRAFTING)
graph.bind("kb", NS_KB)
graph.bind("uco-core", NS_UCO_CORE)
graph.bind("uco-observable", NS_UCO_OBSERVABLE)
Expand All @@ -94,20 +96,27 @@ def main() -> None:
continue
if row["s3key"].endswith("~"):
continue
n_downloadable_file = URIRef(
"https://digitalcorpora.s3.amazonaws.com/"
+ urllib.parse.quote(row["s3key"])
s3key_quoted = urllib.parse.quote(row["s3key"])
n_s3_object = URIRef("s3://digitalcorpora/" + s3key_quoted)
n_download_url = URIRef(
"https://digitalcorpora.s3.amazonaws.com/" + s3key_quoted
)
if n_downloadable_file not in n_subjects:
if n_download_url not in n_subjects:
continue
n_subjects.remove(n_downloadable_file)
n_subjects.remove(n_download_url)

graph.add((n_s3_object, NS_RDF.type, NS_DRAFTING.S3Object))
graph.add((n_download_url, NS_RDF.type, NS_UCO_OBSERVABLE.URL))
graph.add(
(n_downloadable_file, NS_RDF.type, NS_CASE_CORPORA.DownloadableFile)
(
n_s3_object,
NS_UCO_CORE.createdBy,
NS_KB["organization-72ec45c9-ea94-4503-9428-ad73300056f5"],
)
)
graph.add(
(
n_downloadable_file,
n_download_url,
NS_UCO_CORE.createdBy,
NS_KB["organization-72ec45c9-ea94-4503-9428-ad73300056f5"],
)
Expand All @@ -117,41 +126,39 @@ def main() -> None:
l_object_mtime = Literal(
row["modified"].replace(" ", "T") + "Z", datatype=NS_XSD.dateTime
)
graph.add(
(n_downloadable_file, NS_UCO_CORE.modifiedTime, l_object_mtime)
)
graph.add((n_s3_object, NS_UCO_CORE.modifiedTime, l_object_mtime))

n_content_data_facet = n_inherent_facet_for_node(
n_downloadable_file, NS_UCO_OBSERVABLE.ContentDataFacet, NS_KB
n_s3_object, NS_UCO_OBSERVABLE.ContentDataFacet, NS_KB
)
graph.add(
(n_content_data_facet, NS_RDF.type, NS_UCO_OBSERVABLE.ContentDataFacet)
)
graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_content_data_facet))
graph.add((n_s3_object, NS_UCO_CORE.hasFacet, n_content_data_facet))
graph.add(
(
n_content_data_facet,
NS_UCO_OBSERVABLE.dataPayloadReferenceURL,
n_downloadable_file,
n_download_url,
)
)

n_file_facet = n_inherent_facet_for_node(
n_downloadable_file, NS_UCO_OBSERVABLE.FileFacet, NS_KB
n_s3_object, NS_UCO_OBSERVABLE.FileFacet, NS_KB
)
graph.add((n_file_facet, NS_RDF.type, NS_UCO_OBSERVABLE.FileFacet))
graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_file_facet))
graph.add((n_s3_object, NS_UCO_CORE.hasFacet, n_file_facet))

n_url_facet = n_inherent_facet_for_node(
n_downloadable_file, NS_UCO_OBSERVABLE.URLFacet, NS_KB
n_download_url, NS_UCO_OBSERVABLE.URLFacet, NS_KB
)
graph.add((n_url_facet, NS_RDF.type, NS_UCO_OBSERVABLE.URLFacet))
graph.add((n_downloadable_file, NS_UCO_CORE.hasFacet, n_url_facet))
graph.add((n_download_url, NS_UCO_CORE.hasFacet, n_url_facet))
graph.add(
(
n_url_facet,
NS_UCO_OBSERVABLE.fullValue,
Literal(str(n_downloadable_file)),
Literal(str(n_download_url)),
)
)

Expand Down

0 comments on commit c2b25b2

Please sign in to comment.