Skip to content

Commit

Permalink
Pick up migrations of some objects to case_utils.inherent_uuid
Browse files Browse the repository at this point in the history
No effects were observed on Make-managed files.

Signed-off-by: Alex Nelson <[email protected]>
  • Loading branch information
ajnelson-nist committed Nov 14, 2023
1 parent b336c93 commit 21e85ce
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 47 deletions.
50 changes: 5 additions & 45 deletions src/case_utils_extras.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,58 +22,18 @@
date 2004-03-04) and did not have a linked superseding document.
"""

import binascii
import re
import uuid
from typing import Dict, Optional, Tuple

from case_utils.namespace import NS_UCO_VOCABULARY, NS_XSD
from rdflib import Literal, Namespace, URIRef


L_MD5 = Literal("MD5", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SHA1 = Literal("SHA1", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SHA256 = Literal("SHA256", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SHA3_256 = Literal("SHA3-256", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SHA384 = Literal("SHA384", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SHA512 = Literal("SHA512", datatype=NS_UCO_VOCABULARY.HashNameVocab)
L_SSDEEP = Literal("SSDEEP", datatype=NS_UCO_VOCABULARY.HashNameVocab)

# Key: hashMethod literal.
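# Value: Tuple.
# * Lowercase spelling
# * Length of the hash value in hexadecimal characters, or None where no fixed length is given (SSDEEP)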
# TODO - SHA3-256 is used by Digital Corpora, impacting CASE-Corpora, but the vocabulary string is not yet in UCO.
# https://github.com/ucoProject/UCO/issues/526
HASH_METHOD_CASTINGS: Dict[Literal, Tuple[str, Optional[int]]] = {
L_MD5: ("md5", 32),
L_SHA1: ("sha1", 40),
L_SHA256: ("sha256", 64),
L_SHA3_256: ("sha3-256", 64),
L_SHA384: ("sha384", 96),
L_SHA512: ("sha512", 128),
L_SSDEEP: ("ssdeep", None),
}

RX_UUID = re.compile(
"[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE
from case_utils.inherent_uuid import (
RX_UUID,
hash_method_value_uuid,
)
from rdflib import Literal, Namespace, URIRef


def method_value_to_node(
l_hash_method: Literal, l_hash_value: Literal, ns_kb: Namespace
) -> URIRef:
if l_hash_value.datatype != NS_XSD.hexBinary:
raise ValueError("Expected hexBinary datatype for l_hash_value.")
hash_value_str: str = binascii.hexlify(l_hash_value.toPython()).decode().lower()

hash_method_str = HASH_METHOD_CASTINGS[l_hash_method][0]

urn_template = "urn:hash::%s:%s"
urn_populated = urn_template % (hash_method_str, hash_value_str)

hash_uuid = str(uuid.uuid5(uuid.NAMESPACE_URL, urn_populated))
hash_uuid = str(hash_method_value_uuid(l_hash_method, l_hash_value))
n_hash = ns_kb["Hash-" + hash_uuid]

return n_hash


Expand Down
4 changes: 2 additions & 2 deletions src/digital_corpora_supplement_ttl.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
NS_UCO_VOCABULARY,
NS_XSD,
)
from case_utils_extras import (
from case_utils.inherent_uuid import (
L_SHA256,
L_SHA3_256,
method_value_to_node,
)
from case_utils_extras import method_value_to_node
from rdflib import Graph, Literal, Namespace, URIRef

NS_CASE_CORPORA = Namespace("http://example.org/ontology/case-corpora/")
Expand Down

0 comments on commit 21e85ce

Please sign in to comment.