Skip to content

Commit

Permalink
Resolves #806.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Aug 9, 2024
1 parent ab4ec73 commit 8117024
Show file tree
Hide file tree
Showing 10 changed files with 79 additions and 14 deletions.
Empty file.
Empty file.
Empty file.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Empty file.
Empty file.
Empty file.
Empty file.
42 changes: 40 additions & 2 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,7 +1522,7 @@ def test_dictionaries(self):

class TestMimeTypeFunctions(unittest.TestCase):
def test_mimeypes_from_extensions(self):
config = dict()
config = dict({"input_dir": "."})
fixtures = [
{
"file_path": "tests/assets/mime_type_test/test.tXt",
Expand All @@ -1540,6 +1540,14 @@ def test_mimeypes_from_extensions(self):
"file_path": "tests/assets/mime_type_test/testtest",
"mime_type": None,
},
{
"file_path": "tests/assets/mime_type_test/test.jpg",
"mime_type": "image/jpeg",
},
{
"file_path": "tests/assets/mime_type_test/test.xml",
"mime_type": "application/xml",
},
]

for fixture in fixtures:
Expand All @@ -1549,7 +1557,7 @@ def test_mimeypes_from_extensions(self):
self.assertEqual(fixture["mime_type"], mimetype)

def test_mimeypes_from_extensions_lazy(self):
config = dict()
config = dict({"input_dir": "."})
fixtures = [
{
"file_path": "tests/assets/mime_type_test/test.txt",
Expand All @@ -1571,6 +1579,36 @@ def test_mimeypes_from_extensions_lazy(self):
)
self.assertEqual(fixture["mime_type"], mimetype)

def test_mimeypes_from_extensions_with_configs(self):
extensions_to_mimetypes = collections.OrderedDict()
extensions_to_mimetypes["txt"] = "foo/bar"
extensions_to_mimetypes["xml"] = "foo/xml"
config = {"input_dir": ".", "extensions_to_mimetypes": extensions_to_mimetypes}
fixtures = [
{
"file_path": "tests/assets/mime_type_test/test.txt",
"mime_type": "foo/bar",
},
{
"file_path": "tests/assets/mime_type_test/test.101910",
"mime_type": None,
},
{
"file_path": "tests/assets/mime_type_test/test.hocr",
"mime_type": "text/vnd.hocr+html",
},
{
"file_path": "tests/assets/mime_type_test/test.xml",
"mime_type": "foo/xml",
},
]

for fixture in fixtures:
mimetype = workbench_utils.get_mimetype_from_extension(
config, fixture["file_path"]
)
self.assertEqual(fixture["mime_type"], mimetype)


if __name__ == "__main__":
unittest.main()
51 changes: 39 additions & 12 deletions workbench_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5137,13 +5137,28 @@ def create_media(
],
}

# WIP on #806. Since our only use case so far for assigning a non-standard/uncommon MIME
# type is for .hocr files, we will reuse the 'paged_content_additional_page_media'
# config setting to hard code logic for that file type. If we need a more generalized
# solution later, we can add it.
# print("DEBUG filename, media_use_tid", filename, media_use_tid)
# if media_use_tids[0] == "920":
# media_json.update({"field_mime_type": [{"value": "text/vnd.hocr+html"}]})
# Use the 'paged_content_additional_page_media' config setting to determine
# if any hOCR files are being added, since we need to explicitly define hOCR
# media's MIME type as "text/vnd.hocr+html".
file_is_hocr = False
if "paged_content_additional_page_media" in config:
file_mimetype = get_mimetype_from_extension(config, filename)
for uri_to_extension_mapping in config[
"paged_content_additional_page_media"
]:
if (
"https://discoverygarden.ca/use#hocr"
in uri_to_extension_mapping
):
file_is_hocr = True

if file_is_hocr is True:
media_use_uri = get_term_uri(config, media_use_tids[0])
if (
media_use_uri == "https://discoverygarden.ca/use#hocr"
and file_mimetype == "text/vnd.hocr+html"
):
media_json.update({"field_mime_type": [{"value": file_mimetype}]})

if "published" in csv_row and len(csv_row["published"]) > 0:
media_json["status"] = {"value": csv_row["published"]}
Expand Down Expand Up @@ -9047,22 +9062,30 @@ def get_extension_from_mimetype(config, mimetype):
return None


def get_mimetype_from_extension(config, filepath, lazy=False):
def get_mimetype_from_extension(config, file_path, lazy=False):
"""For a given file path, return the corresponding MIME type."""
"""Parameters
----------
config : dict
The configuration settings defined by workbench_config.get_config().
filepath: string
The 'extensions_to_mimetypes' setting allows assignment of MIME types
in config.
file_path: string
The path to the local file to get the MIME type for.
lazy: bool
If True, and no entry for a given extension exists in the map, return
"application/octet-stream".
"application/octet-stream" as a default if no MIME type can be determined.
If False, let Python's mimetypes library guess.
Returns
-------
string|None
The MIME type, or None if the MIME type cannot be determined.
"""
if os.path.isabs(file_path) is True:
filepath = file_path
else:
filepath = os.path.join(config["input_dir"], file_path)

if os.path.exists(filepath):
root, ext = os.path.splitext(filepath)
ext = ext.lstrip(".").lower()
Expand All @@ -9072,13 +9095,17 @@ def get_mimetype_from_extension(config, filepath, lazy=False):
)
return None

# A MIME type used in Islandora but not recognized by Python's mimetypes library.
map = {"hocr": "text/vnd.hocr+html"}

# Modify the map as per config.
if (
"extensions_to_mimetypes" in config
and len(config["extensions_to_mimetypes"]) > 0
):
for mtype, ext in config["extensions_to_mimetypes"].items():
map[ext] = mtype
for extension, mtype in config["extensions_to_mimetypes"].items():
extension = extension.lstrip(".").lower()
map[extension] = mtype

if ext in map:
return map[ext]
Expand Down

0 comments on commit 8117024

Please sign in to comment.