Skip to content

Commit

Permalink
fix: platform specific prefixes in S3 URIs and MLflow tests
Browse files Browse the repository at this point in the history
Addresses an issue where S3 key prefixes followed the platform's path conventions causing tests that validate S3 URIs to fail. The fix is to normalize S3 prefixes to follow POSIX path conventions using Path.as_posix() from pathlib.

Adds a github action for unit tests on a windows-2022 image. test_mlflow.py was updated to use pathlib instead of os.path to fix cross-platform issues with the tests.
  • Loading branch information
keithmanville committed Aug 21, 2023
1 parent f703620 commit 3f56807
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 14 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/tox-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,10 @@ jobs:
run: python3 -m tox run -e ${{ matrix.tox-testenv }}

unit-tests:
runs-on: ubuntu-20.04
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: ["ubuntu-20.04", "windows-2022"]
python-version: ["3.9", "3.10"]
tox-testenv:
- "clean,py39-pytest-cov,report"
Expand All @@ -91,6 +92,7 @@ jobs:
- uses: actions/checkout@v3

- name: install English words dictionary
if: ${{ contains(matrix.os, 'ubuntu') }}
run: sudo apt install -y wamerican

- name: setup python ${{ matrix.python-version }}
Expand All @@ -106,6 +108,7 @@ jobs:
- name: get pip cache dir
id: pip-cache
run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
shell: bash

- name: cache dependencies
uses: actions/[email protected]
Expand Down
10 changes: 7 additions & 3 deletions src/dioptra/restapi/task_plugin/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def create(
plugin_uri_list: Optional[List[str]] = self._s3_service.upload_directory(
directory=tmpdir,
bucket=bucket,
prefix=str(prefix),
prefix=prefix.as_posix(),
include_suffixes=[".py"],
log=log,
)
Expand Down Expand Up @@ -114,7 +114,11 @@ def delete(
return []

prefix: Path = Path(collection) / task_plugin_name
self._s3_service.delete_prefix(bucket=bucket, prefix=str(prefix), log=log)
self._s3_service.delete_prefix(
bucket=bucket,
prefix=prefix.as_posix(),
log=log,
)

log.info(
"TaskPlugin deleted",
Expand Down Expand Up @@ -175,7 +179,7 @@ def get_by_name_in_collection(
task_plugin_name=task_plugin_name,
)

prefix = Path(collection) / task_plugin_name
prefix: Path = Path(collection) / task_plugin_name
modules: List[str] = self._s3_service.list_objects(
bucket=bucket,
prefix=self._s3_service.normalize_prefix(str(prefix), log=log),
Expand Down
17 changes: 7 additions & 10 deletions tests/unit/task_plugins/dioptra_builtins/artifacts/test_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
# https://creativecommons.org/licenses/by/4.0/legalcode
from __future__ import annotations

import os
import uuid
from copy import deepcopy
from pathlib import Path
Expand Down Expand Up @@ -48,7 +47,7 @@ def download_artifacts(
) -> str:
if dst_path is None:
dst_path = "tmp_unit_test"
dst_path = os.path.abspath(dst_path)
dst_path = str(Path(dst_path).absolute())
dst_local_path = dst_path
return dst_local_path

Expand All @@ -73,7 +72,7 @@ def create_run(self, experiment_id, start_time=56020, tags=None, run_name=None):
if not run_name_tag:
tags.append(RunTag(key=MLFLOW_RUN_NAME, value=run_name))
run_uuid = uuid.uuid4().hex
artifact_uri = os.path.join("/path/to/artifacts/", run_uuid, "artifacts")
artifact_uri = Path("/path/to/artifacts/") / run_uuid / "artifacts"

run_info = RunInfo(
run_uuid=run_uuid,
Expand Down Expand Up @@ -157,7 +156,7 @@ def test_download_all_artifacts_in_run(

dst_path = download_all_artifacts_in_run(run_id, artifact_path, destination_path)
assert isinstance(dst_path, str)
assert destination_path == os.path.relpath(dst_path)
assert Path(destination_path) == Path(dst_path).relative_to(Path.cwd())


@pytest.mark.parametrize(
Expand Down Expand Up @@ -199,10 +198,8 @@ def test_upload_data_frame_artifact(

upload_data_frame_artifact(data_frame, file_name, file_format, None, working_dir)

pwd = "." if working_dir is None else working_dir
assert os.path.isfile(
Path(os.path.abspath(pwd)) / Path(file_name).with_suffix("." + output)
)
pwd = Path("." if working_dir is None else working_dir).absolute()
assert (pwd / Path(file_name).with_suffix("." + output)).is_file()


@pytest.mark.parametrize(
Expand Down Expand Up @@ -235,8 +232,8 @@ def test_upload_directory_as_tarball_artifact(
upload_directory_as_tarball_artifact(
source_dir, tarball_filename, tarball_write_mode, working_dir
)
pwd = "." if working_dir is None else working_dir
assert os.path.isfile(Path(os.path.abspath(pwd)) / Path(tarball_filename))
pwd = Path("." if working_dir is None else working_dir).absolute()
assert (pwd / tarball_filename).is_file()


@pytest.mark.parametrize(
Expand Down

0 comments on commit 3f56807

Please sign in to comment.