diff --git a/.devcontainer/onCreateCommand.sh b/.devcontainer/onCreateCommand.sh
index c4bf0ea5c..bd69500a8 100755
--- a/.devcontainer/onCreateCommand.sh
+++ b/.devcontainer/onCreateCommand.sh
@@ -2,7 +2,7 @@
 
 pip install --upgrade pip
 pip install wheel
-pip install openvino-dev==2022.1.0 # [OPTIONAL] to generate optimized models for inference
+pip install openvino-dev==2023.0.1 # [OPTIONAL] to generate optimized models for inference
 pip install mlcube_docker # [OPTIONAL] to deploy GaNDLF models as MLCube-compliant Docker containers
 pip install medmnist==2.1.0
 pip install torch==1.13.1+cpu torchvision==0.14.1+cpu torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu
diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml
index 592c8e342..5364dc8df 100644
--- a/.github/workflows/python-test.yml
+++ b/.github/workflows/python-test.yml
@@ -72,7 +72,7 @@ jobs:
         sudo apt-get install libvips libvips-tools -y
         python -m pip install --upgrade pip
         python -m pip install wheel
-        python -m pip install openvino-dev==2022.1.0 mlcube_docker
+        python -m pip install openvino-dev==2023.0.1 mlcube_docker
         pip install torch==1.13.1+cpu torchvision==0.14.1+cpu torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu
         pip install -e .
    - name: Run generic unit tests
diff --git a/Dockerfile-CPU b/Dockerfile-CPU
index 127ffda89..0b5221fa7 100644
--- a/Dockerfile-CPU
+++ b/Dockerfile-CPU
@@ -8,7 +8,7 @@ RUN apt-get update && apt-get install -y python3.8 python3-pip libjpeg8-dev zlib
 RUN python3.8 -m pip install --upgrade pip
 # EXPLICITLY install cpu versions of torch/torchvision (not all versions have +cpu modes on PyPI...)
 RUN python3.8 -m pip install torch==1.13.1+cpu torchvision==0.14.1+cpu torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cpu
-RUN python3.8 -m pip install openvino-dev==2022.1.0 opencv-python-headless mlcube_docker
+RUN python3.8 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 
 # Do some dependency installation separately here to make layer caching more efficient
 COPY ./setup.py ./setup.py
diff --git a/Dockerfile-CUDA11.6 b/Dockerfile-CUDA11.6
index 05409bb47..0b8ed9c42 100644
--- a/Dockerfile-CUDA11.6
+++ b/Dockerfile-CUDA11.6
@@ -10,7 +10,7 @@ LABEL version=1.0
 RUN apt-get update && apt-get install -y python3.8 python3-pip libjpeg8-dev zlib1g-dev python3-dev libpython3.8-dev libffi-dev libgl1
 RUN python3.8 -m pip install --upgrade pip
 RUN python3.8 -m pip install torch==1.13.1+cu116 torchvision==0.14.1+cu116 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/cu116
-RUN python3.8 -m pip install openvino-dev==2022.1.0 opencv-python-headless mlcube_docker
+RUN python3.8 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 
 # Do some dependency installation separately here to make layer caching more efficient
 COPY ./setup.py ./setup.py
diff --git a/Dockerfile-ROCm b/Dockerfile-ROCm
index 9cf8053fc..62e34cb81 100644
--- a/Dockerfile-ROCm
+++ b/Dockerfile-ROCm
@@ -8,7 +8,7 @@ LABEL version=1.0
 # The base image contains ROCm, python 3.8 and pytorch already, no need to install those
 RUN python3 -m pip install --upgrade pip
 RUN python3.8 -m pip install torch==1.13.1+rocm5.2 torchvision==0.14.1+rocm5.2 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/rocm5.2
-RUN python3 -m pip install --upgrade pip && python3 -m pip install openvino-dev==2022.1.0 opencv-python-headless mlcube_docker
+RUN python3 -m pip install --upgrade pip && python3 -m pip install openvino-dev==2023.0.1 opencv-python-headless mlcube_docker
 RUN apt-get update && apt-get install -y libgl1
 
 # Do some dependency installation separately here to make layer caching more efficient
diff --git a/GANDLF/cli/post_training_model_optimization.py b/GANDLF/cli/post_training_model_optimization.py
index 65a3147b2..46a5ea6ff 100644
--- a/GANDLF/cli/post_training_model_optimization.py
+++ b/GANDLF/cli/post_training_model_optimization.py
@@ -4,7 +4,7 @@
 from GANDLF.utils import version_check, load_model, optimize_and_save_model
 
 
-def post_training_model_optimization(model_path, config_path):
+def post_training_model_optimization(model_path: str, config_path: str) -> bool:
     """
     CLI function to optimize a model for deployment.
 
@@ -15,29 +15,32 @@ def post_training_model_optimization(model_path, config_path):
     Returns:
         bool: True if successful, False otherwise.
     """
-
+    # Load the model and its parameters from the given paths
     main_dict = load_model(model_path, "cpu")
     parameters = main_dict.get("parameters", None)
+
+    # If parameters are not available in the model file, parse them from the config file
     parameters = (
         parseConfig(config_path, version_check_flag=False)
         if parameters is None
         else parameters
     )
-    (
-        model,
-        _,
-        _,
-        _,
-        _,
-        parameters,
-    ) = create_pytorch_objects(parameters, device="cpu")
+
+    # Create PyTorch objects and set onnx_export to True for optimization
+    model, _, _, _, _, parameters = create_pytorch_objects(parameters, device="cpu")
     parameters["model"]["onnx_export"] = True
+
+    # Perform version check and load the model's state dictionary
     version_check(parameters["version"], version_to_check=main_dict["version"])
     model.load_state_dict(main_dict["model_state_dict"])
+
+    # Optimize the model and save it to an ONNX file
     optimize_and_save_model(model, parameters, model_path, onnx_export=True)
+
+    # Check if the optimized model file exists
     optimized_model_path = model_path.replace("pth.tar", "onnx")
     if not os.path.exists(optimized_model_path):
-        print("Error while optimizing model.")
+        print("Error while optimizing the model.")
         return False
+
     return True
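Aside for reviewers: a minimal sketch of how the entry point above is exercised end to end. The checkpoint and config paths are hypothetical placeholders, and the import assumes `GANDLF.cli` re-exports the function as in the current tree.

```python
# Illustrative sketch only: the paths below are hypothetical placeholders.
from GANDLF.cli import post_training_model_optimization

# Rebuilds the model on CPU from the checkpoint, exports ONNX, and (when a
# supported openvino-dev is installed) also converts it to OpenVINO IR.
success = post_training_model_optimization(
    model_path="/tmp/experiment/best.pth.tar",
    config_path="/tmp/experiment/config.yaml",
)
print("optimization succeeded" if success else "optimization failed")
```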
""" + # Check if ONNX export is enabled in the parameter dictionary onnx_export = params["model"].get("onnx_export", onnx_export) - # check for incompatible topologies and disable onnx export - # customized imagenet_vgg no longer supported for onnx export: https://github.com/pytorch/pytorch/issues/42653 + + # Check for incompatible topologies and disable ONNX export + # Customized imagenet_vgg no longer supported for ONNX export if onnx_export: - if (params["model"]["architecture"] in ["sdnet", "brain_age"]) or ( - "imagenet_vgg" in params["model"]["architecture"] - ): + architecture = params["model"]["architecture"] + if architecture in ["sdnet", "brain_age"] or "imagenet_vgg" in architecture: onnx_export = False - if not (onnx_export): + if not onnx_export: + # Print a warning if ONNX export is disabled and not already warned if "onnx_print" not in params: print("WARNING: Current model is not supported by ONNX/OpenVINO!") params["onnx_print"] = True return else: try: - print("Optimizing best model.") + print("Optimizing the best model.") num_channel = params["model"]["num_channels"] model_dimension = params["model"]["dimension"] - ov_output_data_type = params["model"].get("data_type", "FP32") input_shape = params["patch_size"] onnx_path = path - if not (onnx_path.endswith(".onnx")): + if not onnx_path.endswith(".onnx"): onnx_path = onnx_path.replace("pth.tar", "onnx") + if model_dimension == 2: dummy_input = torch.randn( (1, num_channel, input_shape[0], input_shape[1]) @@ -69,6 +75,7 @@ def optimize_and_save_model(model, params, path, onnx_export=True): (1, num_channel, input_shape[0], input_shape[1], input_shape[2]) ) + # Export the model to ONNX format with torch.no_grad(): torch.onnx.export( model.to("cpu"), @@ -86,63 +93,57 @@ def optimize_and_save_model(model, params, path, onnx_export=True): print("WARNING: Cannot export to ONNX model.") return - # https://github.com/mlcommons/GaNDLF/issues/605 + # Check if OpenVINO is present and try to convert the ONNX model openvino_present = False try: - import openvino - - openvino_present = True + import openvino as ov + from openvino.tools.mo import convert_model + from openvino.runtime import get_version + openvino_present = False + # check for the correct openvino version to prevent inadvertent api breaks + if "2023.0.1" in get_version(): + openvino_present = True except ImportError: print("WARNING: OpenVINO is not present.") if openvino_present: + xml_path = onnx_path.replace("onnx", "xml") + bin_path = onnx_path.replace("onnx", "bin") try: if model_dimension == 2: - subprocess.call( - [ - "mo", - "--input_model", - "{0}".format(onnx_path), - "--input_shape", - "[1,{0},{1},{2}]".format( - num_channel, input_shape[0], input_shape[1] - ), - "--data_type", - "{0}".format(ov_output_data_type), - "--output_dir", - "{0}".format(ov_output_dir), - ], + ov_model = convert_model( + onnx_path, + input_shape=(1, num_channel, input_shape[0], input_shape[1]), ) else: - subprocess.call( - [ - "mo", - "--input_model", - "{0}".format(onnx_path), - "--input_shape", - "[1,{0},{1},{2},{3}]".format( - num_channel, - input_shape[0], - input_shape[1], - input_shape[2], - ), - "--data_type", - "{0}".format(ov_output_data_type), - "--output_dir", - "{0}".format(ov_output_dir), - ], + ov_model = convert_model( + onnx_path, + input_shape=( + 1, + num_channel, + input_shape[0], + input_shape[1], + input_shape[2], + ), ) - except subprocess.CalledProcessError: - print("WARNING: OpenVINO Model Optimizer IR conversion failed.") - - -def save_model(model_dict, model, 
params, path, onnx_export=True): + ov.runtime.serialize(ov_model, xml_path=xml_path, bin_path=bin_path) + except Exception as e: + print("WARNING: OpenVINO Model Optimizer IR conversion failed: " + e) + + +def save_model( + model_dict: Dict[str, Any], + model: torch.nn.Module, + params: Dict[str, Any], + path: str, + onnx_export: bool = True, +): """ Save the model dictionary to a file. Args: model_dict (dict): Model dictionary to save. - model (torch model): Trained torch model. + model (torch.nn.Module): Trained torch model. params (dict): The parameter dictionary. path (str): The path to save the model dictionary to. onnx_export (bool): Whether to export to ONNX and OpenVINO. @@ -153,6 +154,7 @@ def save_model(model_dict, model, params, path, onnx_export=True): ).hexdigest() model_dict["version"] = __version__ model_dict["parameters"] = params + try: model_dict["git_hash"] = ( subprocess.check_output(["git", "rev-parse", "HEAD"]) @@ -161,20 +163,23 @@ def save_model(model_dict, model, params, path, onnx_export=True): ) except subprocess.CalledProcessError: model_dict["git_hash"] = None + torch.save(model_dict, path) # post-training optimization optimize_and_save_model(model, params, path, onnx_export=onnx_export) -def load_model(path, device, full_sanity_check=True): +def load_model( + path: str, device: torch.device, full_sanity_check: bool = True +) -> Dict[str, Any]: """ Load a model dictionary from a file. Args: path (str): The path to save the model dictionary to. device (torch.device): The device to run the model on. - full_sanity_check (bool): Whether to run full sanity checking on model. + full_sanity_check (bool): Whether to run full sanity checking on the model. Returns: dict: Model dictionary containing model parameters and metadata. @@ -205,7 +210,7 @@ def load_model(path, device, full_sanity_check=True): return model_dict -def load_ov_model(path, device="CPU"): +def load_ov_model(path: str, device: str = "CPU"): """ Load an OpenVINO IR model from an .xml file. diff --git a/docs/setup.md b/docs/setup.md index b3b9eed36..ed15f014d 100644 --- a/docs/setup.md +++ b/docs/setup.md @@ -42,7 +42,7 @@ GaNDLF's primary computational foundation is built on PyTorch, and as such it su The following dependencies are optional, and are needed for specific features of GaNDLF. ```bash -(venv_gandlf) $> pip install openvino-dev==2022.1.0 # [OPTIONAL] to generate post-training optimized models for inference +(venv_gandlf) $> pip install openvino-dev==2023.0.1 # [OPTIONAL] to generate post-training optimized models for inference (venv_gandlf) $> pip install mlcube_docker # [OPTIONAL] to deploy GaNDLF models as MLCube-compliant Docker containers ```
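Context for the modelio.py hunks: the patch drops the `mo` CLI subprocess in favor of OpenVINO's in-process Python API. Below is a standalone sketch of that convert-and-serialize flow, assuming openvino-dev==2023.0.1 and a hypothetical model.onnx with a 3-channel 2D input:

```python
# Standalone sketch, assuming openvino-dev==2023.0.1 is installed.
# "model.onnx" and the input shape are hypothetical stand-ins.
import openvino as ov
from openvino.runtime import get_version
from openvino.tools.mo import convert_model

# Mirror the version guard from the patch: the mo Python API has changed
# between OpenVINO releases, so only convert on the pinned version.
if "2023.0.1" in get_version():
    ov_model = convert_model(
        "model.onnx",
        input_shape=(1, 3, 128, 128),  # (batch, channels, height, width)
    )
    # Serialize the IR pair: model.xml (topology) and model.bin (weights)
    ov.runtime.serialize(ov_model, xml_path="model.xml", bin_path="model.bin")
else:
    print("WARNING: unpinned OpenVINO version; skipping IR conversion.")
```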