From a65e36e3df286e14f58bef0d5c45acabcc4ffb9a Mon Sep 17 00:00:00 2001
From: Daniil Lyakhov
Date: Thu, 21 Nov 2024 04:36:17 -0800
Subject: [PATCH] [Docs][NNCF] TorchFX backend docs (#26997)

Adds TorchFX backend tabs to the NNCF basic quantization flow guide, together
with a new ptq_torch_fx.py snippet file.

Doc preview:
https://openvino-doc.iotg.sclab.intel.com/nncf-docs/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.html

---------

Co-authored-by: Roman Kazantsev
Co-authored-by: Alexander Suslov
---
 .../basic-quantization-flow.rst    | 25 +++++++++++
 .../nncf/ptq/code/ptq_torch_fx.py  | 44 +++++++++++++++++++
 2 files changed, 69 insertions(+)
 create mode 100644 docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py

diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst
index b4f31daedfa3e4..62c10e52266ec9 100644
--- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst
+++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst
@@ -63,6 +63,13 @@ The transformation function is a function that takes a sample from the dataset a
          :language: python
          :fragment: [dataset]
 
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment: [dataset]
+
 If there is no framework dataset object, you can create your own entity that implements the ``Iterable`` interface in Python, for example the list of images, and returns data samples feasible for inference. In this case, a transformation function is not required.
@@ -102,6 +109,13 @@ See the `example section <#examples-of-how-to-apply-nncf-post-training-quantizat
          :language: python
          :fragment: [quantization]
 
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment: [quantization]
+
 After that the model can be converted into the OpenVINO Intermediate Representation (IR) if needed, compiled and run with OpenVINO.
 If you have not already installed OpenVINO developer tools, install it with ``pip install openvino``.
@@ -136,6 +150,17 @@ If you have not already installed OpenVINO developer tools, install it with ``pi
          :language: python
          :fragment: [inference]
 
+TorchFX models can utilize OpenVINO optimizations through the `torch.compile(..., backend="openvino") <https://docs.openvino.ai/2024/openvino-workflow/torch-compile.html>`__ functionality:
+
+.. tab-set::
+
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment: [inference]
+
 Tune quantization parameters
 ############################
diff --git a/docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py b/docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
new file mode 100644
index 00000000000000..b8f76304099ae6
--- /dev/null
+++ b/docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
@@ -0,0 +1,44 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+#! [dataset]
+import nncf
+import torch
+
+calibration_loader = torch.utils.data.DataLoader(...)
+
+def transform_fn(data_item):
+    images, _ = data_item
+    return images
+
+calibration_dataset = nncf.Dataset(calibration_loader, transform_fn)
+#! [dataset]
+
+#! [quantization]
+import torchvision
+from nncf.torch import disable_patching
+
+input_fp32 = torch.ones((1, 3, 224, 224))  # example FP32 model input
+model = torchvision.models.resnet50(weights=torchvision.models.ResNet50_Weights.DEFAULT)
+
+with disable_patching():
+    exported_model = torch.export.export_for_training(model, args=(input_fp32,)).module()
+    quantized_model = nncf.quantize(exported_model, calibration_dataset)
+#! [quantization]
+
+#! [inference]
+import openvino.torch
+
+input_fp32 = ...  # FP32 model input
+
+# Compile the quantized model using the torch.compile API
+with disable_patching():
+    compiled_model_int8 = torch.compile(quantized_model, backend="openvino")
+    # The OpenVINO backend compiles the model during the first call,
+    # so the first call is expected to be slower than the following calls
+    res = compiled_model_int8(input_fp32)
+
+    # Save the quantized model with torch.export
+    exported_program = torch.export.export(quantized_model, args=(input_fp32,))
+    torch.export.save(exported_program, 'exported_program.pt2')
+#! [inference]
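
Editor's note, not part of the patch: the [inference] fragment ends by saving the
quantized program with torch.export.save, but the docs do not show how that file
is consumed afterwards. Below is a minimal Python sketch of one possible follow-up,
reloading 'exported_program.pt2' (the file name from the snippet above) and
converting it to OpenVINO IR. It assumes openvino.convert_model accepts a
torch.export.ExportedProgram, which recent OpenVINO releases support; treat it as
an illustration under that assumption, not as the documented flow.

    # Hedged sketch: reload the saved quantized program and run it through
    # the OpenVINO runtime directly, instead of via torch.compile.
    import openvino as ov
    import torch

    # Load the ExportedProgram produced by torch.export.save above
    exported_program = torch.export.load('exported_program.pt2')

    # Assumption: convert_model accepts an ExportedProgram directly
    ov_model = ov.convert_model(exported_program)
    compiled = ov.compile_model(ov_model, 'CPU')

    # Run inference on a dummy input matching the export signature
    result = compiled(torch.ones((1, 3, 224, 224)).numpy())

Converting ahead of time this way trades the first-call compilation overhead of
the torch.compile path for an explicit conversion step, and produces an IR that
can be saved with ov.save_model and deployed without PyTorch installed.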