[Docs][NNCF] TorchFX backend docs

openvinotoolkit · Oct 10, 2024 · 6eee583 · 6eee583
1 parent 7d5b7cf
commit 6eee583
Show file tree

Hide file tree

Showing 2 changed files with 75 additions and 0 deletions.
diff --git a/...-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst b/...-optimization-guide/quantizing-models-post-training/basic-quantization-flow.rst
@@ -63,6 +63,13 @@ The transformation function is a function that takes a sample from the dataset a
          :language: python
          :fragment: [dataset]
 
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment: [dataset]
+
 If there is no framework dataset object, you can create your own entity that implements the ``Iterable`` interface in Python, for example the list of images, and returns data samples feasible for inference. In this case, a transformation function is not required.
 
 
@@ -102,6 +109,12 @@ See the `example section <#examples-of-how-to-apply-nncf-post-training-quantizat
          :language: python
          :fragment: [quantization]
 
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment: [quantization]
 
 After that the model can be converted into the OpenVINO Intermediate Representation (IR) if needed, compiled and run with OpenVINO.
 If you have not already installed OpenVINO developer tools, install it with ``pip install openvino``.
@@ -136,6 +149,13 @@ If you have not already installed OpenVINO developer tools, install it with ``pi
          :language: python
          :fragment:  [inference]
 
+   .. tab-item:: TorchFX
+      :sync: torch_fx
+
+      .. doxygensnippet:: docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
+         :language: python
+         :fragment:  [inference]
+
 Tune quantization parameters
 ############################
 

diff --git a/docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py b/docs/optimization_guide/nncf/ptq/code/ptq_torch_fx.py
@@ -0,0 +1,55 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+#! [dataset]
+import nncf
+import torch
+
+calibration_loader = torch.utils.data.DataLoader(...)  # placeholder: supply your own calibration DataLoader
+
+def transform_fn(data_item):  # transformation function handed to nncf.Dataset below
+    images, _ = data_item  # unpack a 2-element item; the second element (presumably labels) is discarded
+    return images  # only the first element is passed on as the model input
+
+calibration_dataset = nncf.Dataset(calibration_loader, transform_fn)  # wraps the loader together with transform_fn
+#! [dataset]
+
+#! [quantization]
+import torchvision
+import openvino.torch
+model = torchvision.models.resnet50(pretrained=True)
+
+input_fp32 = ... # FP32 model input
+
+with nncf.torch.disable_patching():
+    exported_model = torch.export.export(model, args=(input_fp32,))
+    quantized_model = nncf.quantize(model, calibration_dataset)
+#! [quantization]
+
+#! [inference]
+import openvino as ov
+
+input_fp32 = ... # FP32 model input
+
+# compile quantized model using torch.compile API
+with nncf.torch.disable_patching():
+    compiled_model_int8 = torch.compile(quantized_model, backend="openvino")
+    # First call compiles an OpenVino model underneath, so it could take longer
+    # than original model call.
+    res = compiled_model_int8(input_fp32)
+    ...
+
+
+# convert exported Torch model to OpenVINO model
+with nncf.torch.disable_patching():
+    exported_quantized_model = torch.export.export(quantized_model, args=(input_fp32,))
+    ov_quantized_model = ov.convert_model(quantized_model, example_input=input_fp32)
+
+# compile the model to transform quantized operations to int8
+model_int8 = ov.compile_model(ov_quantized_model)
+
+res = model_int8(input_fp32)
+
+# save the model
+ov.save_model(ov_quantized_model, "quantized_model.xml")
+#! [inference]