Set backup_mode to None for 8 bit WC
l-bat committed Oct 7, 2024 · 1 parent dc4525d · commit 9f70ac5
Showing 4 changed files with 20 additions and 3 deletions.
14 changes: 11 additions & 3 deletions nncf/quantization/quantize_model.py
@@ -395,7 +395,7 @@ def compress_weights(
     scale_estimation: Optional[bool] = None,
     gptq: Optional[bool] = None,
     lora_correction: Optional[bool] = None,
-    backup_mode: BackupMode = BackupMode.INT8_ASYM,
+    backup_mode: Optional[BackupMode] = None,
     advanced_parameters: Optional[AdvancedCompressionParameters] = None,
 ) -> TModel:
     """
@@ -446,7 +446,7 @@ def compress_weights(
     :type gptq: bool
     :param lora_correction: Indicates whether to use Lora Correction algorithm.
     :type lora_correction: bool
-    :param backup_mode: Defines a backup mode for mixed-precision weight compression. Defaults to INT8_ASYM.
+    :param backup_mode: Defines a backup mode for mixed-precision weight compression.
         NONE stands for original floating-point precision of the model weights.
         In this mode, weights are retained in their original precision without any quantization.
         INT8_SYM stands for 8-bit integer symmetric quantization without zero point.
@@ -482,6 +482,9 @@ def compress_weights(
                 "Set them to None."
             )
 
+        if backup_mode is not None:
+            raise AttributeError("Torch backend does not support backup_mode option.")
+
         if is_wrapped_model(model):
             if not model.nncf.trace_parameters:
                 raise ValueError(
@@ -509,6 +512,9 @@ def compress_weights(
                 f"but given {mode.value} mode."
             )
 
+        if backup_mode is not None:
+            raise AttributeError("TorchFX backend does not support backup_mode option.")
+
         if any((awq, scale_estimation, gptq, lora_correction)):
             raise AttributeError(
                 "TorchFX backend does not support 'awq', 'scale_estimation', 'gptq',"
@@ -549,7 +555,7 @@ def compress_weights(
                 "Default values of `ratio` (1) and `group_size` (-1) parameters can not be overridden"
             )
 
-        if backup_mode != BackupMode.INT8_ASYM:
+        if backup_mode is not None:
             raise AttributeError("INT8 modes do not support the `backup_mode` option")
 
         options = {
@@ -589,6 +595,8 @@ def compress_weights(
             if dataset is None
             else SensitivityMetric.MAX_ACTIVATION_VARIANCE
         )
+    if backup_mode is None:
+        backup_mode = BackupMode.INT8_ASYM
     if ratio != 1 and dataset is None and sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR:
         raise AttributeError(
             f"Mixed precision selection based on the given sensitivity metric={sensitivity_metric.value} requires "
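To make the new behavior concrete, here is a minimal sketch of the call site, assuming a hypothetical OpenVINO model ov_model (the variable and parameter values are illustrative, not part of the diff). With a 4-bit mixed-precision mode, an unset backup_mode now resolves to the previous INT8_ASYM default inside compress_weights, while an explicitly passed backup_mode combined with a pure 8-bit mode raises, because INT8_ASYM no longer doubles as the "unset" sentinel:

import nncf
from nncf import BackupMode, CompressWeightsMode

# backup_mode omitted (None): compress_weights falls back to
# BackupMode.INT8_ASYM for the layers left out of 4-bit, as before.
compressed = nncf.compress_weights(ov_model, mode=CompressWeightsMode.INT4_SYM, ratio=0.8)

# Explicit backup_mode with an 8-bit mode is now rejected, even INT8_ASYM.
try:
    nncf.compress_weights(ov_model, mode=CompressWeightsMode.INT8_ASYM, backup_mode=BackupMode.INT8_ASYM)
except AttributeError as err:
    print(err)  # INT8 modes do not support the `backup_mode` option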
1 change: 1 addition & 0 deletions
@@ -706,6 +706,7 @@ def test_raise_error_channel_size_is_not_divisible_by_group_size():
         {"gptq": True},
         {"awq": True},
         {"backup_mode": BackupMode.NONE},
+        {"backup_mode": BackupMode.INT8_ASYM},
         {"backup_mode": BackupMode.INT8_SYM},
     ),
 )
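The two torch test files below add the same parametrization; condensed, the shared negative test looks roughly like this sketch (the Linear stand-in model is an assumption, each real file builds its own backend-specific model):

import pytest
import torch

import nncf
from nncf import BackupMode, CompressWeightsMode

@pytest.mark.parametrize("mode", (CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM))
@pytest.mark.parametrize(
    "params",
    (
        {"backup_mode": BackupMode.NONE},
        {"backup_mode": BackupMode.INT8_ASYM},
        {"backup_mode": BackupMode.INT8_SYM},
    ),
)
def test_raise_error_with_unsupported_params_for_int8(mode, params):
    dummy_model = torch.nn.Linear(4, 4)  # stand-in for the backend-specific models in the real tests
    with pytest.raises(AttributeError):
        nncf.compress_weights(dummy_model, mode=mode, **params)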
4 changes: 4 additions & 0 deletions tests/torch/fx/test_compress_weights.py
@@ -15,6 +15,7 @@
 import torch
 from torch._export import capture_pre_autograd_graph
 
+from nncf import BackupMode
 from nncf import CompressWeightsMode
 from nncf.common.factory import NNCFGraphFactory
 from nncf.data.dataset import Dataset
@@ -208,6 +209,9 @@ def test_compress_weights_functional_model(mode):
         {"scale_estimation": True},
         {"lora_correction": True},
         {"dataset": Dataset([1])},
+        {"backup_mode": BackupMode.NONE},
+        {"backup_mode": BackupMode.INT8_ASYM},
+        {"backup_mode": BackupMode.INT8_SYM},
     ),
 )
 def test_raise_error_with_unsupported_params_for_int8(mode, params):
4 changes: 4 additions & 0 deletions tests/torch/ptq/test_weights_compression.py
@@ -13,6 +13,7 @@
 import torch
 import torch.nn.functional as F
 
+from nncf import BackupMode
 from nncf import CompressWeightsMode
 from nncf import SensitivityMetric
 from nncf.quantization import compress_weights
@@ -214,6 +215,9 @@ def forward(self, input):
         {"awq": True},
         {"scale_estimation": True},
         {"lora_correction": True},
+        {"backup_mode": BackupMode.NONE},
+        {"backup_mode": BackupMode.INT8_ASYM},
+        {"backup_mode": BackupMode.INT8_SYM},
     ),
 )
 def test_raise_error_with_unsupported_params_for_int8(mode, params):
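Finally, a hedged sketch of what backup_mode selects in the mixed-precision path, again assuming a hypothetical ov_model: ratio decides how many weights are compressed to 4 bit, and backup_mode decides what happens to the remainder, with NONE keeping it in the original floating-point precision as the docstring above describes.

import nncf
from nncf import BackupMode, CompressWeightsMode

compressed = nncf.compress_weights(
    ov_model,
    mode=CompressWeightsMode.INT4_SYM,
    ratio=0.5,                    # roughly half of the weights go to 4 bit
    backup_mode=BackupMode.NONE,  # the rest keep original floating point
)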
