Add tinyllama_awq_backup_mode_none_backend_OV test
l-bat committed Sep 25, 2024
1 parent b14f966 commit c038ae2
Showing 3 changed files with 36 additions and 12 deletions.
26 changes: 15 additions & 11 deletions nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -277,7 +277,7 @@ def _get_bitwidth_distribution_str(
dtype_vs_num_weights_map = {}
ratio_defining_weight_names = set(wp.weight_name for wp in ratio_defining_params)
for data in all_params:
dtype = data.compression_config.mode if data.compression_config is not None else "fp16/fp32"
dtype = data.compression_config.mode if data.compression_config is not None else "float"
n_total, n_ratio_defining = dtype_vs_num_weights_map.get(dtype, ([], []))
if data.weight_name in ratio_defining_weight_names:
n_ratio_defining.append(data.num_weights)
@@ -313,12 +313,8 @@ def apply(
dataset: Optional[Dataset] = None,
) -> TModel:
self._set_backend_entity(model)
# nodes_to_compress includes nodes from the ignored scope to be added to bitwidth_distribution_str
nodes_to_compress = self._get_nodes_to_compress(graph)

activations = {}
if dataset is not None and self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR:
activations = self._get_activations(dataset, self._subset_size, nodes_to_compress, graph, model)
# candidates_to_compress includes nodes from the ignored scope to be added to bitwidth_distribution_str
candidates_to_compress = self._get_nodes_to_compress(graph)
all_weight_params: List[WeightCompressionParameters] = []
weight_names = set()

@@ -328,8 +324,8 @@ def apply(
ignored_scope_weight_params: List[WeightCompressionParameters] = []

is_last_layer_shared = False
n = len(nodes_to_compress)
for i, node in enumerate(nodes_to_compress):
n = len(candidates_to_compress)
for i, node in enumerate(candidates_to_compress):
for weight_name, weight_port_id in self._backend_entity.get_weight_names_and_port_ids(node, graph):
if weight_name in weight_names:
if i == n - 1:
@@ -383,15 +379,23 @@ def apply(
all_weight_params.append(weight_params)
weight_names.add(weight_name)

activations = {}
nodes_to_compress = [node for node in candidates_to_compress if node.node_name not in ignored_names]
if dataset is not None and self._sensitivity_metric != SensitivityMetric.WEIGHT_QUANTIZATION_ERROR:
activations = self._get_activations(dataset, self._subset_size, nodes_to_compress, graph, model)

ratio_defining_params = self._get_ratio_defining_params(all_weight_params, is_last_layer_shared)
self._set_weight_compression_config(ratio_defining_params, model, graph, activations)
nncf_logger.info(
self._get_bitwidth_distribution_str(all_weight_params + ignored_scope_weight_params, ratio_defining_params)
)
nodes_names_to_exclude = {
w_params.node_with_weight.node_name for w_params in all_weight_params if w_params.compression_config is None
}
# Filter the weight parameters that should remain in their original floating-point precision
all_weight_params = [w_params for w_params in all_weight_params if w_params.compression_config is not None]
# Remove nodes in the ignored scope from nodes_to_compress
nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in ignored_names]
# Filter nodes_to_compress by excluding nodes that should remain in their original floating-point precision
nodes_to_compress = [node for node in nodes_to_compress if node.node_name not in nodes_names_to_exclude]

if self._awq and activations is not None and self._mode != CompressWeightsMode.E2M1:
awq_params = self._advanced_parameters.awq_params
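Net effect of the reordering above: `apply` now first gathers all candidate nodes (the ignored scope is kept only so it appears in the bit-width distribution report), drops the ignored-scope nodes before collecting activation statistics, and then excludes every layer whose `compression_config` is `None` — which is what `backup_mode=NONE` yields for weights that fall outside the 4-bit ratio. A minimal, self-contained illustration of that exclusion step (hypothetical stand-in types, not the actual NNCF classes):

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class WeightParams:  # hypothetical stand-in for WeightCompressionParameters
    node_name: str
    compression_config: Optional[str]

# With backup_mode=NONE, layers outside the 4-bit ratio get no compression config.
all_weight_params = [
    WeightParams("linear_0", "int4_asym"),
    WeightParams("linear_1", None),
    WeightParams("linear_2", "int4_asym"),
]
nodes_to_compress = ["linear_0", "linear_1", "linear_2"]

# Mirror of the exclusion logic: uncompressed layers leave both lists.
nodes_names_to_exclude = {p.node_name for p in all_weight_params if p.compression_config is None}
all_weight_params = [p for p in all_weight_params if p.compression_config is not None]
nodes_to_compress = [n for n in nodes_to_compress if n not in nodes_names_to_exclude]

assert nodes_to_compress == ["linear_0", "linear_2"]
```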
6 changes: 5 additions & 1 deletion tests/post_training/data/wc_reference_data.yaml
@@ -39,4 +39,8 @@ tinyllama_data_aware_lora_stateful_backend_OV:
tinyllama_NF4_scale_estimation_stateful_per_channel_backend_OV:
metric_value: 0.88663
num_int4: 11
num_int8: 290
num_int8: 290
tinyllama_awq_backup_mode_none_backend_OV:
metric_value: 0.85679
num_int4: 208
num_int8: 0
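The new reference entry expects 208 INT4-compressed weights and no INT8 ones: with `backup_mode=NONE`, layers that fall outside the 0.8 ratio are left in their original floating-point precision rather than being compressed to INT8. A rough way to reproduce such counts from a compressed OpenVINO model (a hypothetical helper sketch, not the code the test suite actually uses):

```python
import openvino as ov

def count_low_bit_constants(model: ov.Model) -> tuple[int, int]:
    """Count 4-bit and 8-bit integer Constant nodes (compare with num_int4 / num_int8)."""
    num_int4 = num_int8 = 0
    for op in model.get_ops():
        if op.get_type_name() != "Constant":
            continue
        dtype = op.get_element_type()
        if dtype in (ov.Type.i4, ov.Type.u4):
            num_int4 += 1
        elif dtype in (ov.Type.i8, ov.Type.u8):
            num_int8 += 1
    return num_int4, num_int8

# Usage sketch: num_int4, num_int8 = count_low_bit_constants(compressed_model)
```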
16 changes: 16 additions & 0 deletions tests/post_training/model_scope.py
@@ -15,6 +15,7 @@
import nncf
from nncf import ModelType
from nncf import QuantizationPreset
from nncf.parameters import BackupMode
from nncf.parameters import CompressWeightsMode
from nncf.parameters import SensitivityMetric
from nncf.quantization.advanced_parameters import AdvancedCompressionParameters
@@ -498,6 +499,21 @@
"params": {"is_stateful": True},
"backends": [BackendType.OV],
},
{
"reported_name": "tinyllama_awq_backup_mode_none",
"model_id": "tinyllama/tinyllama-1.1b-step-50k-105b",
"pipeline_cls": LMWeightCompression,
"compression_params": {
"group_size": 64,
"ratio": 0.8,
"all_layers": True,
"backup_mode": BackupMode.NONE,
"mode": CompressWeightsMode.INT4_ASYM,
"awq": True,
"ignored_scope": nncf.IgnoredScope(types=["Gather"]),
},
"backends": [BackendType.OV],
},
]


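For reference, the new scope entry corresponds roughly to the following direct `nncf.compress_weights` call (a simplified sketch: `ov_model` and `calibration_dataset` are placeholders for the OpenVINO model and `nncf.Dataset` that the `LMWeightCompression` pipeline prepares):

```python
import nncf
from nncf import CompressWeightsMode
from nncf.parameters import BackupMode

compressed_model = nncf.compress_weights(
    ov_model,                             # placeholder: OpenVINO IR of tinyllama-1.1b-step-50k-105b
    mode=CompressWeightsMode.INT4_ASYM,
    ratio=0.8,
    group_size=64,
    all_layers=True,
    awq=True,                             # AWQ needs calibration data
    backup_mode=BackupMode.NONE,          # no INT8 fallback: remaining weights stay in float
    ignored_scope=nncf.IgnoredScope(types=["Gather"]),
    dataset=calibration_dataset,          # placeholder: nncf.Dataset over tokenized samples
)
```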
