[WC] Fix ratio_defining_params (#2653)
### Changes

When `all_layers=True`, embedding weights are now added to the ratio-defining parameters only if they have a single reduction axis (`len(wp.reduction_axes) == 1`); see the updated filter in `_get_ratio_defining_params` below.

### Reason for changes

Fix a bug in the `_get_ratio_defining_params` method

### Tests

`test_shared_gather_all_layers`
l-bat authored May 14, 2024
1 parent 8275d57 · commit d94c564
Showing 2 changed files with 21 additions and 6 deletions.
12 changes: 6 additions & 6 deletions nncf/quantization/algorithms/weight_compression/algorithm.py
@@ -175,28 +175,28 @@ def _get_ratio_defining_params(
        if self._mode in [CompressWeightsMode.INT8_SYM, CompressWeightsMode.INT8_ASYM]:
            return all_weight_params

        if self._all_layers:
            return list(filter(lambda wp: len(wp.reduction_axes) == 1, all_weight_params))

        ratio_defining_params = list(
            filter(
                lambda wp: wp.node_with_weight.metatype in self._backend_entity.matmul_metatypes,
                all_weight_params,
            )
        )

        # The last MatMul layer is quantized to 4-bits if all_layers=True
        if not self._all_layers and not is_last_layer_shared:
            ratio_defining_params = ratio_defining_params[:-1]

        # Embedding layers are quantized to 4-bits only if all_layers=True.
        if self._all_layers:
            embedding_params = list(
                filter(
                    lambda wp: wp.node_with_weight.metatype in self._backend_entity.embedding_metatypes,
                    lambda wp: wp.node_with_weight.metatype in self._backend_entity.embedding_metatypes
                    and len(wp.reduction_axes) == 1,
                    all_weight_params,
                )
            )
            ratio_defining_params.extend(embedding_params)

        if not self._all_layers and not is_last_layer_shared:
            ratio_defining_params = ratio_defining_params[:-1]
        return ratio_defining_params

    def _set_weight_compression_config(
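To make the selection rule easier to see outside of the diff, here is a minimal, self-contained sketch of the filtering that `_get_ratio_defining_params` performs after this change. `WeightParam`, `MATMUL_METATYPES`, and `EMBEDDING_METATYPES` are illustrative stand-ins for NNCF's `WeightCompressionParameters` and the backend metatype sets, and the example input at the bottom is made up for illustration; only the filtering conditions themselves are taken from the diff above.

```python
from dataclasses import dataclass
from typing import List, Tuple

# Illustrative stand-ins for self._backend_entity.matmul_metatypes / embedding_metatypes.
MATMUL_METATYPES = {"MatMul"}
EMBEDDING_METATYPES = {"Gather"}


@dataclass
class WeightParam:
    # Stand-in for WeightCompressionParameters: only the fields the filters look at.
    name: str
    metatype: str
    reduction_axes: Tuple[int, ...]


def get_ratio_defining_params(
    all_weight_params: List[WeightParam], all_layers: bool, is_last_layer_shared: bool
) -> List[WeightParam]:
    # MatMul weights always take part in the mixed-precision ratio.
    ratio_defining_params = [wp for wp in all_weight_params if wp.metatype in MATMUL_METATYPES]

    # Embedding weights take part only when all_layers=True, and only if they have
    # a single reduction axis (the condition added by this commit).
    if all_layers:
        ratio_defining_params.extend(
            wp
            for wp in all_weight_params
            if wp.metatype in EMBEDDING_METATYPES and len(wp.reduction_axes) == 1
        )

    # Without all_layers, the last MatMul is left in the backup precision unless its
    # weight is shared; this branch and the one above never both run, so their
    # relative order does not change the result.
    if not all_layers and not is_last_layer_shared:
        ratio_defining_params = ratio_defining_params[:-1]

    return ratio_defining_params


if __name__ == "__main__":
    params = [
        WeightParam("matmul_1_data", "MatMul", (1,)),
        WeightParam("shared_data", "MatMul", (1,)),
        WeightParam("gather_2_data", "Gather", (0,)),
        WeightParam("gather_3d_data", "Gather", (0, 1)),
    ]
    # With all_layers=True the weight with two reduction axes is skipped by the new check.
    selected = get_ratio_defining_params(params, all_layers=True, is_last_layer_shared=True)
    print([wp.name for wp in selected])  # ['matmul_1_data', 'shared_data', 'gather_2_data']
```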
15 changes: 15 additions & 0 deletions tests/openvino/native/quantization/test_weights_compression.py
@@ -456,6 +456,21 @@ def test_shared_gather(mode):
            assert op.get_element_type() == weight_name_vs_type[op_name]


@pytest.mark.parametrize("all_layers", (True, False))
def test_shared_gather_all_layers(all_layers):
    weight_name_vs_type = {
        "gather_2_data": ov.Type.u4 if all_layers else ov.Type.u8,
        "shared_data": ov.Type.u4 if all_layers else ov.Type.u8,
        "matmul_1_data": ov.Type.u4,
    }
    model = GatherAndMatmulShareData().ov_model
    compressed_model = compress_weights(model, CompressWeightsMode.INT4_ASYM, group_size=-1, all_layers=all_layers)
    for op in compressed_model.get_ordered_ops():
        op_name = op.get_friendly_name()
        if op.get_type_name() == "Constant" and op_name in weight_name_vs_type:
            assert op.get_element_type() == weight_name_vs_type[op_name]


@dataclass
class QuantErrorDesc:
    weight: List[float]
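For reference, a small snippet for running only the new parametrized test through pytest's programmatic entry point; this assumes pytest and the repository's test requirements are installed and is just one of several equivalent ways to invoke it.

```python
# Run only test_shared_gather_all_layers (both all_layers=True and all_layers=False cases).
import pytest

pytest.main(
    [
        "tests/openvino/native/quantization/test_weights_compression.py",
        "-k",
        "test_shared_gather_all_layers",
        "-q",
    ]
)
```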
