benchdnn: inputs: graph: dt rewrites for sdpa compressed kv cases

oneapi-src · Jan 10, 2025 · d480ae1
1 parent 5a54e1b
commit d480ae1
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 4 deletions.
diff --git a/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all b/tests/benchdnn/inputs/graph/complex_fusion/harness_mha_all
@@ -54,3 +54,8 @@
 --reset --expected-n-partitions=0 --in-shapes=4:20x117x48x128+3:20x1x128x117+0:20x1x117x128 --case=complex_fusion/mha/MHA-starcoder-inf-int8-bs1.json
 --reset --expected-n-partitions=0 --in-shapes=4:32x16x384x64+3:32x16x64x384+0:32x16x384x64+1:32x1x1x384 --case=complex_fusion/mha/dynamic_quantized_mha-Bert_large-inf-int8-bs1-fake.json
 --reset --in-shapes=4:20x16x384x64+3:20x16x64x384+0:20x16x384x64+1:20x1x1x384 --case=complex_fusion/mha/sdpa-plain-wo-scale-int8-bs1.json
+
+# Tensor IDs -- 0: key, 2: key zero points, 6: value, 8: value zero points. Rewrite their data types to s8.
+--reset --dt=0:s8+2:s8+6:s8+8:s8 --case=complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json
+# Change the group size to 128. This also changes the shapes of the scales and zero points, so they are overridden via --in-shapes.
+--reset --dt=0:s8+2:s8+6:s8+8:s8 --op-attrs=0:group_shape:1x1x128x1+99:group_shape:1x1x1x128 --in-shapes=1:1x32x1x32+2:1x32x1x32+7:1x32x32x1+8:1x32x32x1 --case=complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json
diff --git a/tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json b/tests/benchdnn/inputs/graph/complex_fusion/mha/sdpa-compressed-kv-int4-gs32.json
@@ -19,7 +19,7 @@
     ],
     "graph": [
       {
-        "id": 34107656704,
+        "id": 0,
         "name": "aten::dequantize",
         "kind": "DynamicDequantize",
         "attrs": {
@@ -371,7 +371,7 @@
         ]
       },
       {
-        "id": 34107752448,
+        "id": 99,
         "name": "aten::dequantize",
         "kind": "DynamicDequantize",
         "attrs": {
@@ -545,5 +545,3 @@
       }
     ]
   }
-
-