Add block_size attribute for nf4 operator

PaddlePaddle · Nov 2, 2023 · a6f7404
1 parent 3ce762b
commit a6f7404
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 4 deletions.
diff --git a/csrc/lc/nf4.cu b/csrc/lc/nf4.cu
@@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
     }
 }
 
-
-
-
 PD_BUILD_OP(quantize_nf4)
     .Inputs({"input"})
     .Outputs({"out", "abs_max"})
+    .Attrs({"block_size: int"})
     .SetKernelFn(PD_KERNEL(QuantizeNF4));
diff --git a/paddleslim/lc/quantizers/nf4.py b/paddleslim/lc/quantizers/nf4.py
@@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False):
         self.double_quant_scale = None
 
     def quantize(self, x: paddle.Tensor):
-        out, abs_max = paddleslim_ops.quantize_nf4(x)
+        out, abs_max = paddleslim_ops.quantize_nf4(
+            x, block_size=self.block_size)
         self.quant_scale = abs_max
         return out