Skip to content

Commit

Permalink
Add block_size attribute for nf4 operator
Browse files: browse the repository at this point in the history
  • Loading branch information
wanghaoshuang committed Nov 2, 2023
1 parent 3ce762b commit a6f7404
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 4 deletions.
4 changes: 1 addition & 3 deletions csrc/lc/nf4.cu
Original file line number Diff line number Diff line change
Expand Up @@ -226,10 +226,8 @@ std::vector<paddle::Tensor> QuantizeNF4(const paddle::Tensor& input, int block_s
}
}




// Registers the custom operator `quantize_nf4`:
//   - one input tensor named "input",
//   - two outputs named "out" and "abs_max",
//   - one integer attribute named "block_size",
// with QuantizeNF4 bound as the kernel function. The attribute declared here
// must be supplied by callers of the operator.
PD_BUILD_OP(quantize_nf4)
.Inputs({"input"})
.Outputs({"out", "abs_max"})
.Attrs({"block_size: int"})
.SetKernelFn(PD_KERNEL(QuantizeNF4));
3 changes: 2 additions & 1 deletion paddleslim/lc/quantizers/nf4.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ def __init__(self, block_size=64, double_quant=False):
self.double_quant_scale = None

def quantize(self, x: paddle.Tensor):
    """Quantize ``x`` with the ``quantize_nf4`` op and record its scale.

    Calls ``paddleslim_ops.quantize_nf4`` with this quantizer's
    ``block_size``, stores the returned ``abs_max`` on
    ``self.quant_scale``, and returns the quantized output.

    Args:
        x: Tensor to quantize.

    Returns:
        The ``out`` tensor produced by ``quantize_nf4``.
    """
    # The operator is invoked exactly once, and `block_size` is passed
    # explicitly because the op declares it as an attribute.
    out, abs_max = paddleslim_ops.quantize_nf4(
        x, block_size=self.block_size)
    self.quant_scale = abs_max
    return out

Expand Down

0 comments on commit a6f7404

Please sign in to comment.