Fix FOAK dequant for compatibility with local gptq package
achew010 committed Jul 11, 2024
1 parent 0858912 commit 1cf8811
Showing 2 changed files with 7 additions and 1 deletion.
@@ -98,6 +98,9 @@ def get_lora_parameters(proj):
     base_layer = proj.base_layer if hasattr(proj, "base_layer") else proj
     qstate = extract_gptq_state(base_layer)
 
+    if base_layer.__module__.startswith("auto_gptq"):
+        setattr(qstate.qzeros, "requires_offset", True)
+
     if not hasattr(proj, "disable_adapters") or proj.disable_adapters or proj.merged:
         return qstate, None, None, None, None
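Note: the flag is an ordinary Python attribute attached to the qzeros tensor, which the dequant kernel later reads back with getattr. A minimal sketch of the mechanism (the tensor shape and dtype here are illustrative, not the repository's actual packing):

    import torch

    # Stand-in for the packed zero-points extracted from a GPTQ layer
    qzeros = torch.zeros(4, dtype=torch.int32)

    # Layers coming from auto_gptq get tagged; the local gptq package does not
    setattr(qzeros, "requires_offset", True)

    # Downstream code branches on the tag, defaulting to no offset
    assert getattr(qzeros, "requires_offset", False)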
@@ -110,7 +110,10 @@ def dequant_kernel_248(
     zeros = zeros & maxq
 
     # Dequantize
-    zeros = zeros + 1
+    # False when using the local gptq package; official auto_gptq needs the +1 offset
+    if getattr(qzeros_ptr, "requires_offset", False):
+        zeros = zeros + 1
 
     weights = weights - zeros
     weights = weights.to(tl.float32)
     weights = scales * weights
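For intuition, the kernel computes weight = scales * (quantized - zeros). If, as the branch above implies, auto_gptq stores its zero-points shifted down by one while the local gptq package stores them directly, both paths recover the same weight. A toy check with illustrative values:

    scale, zero, q = 0.1, 8, 11                       # hypothetical 4-bit values

    autogptq_stored = zero - 1                        # auto_gptq packs zero - 1
    local_stored = zero                               # local gptq packs zero as-is

    w_autogptq = scale * (q - (autogptq_stored + 1))  # requires_offset=True path
    w_local = scale * (q - local_stored)              # requires_offset=False path
    assert w_autogptq == w_local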
