From 659b177f9531ad582349cabcddbcacd2b0e6545a Mon Sep 17 00:00:00 2001
From: ssbuild <462304@qq.com>
Date: Sat, 5 Aug 2023 04:16:41 +0800
Subject: [PATCH] fix qwen quantization

Signed-off-by: ssbuild <462304@qq.com>
---
 src/deep_training/nlp/models/baichuan/modeling_baichuan.py  | 2 +-
 src/deep_training/nlp/models/baichuan2/modeling_baichuan.py | 2 +-
 src/deep_training/nlp/models/chatglm/__init__.py            | 2 +-
 src/deep_training/nlp/models/chatglm2/modeling_chatglm.py   | 2 +-
 src/deep_training/nlp/models/internlm/quantization.py       | 4 ++--
 src/deep_training/nlp/models/qwen/modeling_qwen.py          | 2 +-
 src/deep_training/nlp/models/qwen/quantization.py           | 4 ++--
 7 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/deep_training/nlp/models/baichuan/modeling_baichuan.py b/src/deep_training/nlp/models/baichuan/modeling_baichuan.py
index aafc4665..49777124 100644
--- a/src/deep_training/nlp/models/baichuan/modeling_baichuan.py
+++ b/src/deep_training/nlp/models/baichuan/modeling_baichuan.py
@@ -584,7 +584,7 @@ def __init__(self, config: BaiChuanConfig,**kwargs):
         self.post_init()
 
         self.quantized = False
-        if self.config.quantization_bit is not None and self.config.quantization_bit not in [0, 32]:
+        if self.config.quantization_bit in [4,8]:
             self.quantize(self.config.quantization_bit, empty_init=True)
 
     def get_input_embeddings(self):
diff --git a/src/deep_training/nlp/models/baichuan2/modeling_baichuan.py b/src/deep_training/nlp/models/baichuan2/modeling_baichuan.py
index 4efc6161..210192d0 100644
--- a/src/deep_training/nlp/models/baichuan2/modeling_baichuan.py
+++ b/src/deep_training/nlp/models/baichuan2/modeling_baichuan.py
@@ -379,7 +379,7 @@ def __init__(self, config,**kwargs):
         self.post_init()
 
         self.quantized = False
-        if self.config.quantization_bit is not None and self.config.quantization_bit not in [0,32]:
+        if self.config.quantization_bit in [4,8]:
             self.quantize(self.config.quantization_bit,empty_init=True)
 
 
diff --git a/src/deep_training/nlp/models/chatglm/__init__.py b/src/deep_training/nlp/models/chatglm/__init__.py
index 05aab778..a588195b 100644
--- a/src/deep_training/nlp/models/chatglm/__init__.py
+++ b/src/deep_training/nlp/models/chatglm/__init__.py
@@ -1071,7 +1071,7 @@ def __init__(self, config: ChatGLMConfig):
         self.config = config
 
         self.quantized = False
-        if self.config.quantization_bit:
+        if self.config.quantization_bit in [4,8]:
             self.quantize(self.config.quantization_bit, empty_init=True,dtype=self.transformer.params_dtype or torch.half)
 
     def get_output_embeddings(self):
diff --git a/src/deep_training/nlp/models/chatglm2/modeling_chatglm.py b/src/deep_training/nlp/models/chatglm2/modeling_chatglm.py
index c39d2a18..734fc1d9 100644
--- a/src/deep_training/nlp/models/chatglm2/modeling_chatglm.py
+++ b/src/deep_training/nlp/models/chatglm2/modeling_chatglm.py
@@ -876,7 +876,7 @@ def __init__(self, config: ChatGLMConfig,device=None):
         self.config = config
 
         self.quantized = False
-        if self.config.quantization_bit:
+        if self.config.quantization_bit in [4,8]:
             self.quantize(self.config.quantization_bit, empty_init=True)
 
     def _update_model_kwargs_for_generation(
diff --git a/src/deep_training/nlp/models/internlm/quantization.py b/src/deep_training/nlp/models/internlm/quantization.py
index 566dc5e2..fb098628 100644
--- a/src/deep_training/nlp/models/internlm/quantization.py
+++ b/src/deep_training/nlp/models/internlm/quantization.py
@@ -161,7 +161,7 @@ def quantize(model, bits, empty_init=False, device=None,**kwarg):
             QuantizedLinear(
                 bits=bits,
                 weight=w.weight.to(torch.cuda.current_device()),
-                bias=None,
+                bias=w.bias.to(torch.cuda.current_device()),
                 empty_init=empty_init,
                 device=w.weight.device if device is None else device,
                 dtype=w.weight.dtype,
@@ -176,7 +176,7 @@ def quantize(model, bits, empty_init=False, device=None,**kwarg):
             QuantizedLinear(
                 bits=bits,
                 weight=w.weight.to(torch.cuda.current_device()),
-                bias=None,
+                bias=w.bias.to(torch.cuda.current_device()),
                 empty_init=empty_init,
                 device=w.weight.device if device is None else device,
                 dtype=w.weight.dtype,
diff --git a/src/deep_training/nlp/models/qwen/modeling_qwen.py b/src/deep_training/nlp/models/qwen/modeling_qwen.py
index 9d8f2208..e79174e0 100644
--- a/src/deep_training/nlp/models/qwen/modeling_qwen.py
+++ b/src/deep_training/nlp/models/qwen/modeling_qwen.py
@@ -824,7 +824,7 @@ def __init__(self, config,**kwargs):
         self.post_init()
 
         self.quantized = False
-        if self.config.quantization_bit is not None and self.config.quantization_bit not in [0, 32]:
+        if self.config.quantization_bit in [4,8]:
             self.quantize(self.config.quantization_bit, empty_init=True)
 
     def get_output_embeddings(self):
diff --git a/src/deep_training/nlp/models/qwen/quantization.py b/src/deep_training/nlp/models/qwen/quantization.py
index c77abac6..a42c02b1 100644
--- a/src/deep_training/nlp/models/qwen/quantization.py
+++ b/src/deep_training/nlp/models/qwen/quantization.py
@@ -160,7 +160,7 @@ def quantize(model, bits, empty_init=False, device=None,**kwarg):
             QuantizedLinear(
                 bits=bits,
                 weight=w.weight.to(torch.cuda.current_device()),
-                bias=None,
+                bias=w.bias.to(torch.cuda.current_device()),
                 empty_init=empty_init,
                 device=w.weight.device if device is None else device,
                 dtype=w.weight.dtype,
@@ -175,7 +175,7 @@ def quantize(model, bits, empty_init=False, device=None,**kwarg):
             QuantizedLinear(
                 bits=bits,
                 weight=w.weight.to(torch.cuda.current_device()),
-                bias=None,
+                bias=w.bias.to(torch.cuda.current_device()),
                 empty_init=empty_init,
                 device=w.weight.device if device is None else device,
                 dtype=w.weight.dtype,
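
Reviewer note: the patch converges on two patterns. First, quantization is now gated on membership in the supported bit widths; the older checks (`if self.config.quantization_bit:` and `not in [0, 32]`) also fired for widths with no kernel, such as 16. Second, `quantize()` now forwards the layer's bias instead of hard-coding `bias=None`, which silently dropped the bias of every replaced `nn.Linear`. A minimal sketch of both patterns follows; `SUPPORTED_BITS`, `should_quantize`, and `swap_linear` are names invented here for illustration, not identifiers from the patch:

    # Sketch only. `quantized_linear_cls` stands in for the repo's
    # kernel-backed QuantizedLinear, assumed to take weight/bias tensors.
    import torch.nn as nn

    SUPPORTED_BITS = (4, 8)  # only 4-bit and 8-bit kernels exist

    def should_quantize(quantization_bit) -> bool:
        # Explicit membership: None, 0, 16, 32, ... all fall through,
        # unlike truthiness or `not in [0, 32]`.
        return quantization_bit in SUPPORTED_BITS

    def swap_linear(w: nn.Linear, quantized_linear_cls, bits: int):
        # Forward the bias rather than dropping it. The patch calls
        # w.bias.to(...) unconditionally, i.e. it assumes the replaced
        # layers were built with bias=True; a bias=False layer would
        # need a guard like the one below.
        bias = w.bias.detach() if w.bias is not None else None
        return quantized_linear_cls(bits=bits, weight=w.weight.detach(), bias=bias)

    if __name__ == "__main__":
        assert should_quantize(4) and should_quantize(8)
        assert not any(map(should_quantize, (None, 0, 16, 32)))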