From 4166fe0792de447afb57b5f776987ff89781ccb9 Mon Sep 17 00:00:00 2001 From: ssbuild <462304@qq.cn> Date: Fri, 3 Mar 2023 15:31:36 +0800 Subject: [PATCH] 0.0.16 Signed-off-by: ssbuild <462304@qq.cn> --- README.md | 2 + nlp/models/lora/__init__.py | 191 +++++------------- .../lora/{config.py => configuration.py} | 34 ++-- nlp/models/transformer.py | 24 +-- setup.py | 2 +- utils/trainer.py | 35 ++-- 6 files changed, 104 insertions(+), 184 deletions(-) rename nlp/models/lora/{config.py => configuration.py} (86%) diff --git a/README.md b/README.md index 47df97e8..8663487c 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,8 @@ - [poetry_training](https://github.com/ssbuild/poetry_training) ## 更新 +- 2023年03月02 + - 增加loRA 训练 , lion优化器 , 完整训练参考 [chatyuan_finetuning](https://github.com/ssbuild/chatyuan_finetuning) - 2023年02月15 - 增加诗歌PaLM预训练模型 - 2023年02月13 diff --git a/nlp/models/lora/__init__.py b/nlp/models/lora/__init__.py index 71151c0f..e9a9ee52 100644 --- a/nlp/models/lora/__init__.py +++ b/nlp/models/lora/__init__.py @@ -10,33 +10,33 @@ from typing import Optional, Union, List import torch -from torch.nn import Linear from transformers import Conv1D from transformers.utils import PushToHubMixin -from .config import PeftConfig, LoraConfig, WEIGHTS_NAME -from ...layers.lora.layers import MergedLinear, Linear8bitLt, is_bnb_available, LoraLayer +from .configuration import LoraArguments, WEIGHTS_NAME +from ...layers.lora.layers import MergedLinear, is_bnb_available, LoraLayer, Linear from ...layers.lora.utils import mark_only_lora_as_trainable +__all__ = [ + 'LoraArguments', + 'LoraModel', + 'LoraLayer' +] if is_bnb_available(): import bitsandbytes as bnb + from ...layers.lora.layers import Linear8bitLt -def _set_trainable(model): - if model.modules_to_save is not None: - for name, param in model.named_parameters(): - if any(module_name in name for module_name in model.modules_to_save): - param.requires_grad = True -def get_peft_model_state_dict(model, state_dict=None): +def get_lora_model_state_dict(model, state_dict=None): """ - Get the state dict of the Peft model. + Get the state dict of the Lora model. Args: - model ([`PeftModel`]): The Peft model. When using torch.nn.DistributedDataParallel, DeepSpeed or FSDP, + model ([`LoraModel`]): The Lora model. When using torch.nn.DistributedDataParallel, DeepSpeed or FSDP, the model should be the underlying model/unwrapped model (i.e. model.module). state_dict (`dict`, *optional*, defaults to `None`): The state dict of the model. If not provided, the state dict of the model @@ -45,10 +45,10 @@ def get_peft_model_state_dict(model, state_dict=None): if state_dict is None: state_dict = model.state_dict() - # to_return = lora_state_dict(model, bias=model.peft_config.bias) + # to_return = lora_state_dict(model, bias=model.lora_config.bias) # adapted from `https://github.com/microsoft/LoRA/blob/main/loralib/utils.py` # to directly with the state dict which is necessary when using DeepSpeed or FSDP - bias = model.peft_config.bias + bias = model.lora_config.bias if bias == "none": to_return = {k: state_dict[k] for k in state_dict if "lora_" in k} elif bias == "all": @@ -71,29 +71,28 @@ def get_peft_model_state_dict(model, state_dict=None): return to_return -def set_peft_model_state_dict(model, peft_model_state_dict): +def set_lora_model_state_dict(model, lora_model_state_dict): """ - Set the state dict of the Peft model. + Set the state dict of the Lora model. Args: - model ([`PeftModel`]): The Peft model. 
- peft_model_state_dict (`dict`): The state dict of the Peft model. + model ([`LoraModel`]): The Lora model. + lora_model_state_dict (`dict`): The state dict of the Lora model. """ - model.load_state_dict(peft_model_state_dict, strict=False) + model.load_state_dict(lora_model_state_dict, strict=False) return model -class LoraModel(torch.nn.Module): - - - def __init__(self, config, model): - super().__init__() - self.peft_config = config +class LoraModel(torch.nn.Module,PushToHubMixin): + def __init__(self, model, config): + torch.nn.Module.__init__(self) + PushToHubMixin.__init__(self) + self.lora_config = config self.model = model self._find_and_replace() - mark_only_lora_as_trainable(self.model, self.peft_config.bias) + mark_only_lora_as_trainable(self.model, self.lora_config.bias) self.forward = self.model.forward def _find_and_replace(self): @@ -105,24 +104,24 @@ def _find_and_replace(self): # ) is_target_modules_in_base_model = False kwargs = { - "r": self.peft_config.r, - "lora_alpha": self.peft_config.lora_alpha, - "lora_dropout": self.peft_config.lora_dropout, - "fan_in_fan_out": self.peft_config.fan_in_fan_out, - "merge_weights": self.peft_config.merge_weights or self.peft_config.inference_mode, + "r": self.lora_config.r, + "lora_alpha": self.lora_config.lora_alpha, + "lora_dropout": self.lora_config.lora_dropout, + "fan_in_fan_out": self.lora_config.fan_in_fan_out, + "merge_weights": self.lora_config.merge_weights or self.lora_config.inference_mode, } key_list = [key for key, _ in self.model.named_modules()] for key in key_list: - if isinstance(self.peft_config.target_modules, str): - target_module_found = re.fullmatch(self.peft_config.target_modules, key) + if isinstance(self.lora_config.target_modules, str): + target_module_found = re.fullmatch(self.lora_config.target_modules, key) else: - target_module_found = any(key.endswith(target_key) for target_key in self.peft_config.target_modules) + target_module_found = any(key.endswith(target_key) for target_key in self.lora_config.target_modules) if target_module_found: if not is_target_modules_in_base_model: is_target_modules_in_base_model = True parent, target, target_name = self._get_submodules(key) bias = target.bias is not None - if loaded_in_8bit and isinstance(target, bnb.nn.Linear8bitLt) and self.peft_config.enable_lora is None: + if loaded_in_8bit and isinstance(target, bnb.nn.Linear8bitLt) and self.lora_config.enable_lora is None: kwargs.update( { "has_fp16_weights": target.state.has_fp16_weights, @@ -132,10 +131,10 @@ def _find_and_replace(self): } ) new_module = Linear8bitLt(target.in_features, target.out_features, bias=bias, **kwargs) - elif isinstance(target, torch.nn.Linear) and self.peft_config.enable_lora is None: + elif isinstance(target, torch.nn.Linear) and self.lora_config.enable_lora is None: new_module = Linear(target.in_features, target.out_features, bias=bias, **kwargs) - elif self.peft_config.enable_lora is not None: - kwargs.update({"enable_lora": self.peft_config.enable_lora}) + elif self.lora_config.enable_lora is not None: + kwargs.update({"enable_lora": self.lora_config.enable_lora}) if isinstance(target, Conv1D): in_features, out_features = target.weight.shape else: @@ -150,7 +149,7 @@ def _find_and_replace(self): self._replace_module(parent, target_name, new_module, target) if not is_target_modules_in_base_model: raise ValueError( - f"Target modules {self.peft_config.target_modules} not found in the base model. " + f"Target modules {self.lora_config.target_modules} not found in the base model. 
" f"Please check the target modules and try again." ) @@ -180,8 +179,8 @@ def __getattr__(self, name: str): def modules_to_save(self): return None - def get_peft_config_as_dict(self, inference: bool = False): - config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(self.peft_config).items()} + def get_lora_config_as_dict(self, inference: bool = False): + config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(self.lora_config).items()} if inference: config["inference_mode"] = True return config @@ -197,44 +196,6 @@ def enable_adapter_layers(self): def disable_adapter_layers(self): self._set_adapter_layers(enabled=False) - -class PeftModel(PushToHubMixin, torch.nn.Module): - """ - Parameter-Efficient Fine-Tuning Model. Base model encompassing various Peft methods. - - Args: - model ([`PreTrainedModel`]): The base transformer model used for Peft. - peft_config ([`PeftConfig`]): The configuration of the Peft model. - - - **Attributes**: - - **base_model** ([`PreTrainedModel`]) -- The base transformer model used for Peft. - - **peft_config** ([`PeftConfig`]) -- The configuration of the Peft model. - - **modules_to_save** (`list` of `str`) -- The list of sub-module names to save when - saving the model. - - **prompt_encoder** ([`PromptEncoder`]) -- The prompt encoder used for Peft if - `isinstance(self.peft_config, PromptLearningConfig)`. - - **prompt_tokens** (`torch.Tensor`) -- The virtual prompt tokens used for Peft if - `isinstance(self.peft_config, PromptLearningConfig)`. - - **transformer_backbone_name** (`str`) -- The name of the transformer - backbone in the base model if `isinstance(self.peft_config, PromptLearningConfig)`. - - **word_embeddings** (`torch.nn.Embedding`) -- The word embeddings of the transformer backbone - in the base model if `isinstance(self.peft_config, PromptLearningConfig)`. 
- """ - - def __init__(self, model, peft_config: PeftConfig): - super().__init__() - self.peft_config = peft_config - self.base_model = model - self.config = self.base_model.config - self.modules_to_save = None - - self.base_model = LoraModel(peft_config, model) - if getattr(self.peft_config, "modules_to_save", None) is not None: - self.modules_to_save = self.peft_config.modules_to_save - _set_trainable(self) - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - def save_pretrained(self, save_directory, **kwargs): r""" Args: @@ -252,21 +213,17 @@ def save_pretrained(self, save_directory, **kwargs): os.makedirs(save_directory, exist_ok=True) # save only the trainable weights - output_state_dict = get_peft_model_state_dict(self, kwargs.get("state_dict", None)) + output_state_dict = get_lora_model_state_dict(self, kwargs.get("state_dict", None)) torch.save(output_state_dict, os.path.join(save_directory, WEIGHTS_NAME)) # save the config and change the inference mode to `True` - if self.peft_config.base_model_name_or_path is None: - self.peft_config.base_model_name_or_path = ( - self.base_model.model.__dict__.get("name_or_path", None) - ) - inference_mode = self.peft_config.inference_mode - self.peft_config.inference_mode = True - self.peft_config.save_pretrained(save_directory) - self.peft_config.inference_mode = inference_mode + inference_mode = self.lora_config.inference_mode + self.lora_config.inference_mode = True + self.lora_config.save_pretrained(save_directory) + self.lora_config.inference_mode = inference_mode @classmethod - def from_pretrained(cls, model, model_id, **kwargs): + def from_pretrained(cls, model, pretrained_model_name_or_path, **kwargs): r""" Args: Instantiate a `LoraModel` from a pretrained Lora configuration and weights. @@ -280,40 +237,24 @@ def from_pretrained(cls, model, model_id, **kwargs): - A path to a directory containing a Lora configuration file saved using the `save_pretrained` method, e.g., ``./my_lora_config_directory/``. """ - from .mapping import MODEL_TYPE_TO_PEFT_MODEL_MAPPING, PEFT_TYPE_TO_CONFIG_MAPPING - - # load the config - config = PEFT_TYPE_TO_CONFIG_MAPPING[PeftConfig.from_pretrained(model_id).peft_type].from_pretrained(model_id) - - - - if config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys(): - model = cls(model, config) - else: - model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config) - + lora_config: LoraArguments = LoraArguments.from_pretrained(pretrained_model_name_or_path) + model = cls(model, lora_config) # load weights if any - if os.path.exists(os.path.join(model_id, WEIGHTS_NAME)): - filename = os.path.join(model_id, WEIGHTS_NAME) + if os.path.exists(os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME)): + filename = os.path.join(pretrained_model_name_or_path, WEIGHTS_NAME) else: - try: - raise ValueError() - # filename = hf_hub_download(model_id, WEIGHTS_NAME) - except: # noqa - raise ValueError( - f"Can't find weights for {model_id} in {model_id} or in the Hugging Face Hub. " - f"Please check that the file {WEIGHTS_NAME} is present at {model_id}." - ) + raise ValueError( + f"Can't find weights for {pretrained_model_name_or_path} in {pretrained_model_name_or_path} or in the Hugging Face Hub. " + f"Please check that the file {WEIGHTS_NAME} is present at {pretrained_model_name_or_path}." 
+ ) adapters_weights = torch.load( filename, map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu")) # load the weights into the model - model = set_peft_model_state_dict(model, adapters_weights) + model = set_lora_model_state_dict(model, adapters_weights) return model - - def print_trainable_parameters(self): """ Prints the number of trainable parameters in the model. @@ -338,26 +279,4 @@ def __getattr__(self, name: str): try: return super().__getattr__(name) # defer to nn.Module's logic except AttributeError: - return getattr(self.base_model, name) - - def forward(self, *args, **kwargs): - """ - Forward pass of the model. - """ - return self.get_base_model()(*args, **kwargs) - - @contextmanager - def disable_adapter(self): - """ - Disables the adapter module. - """ - - self.base_model.disable_adapter_layers() - yield - self.base_model.enable_adapter_layers() - - def get_base_model(self): - """ - Returns the base model. - """ - return self.base_model.model \ No newline at end of file + return getattr(self.model, name) diff --git a/nlp/models/lora/config.py b/nlp/models/lora/configuration.py similarity index 86% rename from nlp/models/lora/config.py rename to nlp/models/lora/configuration.py index 58ca860f..78596efd 100644 --- a/nlp/models/lora/config.py +++ b/nlp/models/lora/configuration.py @@ -12,7 +12,7 @@ CONFIG_NAME = "adapter_config.json" @dataclass -class PeftConfigMixin(PushToHubMixin): +class LoraConfigMixin(PushToHubMixin): r""" This is the base configuration class for PEFT adapter models. It contains all the methods that are common to all PEFT adapter models. This class inherits from `transformers.utils.PushToHubMixin` which contains the methods to @@ -67,11 +67,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs): if os.path.isfile(os.path.join(pretrained_model_name_or_path, CONFIG_NAME)): config_file = os.path.join(pretrained_model_name_or_path, CONFIG_NAME) else: - try: - ... - # config_file = hf_hub_download(pretrained_model_name_or_path, CONFIG_NAME) - except: - raise ValueError(f"Can't find config.json at '{pretrained_model_name_or_path}'") + raise ValueError(f"Can't find config.json at '{pretrained_model_name_or_path}'") loaded_attributes = cls.from_json_file(config_file) @@ -98,27 +94,16 @@ def from_json_file(cls, path_json_file, **kwargs): return json_object -@dataclass -class PeftConfig(PeftConfigMixin): - """ - This is the base configuration class to store the configuration of a :class:`~peft.PeftModel`. - Args: - peft_type (Union[[`~peft.utils.config.PeftType`], `str`]): The type of Peft method to use. - task_type (Union[[`~peft.utils.config.TaskType`], `str`]): The type of task to perform. - inference_mode (`bool`, defaults to `False`): Whether to use the Peft model in inference mode. - """ - - base_model_name_or_path: str = field(default=None, metadata={"help": "The name of the base model to use."}) - inference_mode: bool = field(default=False, metadata={"help": "Whether to use inference mode"}) @dataclass -class LoraConfig(PeftConfig): +class LoraArguments(LoraConfigMixin): """ This is the configuration class to store the configuration of a [`~peft.Lora`]. Args: + inference_mode (`bool`, defaults to `False`): Whether to use the Peft model in inference mode. r (`int`): Lora attention dimension target_modules (`Union[List[str],str]`): The names of the modules to apply Lora to. lora_alpha (`float`): The alpha parameter for Lora scaling. 
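A minimal usage sketch of the API as renamed in this patch (`LoraArguments` replacing `PeftConfig`/`LoraConfig`, and `LoraModel` now constructed as `LoraModel(model, config)`). The import path, the base checkpoint name, and the `target_modules` values below are illustrative assumptions; only the field names and methods come from the patch itself.

```python
# Hedged usage sketch for the renamed LoRA API in this patch.
# Assumptions (not part of the patch): the "deep_training" package root,
# the ChatYuan checkpoint, and the target_modules suffixes chosen below.
from transformers import AutoModelForSeq2SeqLM
from deep_training.nlp.models.lora import LoraArguments, LoraModel  # assumed import path

base_model = AutoModelForSeq2SeqLM.from_pretrained("ClueAI/ChatYuan-large-v1")  # example only

lora_args = LoraArguments(
    with_lora=True,
    r=8,                         # LoRA attention dimension
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q", "v"],   # module-name suffixes to wrap; depends on the architecture
)

# Argument order after this patch: (model, config), not (config, model).
model = LoraModel(base_model, lora_args)
model.print_trainable_parameters()

# Saves only the trainable LoRA tensors (WEIGHTS_NAME) plus adapter_config.json,
# with inference_mode forced to True in the stored config.
model.save_pretrained("./lora_adapter")

# Later, e.g. for inference in a fresh process, rebuild the adapter on a newly
# loaded base model. from_pretrained() now loads LoraArguments from the directory
# and requires the weight file to exist locally; the Hub-download fallback is gone.
base_model = AutoModelForSeq2SeqLM.from_pretrained("ClueAI/ChatYuan-large-v1")
restored = LoraModel.from_pretrained(base_model, "./lora_adapter")
```

Under DeepSpeed or FSDP, `get_lora_model_state_dict(model, state_dict)` and `set_lora_model_state_dict(model, state_dict)` extract and restore only the LoRA tensors, honoring the configured `bias` mode (`"none"`, `"all"`, or `"lora_only"`).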
@@ -131,7 +116,10 @@ class LoraConfig(PeftConfig): modules_to_save (`List[str]`):List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint. """ + lora_model_name_or_path: str = field(default=None, metadata={"help": "The name of the base model to use."}) + inference_mode: bool = field(default=False, metadata={"help": "Whether to use inference mode"}) + with_lora: bool = field(default=False, metadata={"help": "whether use lora"}) r: int = field(default=8, metadata={"help": "Lora attention dimension"}) target_modules: Optional[Union[List[str], str]] = field( default=None, @@ -160,4 +148,10 @@ class LoraConfig(PeftConfig): }, ) - def __post_init__(self): ... \ No newline at end of file + def __post_init__(self): + if self.inference_mode: + self.merge_weights = True + + if self.target_modules is not None and len(self.target_modules) == 1: + self.fan_in_fan_out = True + self.enable_lora = [True, False, True] \ No newline at end of file diff --git a/nlp/models/transformer.py b/nlp/models/transformer.py index 49023f27..28b4d44a 100644 --- a/nlp/models/transformer.py +++ b/nlp/models/transformer.py @@ -233,18 +233,14 @@ def model(self): def model(self, model): self.set_model(model) - def set_model(self, model): - # keep_keys = [ - # 'config_class','load_tf_weights','base_model_prefix','supports_gradient_checkpointing','_init_weights','_set_gradient_checkpointing', - # '_keys_to_ignore_on_load_missing','_keys_to_ignore_on_load_unexpected','_no_split_modules','is_parallelizable','_shift_right','main_input_name', - # '_get_feat_extract_output_lengths','_get_feature_vector_attention_mask',#dummy_inputs - # ] - keep_keys = ['config_class','base_model_prefix'] - for k in keep_keys: - o = getattr(model,k,None) - if o is None: - continue - setattr(self,k,o) + def set_model(self, model , copy_attr=True): + if copy_attr: + keep_keys = ['config_class','base_model_prefix'] + for k in keep_keys: + o = getattr(model,k,None) + if o is None: + continue + setattr(self,k,o) assert self.base_model_prefix is not None, ValueError('base_model_prefix is not allow empty') setattr(self, self.base_model_prefix, model) @@ -350,9 +346,11 @@ def model(self): def model(self, model): self.set_model(model) - def set_model(self, model): + def set_model(self, model , copy_attr=True): assert model is not None self.__backbone = model + if not copy_attr: + return copy_attr = [ 'log','log_dict' diff --git a/setup.py b/setup.py index 50c78eb9..e58adbfc 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ ignore = ['test','tests'] setup( name='deep_training', - version='0.0.15@post2', + version='0.0.16', description='an easy training architecture', long_description='torch_training: https://github.com/ssbuild/deep_training.git', license='Apache License 2.0', diff --git a/utils/trainer.py b/utils/trainer.py index ace710c6..e6f66161 100644 --- a/utils/trainer.py +++ b/utils/trainer.py @@ -62,6 +62,22 @@ def _monitor_candidates(self, trainer: "pl.Trainer") -> Dict[str, Tensor]: def on_get_metric( self, trainer: "pl.Trainer", pl_module: "pl.LightningModule"): return {} + def update_best(self,val): + flag = False + if isinstance(val, torch.Tensor): + if self.monitor not in self.best: + flag = True + self.best[self.monitor] = val + else: + monitor_op = torch.le if self.mode.lower() == 'min' else torch.ge + if monitor_op(val, self.best[self.monitor]).bool().cpu().item(): + flag = True + else: + warnings.warn('monitor {} is not tensor'.format(self.monitor)) + + if flag: + self.best[self.monitor] = val + 
return flag def on_save_model( self, trainer: "pl.Trainer", pl_module: "pl.LightningModule" @@ -71,17 +87,8 @@ def on_save_model( monitor_candidates.update(self.on_get_metric(trainer,pl_module)) val = monitor_candidates.get(self.monitor,None) if val is not None: - flag = False - if isinstance(val,torch.Tensor): - if self.monitor not in self.best: - self.best[self.monitor] = val - monitor_op = torch.le if self.mode.lower() == 'min' else torch.ge - if monitor_op(val ,self.best[self.monitor]).bool().cpu().item(): - flag = True - else: - warnings.warn('monitor {} is not tensor'.format(self.monitor)) + flag = self.update_best(val) if flag: - self.best[self.monitor] = val logging.info('epoch {} ,step {} , save best {}, {}\n'.format(monitor_candidates['epoch'], monitor_candidates['step'], self.best[self.monitor], @@ -90,16 +97,16 @@ def on_save_model( if self.last_weight_file is not None: logging.info('epoch {} ,step {} , save {}\n'.format(monitor_candidates['epoch'], - monitor_candidates['step'], - self.last_weight_file)) + monitor_candidates['step'], + self.last_weight_file)) trainer.save_checkpoint(self.last_weight_file) else: warnings.warn('monitor {} is not in metirc , save lastest checkpoint!'.format(self.monitor)) logging.info('epoch {} ,step {} , save {}\n'.format(monitor_candidates['epoch'], - monitor_candidates['step'], - self.weight_file)) + monitor_candidates['step'], + self.weight_file)) trainer.save_checkpoint(self.weight_file)
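For reference, here is the comparison rule that the new `update_best` helper in `utils/trainer.py` centralizes, shown as a self-contained sketch. The `BestMetricTracker` class and the example values are hypothetical stand-ins; in the patch the helper lives on the checkpoint callback and uses its `monitor`, `mode`, and `best` attributes.

```python
# Standalone sketch of the update_best() logic added in utils/trainer.py.
# BestMetricTracker is an invented stand-in for the checkpoint callback.
import warnings
import torch


class BestMetricTracker:
    def __init__(self, monitor: str = "val_loss", mode: str = "min"):
        self.monitor = monitor
        self.mode = mode      # 'min' keeps the smallest value, anything else the largest
        self.best = {}

    def update_best(self, val) -> bool:
        """Record val and return True when it improves on the stored best."""
        flag = False
        if isinstance(val, torch.Tensor):
            if self.monitor not in self.best:
                flag = True   # the first observed value always counts as an improvement
            else:
                monitor_op = torch.le if self.mode.lower() == 'min' else torch.ge
                flag = monitor_op(val, self.best[self.monitor]).bool().cpu().item()
        else:
            warnings.warn('monitor {} is not tensor'.format(self.monitor))
        if flag:
            self.best[self.monitor] = val
        return flag


tracker = BestMetricTracker(monitor="val_loss", mode="min")
print(tracker.update_best(torch.tensor(0.9)))  # True  - first value is recorded
print(tracker.update_best(torch.tensor(1.2)))  # False - 1.2 is not <= 0.9
print(tracker.update_best(torch.tensor(0.7)))  # True  - new best
```

With this refactor, the best-checkpoint branch of `on_save_model` fires only when `update_best` reports an improvement, and the best-value bookkeeping lives in one place instead of being duplicated inside the callback.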