diff --git a/README.md b/README.md
index cae15f6f..aeda5492 100644
--- a/README.md
+++ b/README.md
@@ -36,8 +36,10 @@
 ## update
 - 2023-07-11
-  - support baichuan v2 完整训练 [baichuan2_finetuning](https://github.com/ssbuild/baichuan2_finetuning)
-  - fix adalora some bugs
+  - 2023-07-11 support baichuan v2 完整训练 [baichuan2_finetuning](https://github.com/ssbuild/baichuan2_finetuning)
+  - 2023-07-11 fix adalora some bugs
+  - 2023-07-16 support rwkv world training
+  - 2023-07-16 0.1.12 release
 - 2023-07-04
   - 0.1.11 release
diff --git a/setup.py b/setup.py
index 5b7fda66..611b414e 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ ignore = ['test','tests']
 setup(
     name='deep_training',
-    version='0.1.11.post1',
+    version='0.1.12',
     description='an easy training architecture',
     long_description='torch_training: https://github.com/ssbuild/deep_training.git',
     license='Apache License 2.0',
diff --git a/src/nlp/models/baichuan2/modeling_baichuan.py b/src/nlp/models/baichuan2/modeling_baichuan.py
index 7b773ac8..a7907769 100644
--- a/src/nlp/models/baichuan2/modeling_baichuan.py
+++ b/src/nlp/models/baichuan2/modeling_baichuan.py
@@ -374,6 +374,12 @@ def get_output_embeddings(self):
     def set_output_embeddings(self, new_embeddings):
         self.lm_head = new_embeddings
 
+    def set_decoder(self, decoder):
+        self.model = decoder
+
+    def get_decoder(self):
+        return self.model
+
     def forward(
         self,
         input_ids: torch.LongTensor = None,
diff --git a/src/nlp/models/chatglm2/tokenization_chatglm.py b/src/nlp/models/chatglm2/tokenization_chatglm.py
index 7f80bf8d..af6c93b5 100644
--- a/src/nlp/models/chatglm2/tokenization_chatglm.py
+++ b/src/nlp/models/chatglm2/tokenization_chatglm.py
@@ -66,7 +66,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
     model_input_names = ["input_ids", "attention_mask", "position_ids"]
 
     def __init__(self, vocab_file, padding_side="left", **kwargs):
-        super().__init__(padding_side=padding_side, **kwargs)
+        super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=False, **kwargs)
         self.name = "GLMTokenizer"
 
         self.vocab_file = vocab_file
@@ -83,6 +83,10 @@ def get_command(self, token):
         assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
         return self.tokenizer.special_tokens[token]
 
+    @property
+    def unk_token(self) -> str:
+        return ""
+
     @property
     def pad_token(self) -> str:
         return ""
diff --git a/src/nlp/models/rwkv4/modeling_rwkv.py b/src/nlp/models/rwkv4/modeling_rwkv.py
index 538ae64c..b7ef4c92 100644
--- a/src/nlp/models/rwkv4/modeling_rwkv.py
+++ b/src/nlp/models/rwkv4/modeling_rwkv.py
@@ -692,6 +692,12 @@ def __init__(self, config: RwkvConfig):
         # Initialize weights and apply final processing
         self.post_init()
 
+    def get_input_embeddings(self):
+        return self.rwkv.emb
+
+    def set_input_embeddings(self, value):
+        self.rwkv.emb = value
+
     def get_output_embeddings(self):
         return self.head
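
A note on the new RWKV accessors: `get_input_embeddings` / `set_input_embeddings` expose `rwkv.emb` through the standard accessor interface that generic utilities (embedding resizing, weight tying, adapter target discovery) go through, instead of reaching for the attribute directly. Below is a minimal sketch of that pattern; the import path, the class name `RwkvForCausalLM`, and the config keyword arguments are illustrative assumptions, not taken from this diff.

```python
import torch.nn as nn

# Hypothetical import path and class name for the model defined in
# src/nlp/models/rwkv4/modeling_rwkv.py -- adjust to the real package layout.
from deep_training.nlp.models.rwkv4.modeling_rwkv import RwkvConfig, RwkvForCausalLM

# Toy config sizes; the keyword names here are assumed, not confirmed by this diff.
config = RwkvConfig(vocab_size=1024, hidden_size=64, num_hidden_layers=2)
model = RwkvForCausalLM(config)

# After this change the embedding table is reachable without knowing the attribute name.
emb = model.get_input_embeddings()            # returns model.rwkv.emb
print(emb.num_embeddings, emb.embedding_dim)  # e.g. 1024 64

# Example: grow the vocabulary by 8 tokens (say, after adding special tokens to a
# tokenizer), copying the old rows and re-installing the table via the setter.
new_emb = nn.Embedding(emb.num_embeddings + 8, emb.embedding_dim)
new_emb.weight.data[: emb.num_embeddings] = emb.weight.data
model.set_input_embeddings(new_emb)
```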