diff --git a/README.md b/README.md
index cae15f6f..aeda5492 100644
--- a/README.md
+++ b/README.md
@@ -36,8 +36,10 @@
## update
- 2023-07-11
- - support baichuan v2 full training [baichuan2_finetuning](https://github.com/ssbuild/baichuan2_finetuning)
- - fix some adalora bugs
+ - 2023-07-11 support baichuan v2 full training [baichuan2_finetuning](https://github.com/ssbuild/baichuan2_finetuning)
+ - 2023-07-11 fix some adalora bugs
+ - 2023-07-16 support rwkv world training
+ - 2023-07-16 0.1.12 release
- 2023-07-04
- 0.1.11 release
diff --git a/setup.py b/setup.py
index 5b7fda66..611b414e 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@
ignore = ['test','tests']
setup(
name='deep_training',
- version='0.1.11.post1',
+ version='0.1.12',
description='an easy training architecture',
long_description='torch_training: https://github.com/ssbuild/deep_training.git',
license='Apache License 2.0',
diff --git a/src/nlp/models/baichuan2/modeling_baichuan.py b/src/nlp/models/baichuan2/modeling_baichuan.py
index 7b773ac8..a7907769 100644
--- a/src/nlp/models/baichuan2/modeling_baichuan.py
+++ b/src/nlp/models/baichuan2/modeling_baichuan.py
@@ -374,6 +374,12 @@ def get_output_embeddings(self):
def set_output_embeddings(self, new_embeddings):
self.lm_head = new_embeddings
+ def set_decoder(self, decoder):
+ self.model = decoder
+
+ def get_decoder(self):
+ return self.model
+
def forward(
self,
input_ids: torch.LongTensor = None,
diff --git a/src/nlp/models/chatglm2/tokenization_chatglm.py b/src/nlp/models/chatglm2/tokenization_chatglm.py
index 7f80bf8d..af6c93b5 100644
--- a/src/nlp/models/chatglm2/tokenization_chatglm.py
+++ b/src/nlp/models/chatglm2/tokenization_chatglm.py
@@ -66,7 +66,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
model_input_names = ["input_ids", "attention_mask", "position_ids"]
def __init__(self, vocab_file, padding_side="left", **kwargs):
- super().__init__(padding_side=padding_side, **kwargs)
+ super().__init__(padding_side=padding_side, clean_up_tokenization_spaces=False, **kwargs)
self.name = "GLMTokenizer"
self.vocab_file = vocab_file
@@ -83,6 +83,10 @@ def get_command(self, token):
assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
return self.tokenizer.special_tokens[token]
+ @property
+ def unk_token(self) -> str:
+ return ""
+
@property
def pad_token(self) -> str:
return ""
diff --git a/src/nlp/models/rwkv4/modeling_rwkv.py b/src/nlp/models/rwkv4/modeling_rwkv.py
index 538ae64c..b7ef4c92 100644
--- a/src/nlp/models/rwkv4/modeling_rwkv.py
+++ b/src/nlp/models/rwkv4/modeling_rwkv.py
@@ -692,6 +692,12 @@ def __init__(self, config: RwkvConfig):
# Initialize weights and apply final processing
self.post_init()
+ def get_input_embeddings(self):
+ return self.rwkv.emb
+
+ def set_input_embeddings(self, value):
+ self.rwkv.emb = value
+
def get_output_embeddings(self):
return self.head
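Note (not part of the patch): a minimal sketch of how the accessor pairs added above are typically used. It assumes a model object exposing `get_input_embeddings`/`set_input_embeddings`, as the RWKV and Baichuan2 classes do after this change; `resize_input_embeddings` is a hypothetical helper written here for illustration only.

```python
import torch
from torch import nn

def resize_input_embeddings(model: nn.Module, new_vocab_size: int) -> None:
    # Illustrative helper (not in the patch): grow a model's input embedding
    # table through the generic accessors, copying over the existing rows.
    old_emb = model.get_input_embeddings()        # e.g. returns self.rwkv.emb on the RWKV model above
    new_emb = nn.Embedding(new_vocab_size, old_emb.embedding_dim)
    rows = min(new_vocab_size, old_emb.num_embeddings)
    with torch.no_grad():
        new_emb.weight[:rows] = old_emb.weight[:rows]
    model.set_input_embeddings(new_emb)
```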