diff --git a/README.md b/README.md
index d8b1e378..2588fd22 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,8 @@
- keras 模块封装
## 更新
+- 2023-04-11
+  - deep_training 0.1.2 refactored lora into v2, added adalora
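+  - old imports (LoraModel, LoraArguments) keep working; the refactored implementation is exported as LoraModelV2 / LoraArgumentsV2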
- 2023年04月07
- deep_training 0.1.1 同步更新chatglm 词表配置信息
- 2023年04月02
diff --git a/data_helper/training_args.py b/data_helper/training_args.py
index 28a489c8..ba0ae796 100644
--- a/data_helper/training_args.py
+++ b/data_helper/training_args.py
@@ -144,14 +144,15 @@ class TrainingArguments:
"},
)
adv: dict = field(
- default_factory= lambda: {
- 'mode': None, # None, fgm, fgsm_local, fgsm, pgd, free_local, free
- 'emb_name=': 'embedding',
- 'attack_iters': 2, # pgd
- 'minibatch_replays': 2, # free
- 'alpha': 0.1, # pgd
- 'epsilon': 1.0 # pgd,fgm
- },
+ # default_factory= lambda: {
+ # 'mode': None, # None, fgm, fgsm_local, fgsm, pgd, free_local, free
+    # 'emb_name': 'embedding',
+ # 'attack_iters': 2, # pgd
+ # 'minibatch_replays': 2, # free
+ # 'alpha': 0.1, # pgd
+ # 'epsilon': 1.0 # pgd,fgm
+ # },
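+    # adv now defaults to None (adversarial training disabled); pass a dict shaped like the commented example above to enable it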
+ default=None,
metadata={"help": "对抗训练"},
)
hierarchical_position: float = field(
diff --git a/nlp/models/lora/__init__.py b/nlp/models/lora/__init__.py
index 19eb2ba2..e0b895c2 100644
--- a/nlp/models/lora/__init__.py
+++ b/nlp/models/lora/__init__.py
@@ -1,2 +1,6 @@
# @Time : 2023/3/2 20:55
-# @Author : tk
\ No newline at end of file
+# @Author : tk
+
+# backward compatibility with older versions
+from .v1 import LoraModel,LoraArguments
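+# v2 is the refactored lora implementation (with adalora); exported under aliases so v1 imports stay valid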
+from .v2 import LoraModel as LoraModelV2,LoraArguments as LoraArgumentsV2
\ No newline at end of file
diff --git a/nlp/models/lora/v2/configuration.py b/nlp/models/lora/v2/configuration.py
index 2364d21b..cac26b0e 100644
--- a/nlp/models/lora/v2/configuration.py
+++ b/nlp/models/lora/v2/configuration.py
@@ -234,6 +234,7 @@ def save_pretrained(self, save_directory, **kwargs):
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
config = LORA_TYPE_TO_CONFIG_MAPPING[LoraBaseArguments.from_pretrained(pretrained_model_name_or_path,**kwargs).lora_type].from_pretrained(pretrained_model_name_or_path,**kwargs)
+    assert config.with_lora, ValueError('lora config got invalid with_lora', config.with_lora)
# config = cls()
# config.lora = None
# config.adalora = None
diff --git a/nlp/models/transformer.py b/nlp/models/transformer.py
index dd67f684..b29406e3 100644
--- a/nlp/models/transformer.py
+++ b/nlp/models/transformer.py
@@ -6,8 +6,6 @@
import typing
from functools import partial
from typing import Any, IO
-
-import pytorch_lightning as pl
import torch
from torch import nn, Tensor
from transformers import (
@@ -30,608 +28,9 @@
AutoModel
)
+from .transformer_base import TransformerBase,TransformerLightningModule,MyLightningModule
from ..layers.mask import unilm_mask
from ..losses.lm_loss import LM_loss
-from ..utils import configure_optimizers, get_value_from_args
-from ..utils.adversarial import AdversarialMethods
-from ...data_helper import TrainingArguments, ModelArguments, PrefixModelArguments, DataArguments
-
-
-# class TransformerMeta(type):
-# def __new__(cls, name, base, attr,*args,**kwargs):
-# alter = tuple(b for b in base if issubclass(b,TransformerBase))
-# cls_ = super(TransformerMeta, cls).__new__(cls, name, (TransformerLightningModule,) + tuple(b for b in base if not issubclass(b, TransformerBase)), attr)
-# cls_.__BACKBONE_CLASS__ = alter
-# return cls_
-
-
-
-def verify_manual_optimization_support(trainer: "pl.Trainer", model: "pl.LightningModule") -> None:
- if model.automatic_optimization:
- return
- trainer.gradient_clip_val = None
- trainer.accumulate_grad_batches = 1
-
-
-class MyLightningModule(pl.LightningModule):
- def __init__(self,*args: Any, **kwargs: Any):
- super(MyLightningModule, self).__init__(*args,**kwargs)
-
- @classmethod
- def load_from_checkpoint(
- cls,
- checkpoint_path: typing.Union[str, IO],
- map_location = None,
- hparams_file: typing.Optional[str] = None,
- strict: bool = True,
- **kwargs: Any,
- ) -> typing.Union["pl.LightningModule", "pl.LightningDataModule","MyLightningModule"]:
- return super(MyLightningModule, cls).load_from_checkpoint(checkpoint_path,map_location,hparams_file,strict,**kwargs)
-
- @property
- def backbone(self):
- return self.__model
-
- @property
- def model(self):
- return self.__model
-
- def convert_to_onnx(self, file_path,
- input_sample=(
- ("input_ids",torch.ones(size=(1, 128), dtype=torch.int32)),
- ("attention_mask",torch.ones(size=(1, 128), dtype=torch.int32)),
- ),
- input_names=("input_ids", "attention_mask"),
- output_names=("pred_ids",),
- dynamic_axes=None or {"input_ids": [0, 1], "attention_mask": [0, 1], "pred_ids": [0, 1]},
- opset_version=14,
- verbose=True,
- do_constant_folding=True
- ):
- self.eval()
- self.to('cuda')
- self.to_onnx(file_path,
- input_sample=input_sample,
- verbose=verbose,
- opset_version=opset_version,
- do_constant_folding=do_constant_folding,
- input_names=input_names,
- output_names=output_names,
- dynamic_axes=dynamic_axes)
-
-
-
-class TransformerFakeMeta(type):
- def __new__(cls, name, base, attr,*args,**kwargs):
- base_new = tuple(b for b in base if b != MyLightningModule)
- if name == 'TransformerBase':
- base_new = base_new + (nn.Module,)
- with_pl = kwargs.get('with_pl',False)
- backbone_class = None
- if with_pl:
- backbone_class = tuple(b for b in base if issubclass(b, TransformerBase))
- base_new = (TransformerLightningModule,) + tuple(b for b in base_new if not issubclass(b, TransformerBase))
- cls_ = super(TransformerFakeMeta, cls).__new__(cls, name, base_new, attr)
- if backbone_class is not None:
- cls_.__BACKBONE_CLASS__ = backbone_class
- return cls_
-
-
-
-class PreTrainedModel_Data:
- base_model_prefix = None
- config_class = None
-
-class TransformerBase(MyLightningModule,metaclass=TransformerFakeMeta):
- def __init__(self,*args,**kwargs):
- config = get_value_from_args('config',PretrainedConfig,*args,**kwargs)
- super(TransformerBase, self).__init__()
- self.config = config
- self._premodel_data = PreTrainedModel_Data()
- self._trainer: typing.Optional["pl.Trainer"] = None
-
- def forward(self, *args, **batch):
- return self.model(*args,**batch)
-
- def compute_loss(self, *args,**batch) -> tuple:
- return self.model(*args,**batch)
-
- def post_init(self):
- return self.model.post_init()
-
- def init_weights(self):
- return self.model.init_weights()
-
- @property
- def trainer(self):
- return self._trainer
-
- @trainer.setter
- def trainer(self,trainer: typing.Optional["pl.Trainer"]):
- self._trainer = trainer
-
- @property
- def current_epoch(self):
- return self.trainer.current_epoch if self._trainer else 0
-
- @property
- def global_step(self):
- return self.trainer.global_step if self._trainer else 0
-
- @property
- def max_epochs(self) -> typing.Optional[int]:
- return self.trainer.max_epochs if self._trainer else 0
-
- @property
- def min_epochs(self) -> typing.Optional[int]:
- return self.trainer.min_epochs if self._trainer else 0
-
- @property
- def max_steps(self) -> int:
- return self.trainer.max_steps if self._trainer else 0
-
- @property
- def min_steps(self) -> int:
- return self.trainer.min_steps if self._trainer else 0
-
-
- def from_pretrained(self,CLS, *args, **kwargs):
- config = get_value_from_args('config', PretrainedConfig, *args, **kwargs)
- model_args = get_value_from_args('model_args', ModelArguments, *args, **kwargs)
-
- if model_args.model_name_or_path:
- args_new = tuple(v for v in args
- if not isinstance(v, ModelArguments) and \
- not isinstance(v, TrainingArguments) and \
- not isinstance(v,PretrainedConfig) and \
- not isinstance(v,PrefixModelArguments) and \
- not isinstance(v,DataArguments)
- )
- kwargs_new = {k: v for k, v in kwargs.items()
- if not isinstance(v, ModelArguments) and \
- not isinstance(v, TrainingArguments) and \
- not isinstance(v, PretrainedConfig) and \
- not isinstance(v, PrefixModelArguments) and \
- not isinstance(v, DataArguments)
- }
-
- model_kwargs = {
- "cache_dir": model_args.cache_dir,
- "revision": model_args.model_revision,
- "use_auth_token": True if model_args.use_auth_token else None,
- }
- cls_ = CLS.from_pretrained(
- model_args.model_name_or_path,
- *args_new,
- **kwargs_new,
- from_tf=bool(".ckpt" in model_args.model_name_or_path),
- config=config,
- **model_kwargs
- )
- elif hasattr(CLS,'from_config'):
- cls_ = CLS.from_config(config)
- cls_.post_init()
- elif hasattr(CLS, '_from_config'):
- cls_ = CLS._from_config(config)
- cls_.post_init()
- else:
- cls_ = CLS(config)
- cls_.post_init()
- return cls_
-
- @property
- def model(self):
- if not self._premodel_data.base_model_prefix:
- return None
- return getattr(self, self._premodel_data.base_model_prefix,None)
-
- @model.setter
- def model(self, model):
- self.set_model(model)
-
- def set_model(self, model , copy_attr=True):
- if copy_attr:
- keep_keys = ['config_class','base_model_prefix']
- for k in keep_keys:
- o = getattr(model,k,None)
- if o is None:
- continue
- if o == 'model':
- o = 'model_'
- setattr(self._premodel_data,k,o)
-
- assert self._premodel_data.base_model_prefix is not None, ValueError('base_model_prefix is not allow empty')
- setattr(self, self._premodel_data.base_model_prefix, model)
-
- def get_model_lr(self):
- return [(self.model if self._premodel_data.base_model_prefix is not None else self , self.config.task_specific_params['learning_rate']), ]
-
-
-
-class TransformerLightningModule(MyLightningModule):
- def __init__(self, *args,**kwargs):
- config = get_value_from_args('config',PretrainedConfig,*args,**kwargs)
- model_args = get_value_from_args('model_args', ModelArguments, *args, **kwargs)
- training_args = get_value_from_args('training_args', TrainingArguments, *args, **kwargs)
- super(TransformerLightningModule, self).__init__()
- if not hasattr(config, 'task_specific_params') or config.task_specific_params is None:
- config.task_specific_params = {}
- task_specific_params = config.task_specific_params
- task_specific_params['learning_rate'] = training_args.learning_rate
- task_specific_params['learning_rate_for_task'] = training_args.learning_rate_for_task \
- if training_args.learning_rate_for_task is not None else training_args.learning_rate
- print(training_args)
- print(model_args)
- if training_args.adv['mode'] != None:
- assert training_args.adv['mode'] in AdversarialMethods.keys(), ValueError('no support adv mode {} , must be in {}'.format(training_args.adv['mode'],','.join(AdversarialMethods.keys())))
- self.automatic_optimization = False
-
- try:
- self.save_hyperparameters(ignore=['config'])
- except:
- pass
- self.config = config
- self.model_args = model_args
- self.training_args = training_args
- self.__backbone : typing.Optional[TransformerBase] = None
- if hasattr(self,'__BACKBONE_CLASS__') and len(self.__BACKBONE_CLASS__) > 0:
- self.set_model(self.__BACKBONE_CLASS__[0](*args, **kwargs))
-
-
- self.training_step_fn = self.training_step
- self.embeddings_forward_fn = None
-
- #对抗训练
- if training_args.adv['mode'] is not None:
- self.embeddings_forward_fn = self.get_embeddings_module().embeddings.forward
-
- self.training_step = self.adv_training_step
- if training_args.adv['mode'].find('local') != -1:
- self.adversarial = AdversarialMethods[training_args.adv['mode']](model=self.model)
- else:
- self.adversarial = AdversarialMethods[training_args.adv['mode']](model=self.model,
- emb_name=training_args.adv.get('emb_name', 'embedding'))
- k = 'pytorch_lightning.trainer.configuration_validator'
- if k in sys.modules:
- setattr( sys.modules[k],'__verify_manual_optimization_support' , verify_manual_optimization_support)
- else:
- self.adversarial = None
-
- self.gradient_clip_val = training_args.max_grad_norm
-
- if training_args.hierarchical_position is not None and (training_args.hierarchical_position > 0 and training_args.hierarchical_position < 1):
- #绝对位置编码 分层位置编码
- def forward(cls,input: Tensor) -> Tensor:
- # return F.embedding(
- # input, self.weight, self.padding_idx, self.max_norm,
- # self.norm_type, self.scale_grad_by_freq, self.sparse)
- position_ids = input
- alpha = training_args.hierarchical_position
- embeddings = cls.weight - alpha * cls.weight[:1]
- embeddings = embeddings / (1 - alpha)
- x_idx = position_ids // cls.num_embeddings
- y_idx = position_ids % cls.num_embeddings
-
- embeddings_x = torch.index_select(embeddings,dim=0,index=x_idx.view(-1))
- embeddings_y = torch.index_select(embeddings,dim=0,index=y_idx.view(-1))
- embeddings = alpha * embeddings_x + (1 - alpha) * embeddings_y
- return embeddings
-
- position_embeddings = self.get_embeddings_module().embeddings.position_embeddings
- position_embeddings.forward = partial(forward,position_embeddings)
-
-
- def get_embeddings_module(self):
- base_model_prefix = self.backbone.base_model_prefix
- current_model = self.backbone.model
- tmp_obj = current_model
- while tmp_obj is not None:
- if hasattr(tmp_obj, 'embeddings'):
- return tmp_obj
- current_model = tmp_obj
- tmp_obj = getattr(current_model, base_model_prefix, None)
- return tmp_obj
-
- @property
- def backbone(self):
- return self.__backbone
-
- @property
- def model(self):
- return self.__backbone
-
- @model.setter
- def model(self, model):
- self.set_model(model)
-
- def set_model(self, model , copy_attr=True):
- assert model is not None
- self.__backbone = model
- if not copy_attr:
- return
-
- copy_attr = [
- 'log','log_dict'
- ]
- for k in copy_attr:
- setattr(self.__backbone, k, getattr(self, k))
-
- event_ = [
- 'configure_optimizers',
- 'configure_gradient_clipping',
- 'lr_scheduler_step',
- 'optimizer_step',
- 'optimizer_zero_grad',
- ]
- for e in event_:
- a = getattr(self.__backbone, e, None)
- if a is not None:
- setattr(self,e,a)
-
-
- def get_model_lr(self):
- return self.model.get_model_lr()
-
-
- def compute_loss(self,*args, **kwargs):
- kwargs.update(dict(args))
- return self.model.compute_loss(**kwargs)
-
-
- def forward(self,*args, **kwargs):
- kwargs.update(dict(args))
- return self.compute_loss(**kwargs)
-
-
- def setup(self, stage: str) -> None:
- setattr(self.backbone, 'trainer', self.trainer)
- setattr(self.backbone, 'estimated_stepping_batches', self.trainer.estimated_stepping_batches)
-
-
- def get_named_parameters(self):
- training_args = self.training_args
- model_attrs = self.get_model_lr()
- no_decay = ["bias", "LayerNorm.weight"]
- def __get_named_parameters(a : nn.Module):
- return [
- {
- "params": [p for n, p in a.named_parameters() if not any(nd in n for nd in no_decay)],
- "weight_decay": training_args.weight_decay, "lr": lr,
- },
- {
- "params": [p for n, p in a.named_parameters() if any(nd in n for nd in no_decay)],
- "weight_decay": 0.0, "lr": lr,
- },
- ]
-
- opt = []
- a: nn.Module
- for a, lr in model_attrs:
- opt += __get_named_parameters(a)
- return opt
-
- def configure_optimizers(self):
- return configure_optimizers(self.get_named_parameters(), self.training_args,self.trainer.estimated_stepping_batches)
-
-
- def manual_backward(self,loss: Tensor, *args: Any, **kwargs: Any):
- if isinstance(loss,dict):
- loss = loss['loss']
- return super(TransformerLightningModule, self).manual_backward(loss)
-
-
-
- def adv_training_step(self,batch):
- mode = self.training_args.adv['mode']
- opt = self.optimizers()
- gradient_clip_val = self.gradient_clip_val
- epsilon = self.training_args.adv['epsilon']
- if mode == 'fgm':
- opt.zero_grad()
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
- self.adversarial.attack(epsilon=epsilon)
- loss = self.training_step_fn(batch)
- opt.zero_grad()
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- self.adversarial.restore() # 恢复embedding参数
- self.model.zero_grad()
- elif mode == 'fgsm_local':
- alpha = self.training_args.adv['alpha']
- opt.zero_grad()
- delta = torch.zeros((*batch['input_ids'].size()[:2], self.config.hidden_size), dtype=torch.float32).to(
- batch['input_ids'].device)
- def forward_fn(*args, **kwargs):
- embedding_output = self.embeddings_forward_fn(*args, **kwargs)
- embedding_output += delta
- return embedding_output
- setattr(self.get_embeddings_module().embeddings, 'forward', forward_fn)
- delta = self.adversarial.attack(is_first_attack=True, delta=delta,alpha=alpha,epsilon=epsilon)
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
-
- delta = self.adversarial.attack(delta=delta,alpha=alpha,epsilon=epsilon)
- loss = self.training_step_fn(batch)
- opt.zero_grad()
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- self.model.zero_grad()
-
- setattr(self.get_embeddings_module().embeddings, 'forward', self.embeddings_forward_fn)
- elif mode == 'fgsm':
- alpha = self.training_args.adv['alpha']
- self.adversarial.attack(is_first_attack=True,alpha=alpha,epsilon=epsilon)
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
-
- self.adversarial.attack(alpha=alpha,epsilon=epsilon)
- loss = self.training_step_fn(batch)
- opt.zero_grad()
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- self.adversarial.restore() # 恢复embedding参数
- self.model.zero_grad()
- elif mode == 'pgd':
- alpha = self.training_args.adv['alpha']
- opt.zero_grad()
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
-
- self.adversarial.backup_grad()
- attack_iters = self.training_args.adv['attack_iters']
- for t in range(attack_iters):
- self.adversarial.attack(is_first_attack=(t == 0),alpha=alpha,epsilon=epsilon)
- if t != attack_iters - 1:
- opt.zero_grad()
- else:
- self.adversarial.restore_grad()
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- self.adversarial.restore() # 恢复embedding参数
- opt.step()
- self.model.zero_grad()
- elif mode == 'free_local':
- if not hasattr(self.adversarial,'delta_'):
- setattr(self.adversarial,'delta_', torch.zeros((batch['input_ids'].size(0),self.config.max_position_embeddings, self.config.hidden_size),requires_grad=True).to(batch['input_ids'].device))
- delta = getattr(self.adversarial,'delta_')
- def forward_fn(*args, **kwargs):
- embedding_output = self.embeddings_forward_fn(*args, **kwargs)
- embedding_output += delta[:embedding_output.size(0),:embedding_output.size(1)]
- return embedding_output
-
- setattr(self.get_embeddings_module().embeddings, 'forward', forward_fn)
- for _ in range(self.training_args.adv['minibatch_replays']):
- delta.retain_grad()
- loss = self.training_step_fn(batch)
- opt.zero_grad()
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- delta = self.adversarial.attack(delta=delta,epsilon=epsilon)
- # delta.grad.zero_()
- self.model.zero_grad()
-
- setattr(self.get_embeddings_module().embeddings, 'forward', self.embeddings_forward_fn)
- elif mode == 'free':
- for _ in range(self.training_args.adv['minibatch_replays']):
- opt.zero_grad()
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- self.adversarial.attack(epsilon=epsilon)
- self.model.zero_grad()
- else:
- opt.zero_grad()
- loss = self.training_step_fn(batch)
- self.manual_backward(loss)
- if gradient_clip_val is not None:
- self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
- opt.step()
- self.model.zero_grad()
- return loss
-
- def training_step(self, batch):
- if isinstance(batch, dict):
- outputs = self.compute_loss(**batch)
- else:
- outputs = self.compute_loss(**dict(batch))
- loss = outputs[0]
- if isinstance(loss,dict):
- self.log_dict(loss,prog_bar=True)
- else:
- self.log('loss',loss,prog_bar=True)
- return loss
-
- def validation_step(self, batch, batch_idx, dataloader_idx=0):
- if isinstance(batch, dict):
- outputs = self.compute_loss(**batch)
- else:
- outputs = self.compute_loss(**dict(batch))
-
- loss = outputs[0]
- o = {}
- if loss is not None:
- if isinstance(loss, dict):
- o = loss
- if 'loss' in o:
- o['val_loss'] = o.pop('loss')
- else:
- o['val_loss'] = loss.cpu().detach().numpy()
-
- out = outputs[1:]
- if isinstance(out,(tuple,list)):
- o['outputs'] = []
- obj = o['outputs']
- for t in out:
- if t is None:
- obj.append(t)
- elif isinstance(t,torch.Tensor):
- obj.append(t.cpu().detach().numpy())
- elif isinstance(t, list) or isinstance(t, tuple):
- tmp_list =[_ for _ in t]
- for idx in range(len(tmp_list)):
- node = tmp_list[idx]
- if isinstance(node, torch.Tensor):
- tmp_list[idx] = node.cpu().detach().numpy()
- elif isinstance(node, list) or isinstance(node, tuple):
- tmp_list[idx] = [_.cpu().detach().numpy() for _ in node]
- else:
- raise ValueError('validation_step: outputs not support', type(t))
- obj.append(tmp_list)
- elif isinstance(t, dict):
- obj.append({k:v.cpu().detach().numpy() for k,v in t.items()})
- else:
- raise ValueError('validation_step: outputs not support', type(t))
- else:
- o['outputs'] = out.cpu().detach().numpy()
- return o
-
- def test_step(self, batch, batch_idx):
- if isinstance(batch, dict):
- outputs = self.compute_loss(**batch)
- else:
- outputs = self.compute_loss(**dict(batch))
- o = {}
- out = outputs
- if isinstance(out, (tuple, list)):
- o['outputs'] = []
- obj = o['outputs']
- for t in out:
- if t is None:
- obj.append(t)
- elif isinstance(t, torch.Tensor):
- obj.append(t.cpu().detach().numpy())
- elif isinstance(t, list) or isinstance(t, tuple):
- tmp_list =[_ for _ in t]
- for idx in range(len(tmp_list)):
- node = tmp_list[idx]
- if isinstance(node,torch.Tensor):
- tmp_list[idx] = node.cpu().detach().numpy()
- elif isinstance(node, list) or isinstance(node, tuple):
- tmp_list[idx] = [_.cpu().detach().numpy() for _ in node]
- else:
- raise ValueError('test_step: outputs not support', type(t))
- obj.append(tmp_list)
- elif isinstance(t, dict):
- obj.append({k: v.cpu().detach().numpy() for k, v in t.items()})
- else:
- raise ValueError('test_step: outputs not support',type(t))
- else:
- o['outputs'] = out.cpu().detach().numpy()
- return o
-
@@ -730,28 +129,18 @@ def __init__(self,*args: Any, **kwargs: Any):
-
-
-
-
-
class TransformerForTokenClassification(TransformerBase):
def __init__(self,*args: Any, **kwargs: Any):
super().__init__( *args, **kwargs)
self.set_model(self.from_pretrained(AutoModelForTokenClassification, *args, **kwargs))
-
-
-
class TransformerForMultipleChoice(TransformerBase):
def __init__(self,*args: Any, **kwargs: Any):
super().__init__( *args, **kwargs)
self.set_model(self.from_pretrained(AutoModelForMultipleChoice, *args, **kwargs))
-
-
class TransformerForNextSentencePrediction(TransformerBase):
def __init__(self,*args: Any, **kwargs: Any):
super().__init__( *args, **kwargs)
@@ -775,9 +164,6 @@ def __init__(self,*args: Any, **kwargs: Any):
self.set_model(self.from_pretrained(AutoModelForImageSegmentation, *args, **kwargs))
-
-
-
class TransformerForSemanticSegmentation(TransformerBase):
def __init__(self,*args: Any, **kwargs: Any):
@@ -785,8 +171,6 @@ def __init__(self,*args: Any, **kwargs: Any):
self.set_model(self.from_pretrained(AutoModelForSemanticSegmentation, *args, **kwargs))
-
-
class TransformerForObjectDetection(TransformerBase):
def __init__(self,*args: Any, **kwargs: Any):
super().__init__( *args, **kwargs)
diff --git a/nlp/models/transformer_base.py b/nlp/models/transformer_base.py
new file mode 100644
index 00000000..416d701b
--- /dev/null
+++ b/nlp/models/transformer_base.py
@@ -0,0 +1,615 @@
+# -*- coding: utf-8 -*-
+# @Time : 2023/4/11 14:35
+
+import sys
+import typing
+from functools import partial
+from typing import Any, IO
+
+import pytorch_lightning as pl
+import torch
+from torch import nn, Tensor
+from transformers import (
+ PretrainedConfig,
+)
+
+
+from ..utils import configure_optimizers, get_value_from_args
+from ..utils.adversarial import AdversarialMethods
+from ...data_helper import TrainingArguments, ModelArguments, PrefixModelArguments, DataArguments
+
+
+# class TransformerMeta(type):
+# def __new__(cls, name, base, attr,*args,**kwargs):
+# alter = tuple(b for b in base if issubclass(b,TransformerBase))
+# cls_ = super(TransformerMeta, cls).__new__(cls, name, (TransformerLightningModule,) + tuple(b for b in base if not issubclass(b, TransformerBase)), attr)
+# cls_.__BACKBONE_CLASS__ = alter
+# return cls_
+
+
+
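+# Installed in place of pytorch_lightning's __verify_manual_optimization_support (see below): under manual
+# optimization it resets gradient_clip_val and accumulate_grad_batches instead of failing validation, so the
+# adversarial training step can clip gradients itself.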
+def verify_manual_optimization_support(trainer: "pl.Trainer", model: "pl.LightningModule") -> None:
+ if model.automatic_optimization:
+ return
+ trainer.gradient_clip_val = None
+ trainer.accumulate_grad_batches = 1
+
+
+class MyLightningModule(pl.LightningModule):
+ def __init__(self,*args: Any, **kwargs: Any):
+ super(MyLightningModule, self).__init__(*args,**kwargs)
+
+ @classmethod
+ def load_from_checkpoint(
+ cls,
+ checkpoint_path: typing.Union[str, IO],
+ map_location = None,
+ hparams_file: typing.Optional[str] = None,
+ strict: bool = True,
+ **kwargs: Any,
+ ) -> typing.Union["pl.LightningModule", "pl.LightningDataModule","MyLightningModule"]:
+ return super(MyLightningModule, cls).load_from_checkpoint(checkpoint_path,map_location,hparams_file,strict,**kwargs)
+
+ @property
+ def backbone(self):
+ return self.__model
+
+ @property
+ def model(self):
+ return self.__model
+
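+    # Export the module to ONNX; the default sample feeds int32 input_ids / attention_mask of shape (1, 128)
+    # and marks the batch and sequence axes as dynamic.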
+ def convert_to_onnx(self, file_path,
+ input_sample=(
+ ("input_ids",torch.ones(size=(1, 128), dtype=torch.int32)),
+ ("attention_mask",torch.ones(size=(1, 128), dtype=torch.int32)),
+ ),
+ input_names=("input_ids", "attention_mask"),
+ output_names=("pred_ids",),
+ dynamic_axes=None or {"input_ids": [0, 1], "attention_mask": [0, 1], "pred_ids": [0, 1]},
+ opset_version=14,
+ verbose=True,
+ do_constant_folding=True
+ ):
+ self.eval()
+ self.to('cuda')
+ self.to_onnx(file_path,
+ input_sample=input_sample,
+ verbose=verbose,
+ opset_version=opset_version,
+ do_constant_folding=do_constant_folding,
+ input_names=input_names,
+ output_names=output_names,
+ dynamic_axes=dynamic_axes)
+
+
+
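+# Metaclass that rewires inheritance: TransformerBase itself gains nn.Module as a base, MyLightningModule is
+# dropped from subclasses, and classes declared with with_pl=True are rebuilt on top of TransformerLightningModule
+# with their TransformerBase bases recorded in __BACKBONE_CLASS__.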
+class TransformerFakeMeta(type):
+ def __new__(cls, name, base, attr,*args,**kwargs):
+ base_new = tuple(b for b in base if b != MyLightningModule)
+ if name == 'TransformerBase':
+ base_new = base_new + (nn.Module,)
+ with_pl = kwargs.get('with_pl',False)
+ backbone_class = None
+ if with_pl:
+ backbone_class = tuple(b for b in base if issubclass(b, TransformerBase))
+ base_new = (TransformerLightningModule,) + tuple(b for b in base_new if not issubclass(b, TransformerBase))
+ cls_ = super(TransformerFakeMeta, cls).__new__(cls, name, base_new, attr)
+ if backbone_class is not None:
+ cls_.__BACKBONE_CLASS__ = backbone_class
+ return cls_
+
+
+
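+# Holder for attributes (base_model_prefix, config_class) copied from the wrapped pretrained model.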
+class PreTrainedModel_Data:
+ base_model_prefix = None
+ config_class = None
+
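+# Thin wrapper around a huggingface model: holds the config, proxies forward / compute_loss to the wrapped
+# model, and exposes trainer state (epochs, steps) with safe defaults when no trainer is attached.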
+class TransformerBase(MyLightningModule,metaclass=TransformerFakeMeta):
+ def __init__(self,*args,**kwargs):
+ config = get_value_from_args('config',PretrainedConfig,*args,**kwargs)
+ super(TransformerBase, self).__init__()
+ self.config = config
+ self._premodel_data = PreTrainedModel_Data()
+ self._trainer: typing.Optional["pl.Trainer"] = None
+
+ def forward(self, *args, **batch):
+ return self.model(*args,**batch)
+
+ def compute_loss(self, *args,**batch) -> tuple:
+ return self.model(*args,**batch)
+
+ def post_init(self):
+ return self.model.post_init()
+
+ def init_weights(self):
+ return self.model.init_weights()
+
+ @property
+ def trainer(self):
+ return self._trainer
+
+ @trainer.setter
+ def trainer(self,trainer: typing.Optional["pl.Trainer"]):
+ self._trainer = trainer
+
+ @property
+ def current_epoch(self):
+ return self.trainer.current_epoch if self._trainer else 0
+
+ @property
+ def global_step(self):
+ return self.trainer.global_step if self._trainer else 0
+
+ @property
+ def max_epochs(self) -> typing.Optional[int]:
+ return self.trainer.max_epochs if self._trainer else 0
+
+ @property
+ def min_epochs(self) -> typing.Optional[int]:
+ return self.trainer.min_epochs if self._trainer else 0
+
+ @property
+ def max_steps(self) -> int:
+ return self.trainer.max_steps if self._trainer else 0
+
+ @property
+ def min_steps(self) -> int:
+ return self.trainer.min_steps if self._trainer else 0
+
+
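+    # Build an instance of CLS: load weights from model_args.model_name_or_path when it is set (after stripping
+    # the argument dataclasses out of args / kwargs), otherwise construct it from the config alone and call post_init().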
+ def from_pretrained(self,CLS, *args, **kwargs):
+ config = get_value_from_args('config', PretrainedConfig, *args, **kwargs)
+ model_args = get_value_from_args('model_args', ModelArguments, *args, **kwargs)
+
+ if model_args.model_name_or_path:
+ args_new = tuple(v for v in args
+ if not isinstance(v, ModelArguments) and \
+ not isinstance(v, TrainingArguments) and \
+ not isinstance(v,PretrainedConfig) and \
+ not isinstance(v,PrefixModelArguments) and \
+ not isinstance(v,DataArguments)
+ )
+ kwargs_new = {k: v for k, v in kwargs.items()
+ if not isinstance(v, ModelArguments) and \
+ not isinstance(v, TrainingArguments) and \
+ not isinstance(v, PretrainedConfig) and \
+ not isinstance(v, PrefixModelArguments) and \
+ not isinstance(v, DataArguments)
+ }
+
+ model_kwargs = {
+ "cache_dir": model_args.cache_dir,
+ "revision": model_args.model_revision,
+ "use_auth_token": True if model_args.use_auth_token else None,
+ }
+ cls_ = CLS.from_pretrained(
+ model_args.model_name_or_path,
+ *args_new,
+ **kwargs_new,
+ from_tf=bool(".ckpt" in model_args.model_name_or_path),
+ config=config,
+ **model_kwargs
+ )
+ elif hasattr(CLS,'from_config'):
+ cls_ = CLS.from_config(config)
+ cls_.post_init()
+ elif hasattr(CLS, '_from_config'):
+ cls_ = CLS._from_config(config)
+ cls_.post_init()
+ else:
+ cls_ = CLS(config)
+ cls_.post_init()
+ return cls_
+
+ @property
+ def model(self):
+ if not self._premodel_data.base_model_prefix:
+ return None
+ return getattr(self, self._premodel_data.base_model_prefix,None)
+
+ @model.setter
+ def model(self, model):
+ self.set_model(model)
+
+ def set_model(self, model , copy_attr=True):
+ if copy_attr:
+ keep_keys = ['config_class','base_model_prefix']
+ for k in keep_keys:
+ o = getattr(model,k,None)
+ if o is None:
+ continue
+ if o == 'model':
+ o = 'model_'
+ setattr(self._premodel_data,k,o)
+
+        assert self._premodel_data.base_model_prefix is not None, ValueError('base_model_prefix is not allowed to be empty')
+ setattr(self, self._premodel_data.base_model_prefix, model)
+
+ def get_model_lr(self):
+ return [(self.model if self._premodel_data.base_model_prefix is not None else self , self.config.task_specific_params['learning_rate']), ]
+
+
+
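+# LightningModule that owns the backbone: wires learning rates into config.task_specific_params, optionally sets
+# up adversarial training (switching to manual optimization) and hierarchical position embeddings, and builds the
+# optimizer parameter groups.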
+class TransformerLightningModule(MyLightningModule):
+ def __init__(self, *args,**kwargs):
+ config = get_value_from_args('config',PretrainedConfig,*args,**kwargs)
+ model_args = get_value_from_args('model_args', ModelArguments, *args, **kwargs)
+ training_args = get_value_from_args('training_args', TrainingArguments, *args, **kwargs)
+ super(TransformerLightningModule, self).__init__()
+ if not hasattr(config, 'task_specific_params') or config.task_specific_params is None:
+ config.task_specific_params = {}
+ task_specific_params = config.task_specific_params
+ task_specific_params['learning_rate'] = training_args.learning_rate
+ task_specific_params['learning_rate_for_task'] = training_args.learning_rate_for_task \
+ if training_args.learning_rate_for_task is not None else training_args.learning_rate
+ print(training_args)
+ print(model_args)
+        if training_args.adv is not None and training_args.adv['mode'] is not None:
+            assert training_args.adv['mode'] in AdversarialMethods.keys(), ValueError('unsupported adv mode {}, must be one of {}'.format(training_args.adv['mode'],','.join(AdversarialMethods.keys())))
+ self.automatic_optimization = False
+
+ try:
+ self.save_hyperparameters(ignore=['config'])
+ except:
+ pass
+ self.config = config
+ self.model_args = model_args
+ self.training_args = training_args
+ self.__backbone : typing.Optional[TransformerBase] = None
+ if hasattr(self,'__BACKBONE_CLASS__') and len(self.__BACKBONE_CLASS__) > 0:
+ self.set_model(self.__BACKBONE_CLASS__[0](*args, **kwargs))
+
+
+ self.training_step_fn = self.training_step
+ self.embeddings_forward_fn = None
+
+        # adversarial training
+ if training_args.adv is not None and training_args.adv['mode'] is not None:
+ self.embeddings_forward_fn = self.get_embeddings_module().embeddings.forward
+
+ self.training_step = self.adv_training_step
+ if training_args.adv['mode'].find('local') != -1:
+ self.adversarial = AdversarialMethods[training_args.adv['mode']](model=self.model)
+ else:
+ self.adversarial = AdversarialMethods[training_args.adv['mode']](model=self.model,
+ emb_name=training_args.adv.get('emb_name', 'embedding'))
+ k = 'pytorch_lightning.trainer.configuration_validator'
+ if k in sys.modules:
+ setattr( sys.modules[k],'__verify_manual_optimization_support' , verify_manual_optimization_support)
+ else:
+ self.adversarial = None
+
+ self.gradient_clip_val = training_args.max_grad_norm
+
+ if training_args.hierarchical_position is not None and (training_args.hierarchical_position > 0 and training_args.hierarchical_position < 1):
+            # absolute position encoding: hierarchical position encoding
+ def forward(cls,input: Tensor) -> Tensor:
+ # return F.embedding(
+ # input, self.weight, self.padding_idx, self.max_norm,
+ # self.norm_type, self.scale_grad_by_freq, self.sparse)
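+                # hierarchical scheme: position p is split into x = p // num_embeddings and y = p % num_embeddings,
+                # and the de-biased embedding table is interpolated as alpha * E[x] + (1 - alpha) * E[y]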
+ position_ids = input
+ alpha = training_args.hierarchical_position
+ embeddings = cls.weight - alpha * cls.weight[:1]
+ embeddings = embeddings / (1 - alpha)
+ x_idx = position_ids // cls.num_embeddings
+ y_idx = position_ids % cls.num_embeddings
+
+ embeddings_x = torch.index_select(embeddings,dim=0,index=x_idx.view(-1))
+ embeddings_y = torch.index_select(embeddings,dim=0,index=y_idx.view(-1))
+ embeddings = alpha * embeddings_x + (1 - alpha) * embeddings_y
+ return embeddings
+
+ position_embeddings = self.get_embeddings_module().embeddings.position_embeddings
+ position_embeddings.forward = partial(forward,position_embeddings)
+
+
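+    # Walk down the base_model_prefix chain until a module exposing an `embeddings` attribute is found.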
+ def get_embeddings_module(self):
+ base_model_prefix = self.backbone.base_model_prefix
+ current_model = self.backbone.model
+ tmp_obj = current_model
+ while tmp_obj is not None:
+ if hasattr(tmp_obj, 'embeddings'):
+ return tmp_obj
+ current_model = tmp_obj
+ tmp_obj = getattr(current_model, base_model_prefix, None)
+ return tmp_obj
+
+ @property
+ def backbone(self):
+ return self.__backbone
+
+ @property
+ def model(self):
+ return self.__backbone
+
+ @model.setter
+ def model(self, model):
+ self.set_model(model)
+
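+    # Attach the backbone, mirror log / log_dict onto it, and let backbone-defined optimizer / scheduler hooks
+    # override the defaults on this module.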
+ def set_model(self, model , copy_attr=True):
+ assert model is not None
+ self.__backbone = model
+ if not copy_attr:
+ return
+
+ copy_attr = [
+ 'log','log_dict'
+ ]
+ for k in copy_attr:
+ setattr(self.__backbone, k, getattr(self, k))
+
+ event_ = [
+ 'configure_optimizers',
+ 'configure_gradient_clipping',
+ 'lr_scheduler_step',
+ 'optimizer_step',
+ 'optimizer_zero_grad',
+ ]
+ for e in event_:
+ a = getattr(self.__backbone, e, None)
+ if a is not None:
+ setattr(self,e,a)
+
+
+ def get_model_lr(self):
+ return self.model.get_model_lr()
+
+
+ def compute_loss(self,*args, **kwargs):
+ kwargs.update(dict(args))
+ return self.model.compute_loss(**kwargs)
+
+
+ def forward(self,*args, **kwargs):
+ kwargs.update(dict(args))
+ return self.compute_loss(**kwargs)
+
+
+ def setup(self, stage: str) -> None:
+ setattr(self.backbone, 'trainer', self.trainer)
+ setattr(self.backbone, 'estimated_stepping_batches', self.trainer.estimated_stepping_batches)
+
+
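+    # Build optimizer parameter groups: for each (module, lr) pair from get_model_lr(), split parameters into
+    # weight-decayed and no-decay (bias, LayerNorm.weight) groups.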
+ def get_named_parameters(self):
+ training_args = self.training_args
+ model_attrs = self.get_model_lr()
+ no_decay = ["bias", "LayerNorm.weight"]
+ def __get_named_parameters(a : nn.Module):
+ return [
+ {
+ "params": [p for n, p in a.named_parameters() if not any(nd in n for nd in no_decay)],
+ "weight_decay": training_args.weight_decay, "lr": lr,
+ },
+ {
+ "params": [p for n, p in a.named_parameters() if any(nd in n for nd in no_decay)],
+ "weight_decay": 0.0, "lr": lr,
+ },
+ ]
+
+ opt = []
+ a: nn.Module
+ for a, lr in model_attrs:
+ opt += __get_named_parameters(a)
+ return opt
+
+ def configure_optimizers(self):
+ return configure_optimizers(self.get_named_parameters(), self.training_args,self.trainer.estimated_stepping_batches)
+
+
+ def manual_backward(self,loss: Tensor, *args: Any, **kwargs: Any):
+ if isinstance(loss,dict):
+ loss = loss['loss']
+ return super(TransformerLightningModule, self).manual_backward(loss)
+
+
+
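+    # Manual-optimization training step used when adversarial training is enabled; dispatches on adv['mode']
+    # (fgm, fgsm_local, fgsm, pgd, free_local, free) and falls back to a plain optimize step otherwise.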
+ def adv_training_step(self,batch):
+ mode = self.training_args.adv['mode']
+ opt = self.optimizers()
+ gradient_clip_val = self.gradient_clip_val
+ epsilon = self.training_args.adv['epsilon']
+ if mode == 'fgm':
+ opt.zero_grad()
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+ self.adversarial.attack(epsilon=epsilon)
+ loss = self.training_step_fn(batch)
+ opt.zero_grad()
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+            self.adversarial.restore() # restore the embedding parameters
+ self.model.zero_grad()
+ elif mode == 'fgsm_local':
+ alpha = self.training_args.adv['alpha']
+ opt.zero_grad()
+ delta = torch.zeros((*batch['input_ids'].size()[:2], self.config.hidden_size), dtype=torch.float32).to(
+ batch['input_ids'].device)
+ def forward_fn(*args, **kwargs):
+ embedding_output = self.embeddings_forward_fn(*args, **kwargs)
+ embedding_output += delta
+ return embedding_output
+ setattr(self.get_embeddings_module().embeddings, 'forward', forward_fn)
+ delta = self.adversarial.attack(is_first_attack=True, delta=delta,alpha=alpha,epsilon=epsilon)
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+
+ delta = self.adversarial.attack(delta=delta,alpha=alpha,epsilon=epsilon)
+ loss = self.training_step_fn(batch)
+ opt.zero_grad()
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+ self.model.zero_grad()
+
+ setattr(self.get_embeddings_module().embeddings, 'forward', self.embeddings_forward_fn)
+ elif mode == 'fgsm':
+ alpha = self.training_args.adv['alpha']
+ self.adversarial.attack(is_first_attack=True,alpha=alpha,epsilon=epsilon)
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+
+ self.adversarial.attack(alpha=alpha,epsilon=epsilon)
+ loss = self.training_step_fn(batch)
+ opt.zero_grad()
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+            self.adversarial.restore() # restore the embedding parameters
+ self.model.zero_grad()
+ elif mode == 'pgd':
+ alpha = self.training_args.adv['alpha']
+ opt.zero_grad()
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+
+ self.adversarial.backup_grad()
+ attack_iters = self.training_args.adv['attack_iters']
+ for t in range(attack_iters):
+ self.adversarial.attack(is_first_attack=(t == 0),alpha=alpha,epsilon=epsilon)
+ if t != attack_iters - 1:
+ opt.zero_grad()
+ else:
+ self.adversarial.restore_grad()
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+                self.adversarial.restore() # restore the embedding parameters
+ opt.step()
+ self.model.zero_grad()
+ elif mode == 'free_local':
+ if not hasattr(self.adversarial,'delta_'):
+ setattr(self.adversarial,'delta_', torch.zeros((batch['input_ids'].size(0),self.config.max_position_embeddings, self.config.hidden_size),requires_grad=True).to(batch['input_ids'].device))
+ delta = getattr(self.adversarial,'delta_')
+ def forward_fn(*args, **kwargs):
+ embedding_output = self.embeddings_forward_fn(*args, **kwargs)
+ embedding_output += delta[:embedding_output.size(0),:embedding_output.size(1)]
+ return embedding_output
+
+ setattr(self.get_embeddings_module().embeddings, 'forward', forward_fn)
+ for _ in range(self.training_args.adv['minibatch_replays']):
+ delta.retain_grad()
+ loss = self.training_step_fn(batch)
+ opt.zero_grad()
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+ delta = self.adversarial.attack(delta=delta,epsilon=epsilon)
+ # delta.grad.zero_()
+ self.model.zero_grad()
+
+ setattr(self.get_embeddings_module().embeddings, 'forward', self.embeddings_forward_fn)
+ elif mode == 'free':
+ for _ in range(self.training_args.adv['minibatch_replays']):
+ opt.zero_grad()
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+ self.adversarial.attack(epsilon=epsilon)
+ self.model.zero_grad()
+ else:
+ opt.zero_grad()
+ loss = self.training_step_fn(batch)
+ self.manual_backward(loss)
+ if gradient_clip_val is not None:
+ self.clip_gradients(opt, gradient_clip_val=gradient_clip_val)
+ opt.step()
+ self.model.zero_grad()
+ return loss
+
+ def training_step(self, batch):
+ if isinstance(batch, dict):
+ outputs = self.compute_loss(**batch)
+ else:
+ outputs = self.compute_loss(**dict(batch))
+ loss = outputs[0]
+ if isinstance(loss,dict):
+ self.log_dict(loss,prog_bar=True)
+ else:
+ self.log('loss',loss,prog_bar=True)
+ return loss
+
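+    # Run compute_loss and convert the loss plus any extra outputs (tensors, nested lists / tuples, dicts) into
+    # detached CPU numpy arrays.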
+ def validation_step(self, batch, batch_idx, dataloader_idx=0):
+ if isinstance(batch, dict):
+ outputs = self.compute_loss(**batch)
+ else:
+ outputs = self.compute_loss(**dict(batch))
+
+ loss = outputs[0]
+ o = {}
+ if loss is not None:
+ if isinstance(loss, dict):
+ o = loss
+ if 'loss' in o:
+ o['val_loss'] = o.pop('loss')
+ else:
+ o['val_loss'] = loss.cpu().detach().numpy()
+
+ out = outputs[1:]
+ if isinstance(out,(tuple,list)):
+ o['outputs'] = []
+ obj = o['outputs']
+ for t in out:
+ if t is None:
+ obj.append(t)
+ elif isinstance(t,torch.Tensor):
+ obj.append(t.cpu().detach().numpy())
+ elif isinstance(t, list) or isinstance(t, tuple):
+ tmp_list =[_ for _ in t]
+ for idx in range(len(tmp_list)):
+ node = tmp_list[idx]
+ if isinstance(node, torch.Tensor):
+ tmp_list[idx] = node.cpu().detach().numpy()
+ elif isinstance(node, list) or isinstance(node, tuple):
+ tmp_list[idx] = [_.cpu().detach().numpy() for _ in node]
+ else:
+ raise ValueError('validation_step: outputs not support', type(t))
+ obj.append(tmp_list)
+ elif isinstance(t, dict):
+ obj.append({k:v.cpu().detach().numpy() for k,v in t.items()})
+ else:
+ raise ValueError('validation_step: outputs not support', type(t))
+ else:
+ o['outputs'] = out.cpu().detach().numpy()
+ return o
+
+ def test_step(self, batch, batch_idx):
+ if isinstance(batch, dict):
+ outputs = self.compute_loss(**batch)
+ else:
+ outputs = self.compute_loss(**dict(batch))
+ o = {}
+ out = outputs
+ if isinstance(out, (tuple, list)):
+ o['outputs'] = []
+ obj = o['outputs']
+ for t in out:
+ if t is None:
+ obj.append(t)
+ elif isinstance(t, torch.Tensor):
+ obj.append(t.cpu().detach().numpy())
+ elif isinstance(t, list) or isinstance(t, tuple):
+ tmp_list =[_ for _ in t]
+ for idx in range(len(tmp_list)):
+ node = tmp_list[idx]
+ if isinstance(node,torch.Tensor):
+ tmp_list[idx] = node.cpu().detach().numpy()
+ elif isinstance(node, list) or isinstance(node, tuple):
+ tmp_list[idx] = [_.cpu().detach().numpy() for _ in node]
+ else:
+ raise ValueError('test_step: outputs not support', type(t))
+ obj.append(tmp_list)
+ elif isinstance(t, dict):
+ obj.append({k: v.cpu().detach().numpy() for k, v in t.items()})
+ else:
+ raise ValueError('test_step: outputs not support',type(t))
+ else:
+ o['outputs'] = out.cpu().detach().numpy()
+ return o
\ No newline at end of file
diff --git a/setup.py b/setup.py
index f3385a44..adc9fd9d 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
ignore = ['test','tests']
setup(
name='deep_training',
- version='0.1.2rc0',
+ version='0.1.2',
description='an easy training architecture',
long_description='torch_training: https://github.com/ssbuild/deep_training.git',
license='Apache License 2.0',