Commit 0.1.11
Signed-off-by: ssbuild <[email protected]>
ssbuild committed Jul 4, 2023
1 parent 396b4bb commit 09eca29
Showing 7 changed files with 349 additions and 184 deletions.
6 changes: 6 additions & 0 deletions README.md
@@ -35,6 +35,12 @@


## update
- <strong>2023-07-04</strong>
- 0.1.11 release
- fix some baichuan and chatglm2 bugs
- support conv2d for lora
- support arrow parquet dataset

- <strong>2023-06-06</strong>
- 0.1.11 rc0 add baichuan model, complete training: [baichuan_finetuning](https://github.com/ssbuild/baichuan_finetuning)
- 0.1.11 rc1 add chatglm2 model, complete training: [chatglm2_finetuning](https://github.com/ssbuild/chatglm2_finetuning)
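The changelog above mentions arrow parquet dataset support. As a rough illustration only (this is plain pyarrow usage, not deep_training's own loader API; the file and column names are hypothetical), a parquet-backed corpus can be inspected like this:

```python
# Minimal pyarrow sketch -- illustrative only, not this library's API.
import pyarrow.parquet as pq

table = pq.read_table("corpus.parquet")      # hypothetical file
for record in table.to_pylist():
    print(record["text"][:80])               # "text" column is an assumption
```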
4 changes: 2 additions & 2 deletions setup.py
@@ -10,15 +10,15 @@
ignore = ['test','tests']
setup(
name='deep_training',
version='0.1.11rc1',
version='0.1.11',
description='an easy training architecture',
long_description='torch_training: https://github.com/ssbuild/deep_training.git',
license='Apache License 2.0',
url='https://github.com/ssbuild/deep_training',
author='ssbuild',
author_email='[email protected]',
install_requires=['lightning>=2',
'numpy-io>=0.0.3 , < 0.1.0',
'numpy-io>=0.0.5 , < 0.1.0',
'sentencepiece',
'numpy',
'transformers>=4.22',
99 changes: 11 additions & 88 deletions src/data_helper/data_helper.py
@@ -5,16 +5,16 @@
import os
import typing
import torch
from fastdatasets import memory as MEMORY
from fastdatasets.common.iterable_dataset import IterableDatasetBase
from fastdatasets.common.random_dataset import RandomDatasetBase
from fastdatasets.torch_dataset import IterableDataset as torch_IterableDataset, Dataset as torch_Dataset
from torch.utils.data import DataLoader, IterableDataset
# from fastdatasets import memory as MEMORY
# from fastdatasets.common.iterable_dataset import IterableDatasetBase
# from fastdatasets.common.random_dataset import RandomDatasetBase
# from fastdatasets.torch_dataset import IterableDataset as torch_IterableDataset, Dataset as torch_Dataset
# from torch.utils.data import DataLoader, IterableDataset
from transformers import PreTrainedTokenizer, PretrainedConfig
from .training_args import ModelArguments, DataArguments, TrainingArguments
from ..utils.func import is_chinese_char
from numpy_io.core.writer import DataWriteHelper
from numpy_io.pytorch_loader.data_helper import DataHelperBase,load_tokenizer, load_configure
from numpy_io.core.writer import DataWriteHelper

__all__ = [
'DataHelper',
@@ -43,24 +43,23 @@ class DataHelper(DataHelperBase):
model_args: typing.Optional[ModelArguments] = None
training_args: typing.Optional[TrainingArguments] = None
data_args: typing.Optional[DataArguments] = None

def __init__(self,
model_args: ModelArguments,
training_args: typing.Optional[TrainingArguments] = None,
data_args: typing.Optional[DataArguments] = None,
**kwargs):
super(DataHelper, self).__init__()


self.train_files = []
self.eval_files = []
self.test_files = []
if data_args:
super(DataHelper, self).__init__(data_args.data_backend,data_args.convert_file,data_args.output_dir,data_args.intermediate_name)
else:
super(DataHelper, self).__init__(None, None, None, None)


self.label2id = None
self.id2label = None
self.max_seq_length_dict = {}
self._external_kwargs = kwargs
self.backend = data_args.data_backend if data_args else 'record'
self.model_args = model_args
self.training_args = training_args
self.data_args = data_args
@@ -253,82 +252,6 @@ def load_tokenizer_and_config(self,
return tokenizer, config


# Return the intermediate output used when building feature data
def get_intermediate_file(self, intermediate_name, mode):
data_args: DataArguments = self.data_args
if data_args.data_backend.startswith('memory'):
# In-memory backend: the intermediate output is a list
intermediate_output = []
logging.info('make data {} {}...'.format(data_args.output_dir,
intermediate_name + '-' + mode + '.' + self.backend))
else:
# File backend: the intermediate output is a file path
intermediate_output = os.path.join(data_args.output_dir,
intermediate_name + '-' + mode + '.' + self.backend)
logging.info('make data {}...'.format(intermediate_output))
return intermediate_output


def make_dataset_with_args(self, input_files,
mode,
shuffle=False,
num_process_worker: int=0,
overwrite: bool=False,
mixed_data=True,
dupe_factor=1):
'''
mode: one of [train, eval, test]
shuffle: whether to shuffle the data
num_process_worker: number of worker processes used for conversion
overwrite: whether to overwrite existing intermediate data
mixed_data: whether to mix all input files into a single intermediate dataset
    (otherwise one intermediate dataset is produced per input file)
dupe_factor: how many duplicated passes over the corpus to generate
'''
logging.info('make_dataset {} {}...'.format(','.join(input_files),mode))
if mode == 'train':
contain_objs = self.train_files
elif mode == 'eval' or mode == 'val':
contain_objs = self.eval_files
elif mode == 'test' or mode == 'predict':
contain_objs = self.test_files
else:
raise ValueError('{} invalid '.format(mode))

if not input_files:
logging.info('input_files empty!')
return

data_args: DataArguments = self.data_args
for i in range(dupe_factor):

if data_args.convert_file:
if mixed_data:
intermediate_name = data_args.intermediate_name + '_dupe_factor_{}'.format(i)
intermediate_output = self.get_intermediate_file(intermediate_name, mode)

if isinstance(intermediate_output, list) or not os.path.exists(intermediate_output) or overwrite:
data = self.on_get_corpus(input_files, mode)
self.make_dataset(intermediate_output,
data,
mode,
num_process_worker=num_process_worker,
shuffle=shuffle)
contain_objs.append(intermediate_output)
else:
for fid,input_item in enumerate(input_files):
intermediate_name = data_args.intermediate_name + '_file_{}_dupe_factor_{}'.format(fid,i)
intermediate_output = self.get_intermediate_file(intermediate_name, mode)

if isinstance(intermediate_output, list) or not os.path.exists(intermediate_output) or overwrite:
data = self.on_get_corpus([input_item], mode)
self.make_dataset(intermediate_output,
data,
mode,
num_process_worker=num_process_worker,
shuffle=shuffle)
contain_objs.append(intermediate_output)

else:
for input_item in input_files:
contain_objs.append(input_item)


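For orientation, a hedged usage sketch of the DataHelper API shown in this diff. It assumes a concrete subclass that implements on_get_corpus, the import path deep_training.data_helper, and argument objects built elsewhere; the file names and values are placeholders, not taken from the repository docs:

```python
# Hedged sketch -- exercises only the methods visible in the diff above.
from deep_training.data_helper import DataHelper  # assumed public import path

class MyDataHelper(DataHelper):
    def on_get_corpus(self, files, mode):
        # Yield raw samples; real parsing/tokenization hooks are omitted here.
        for filename in files:
            with open(filename, encoding="utf-8") as f:
                for line in f:
                    yield {"text": line.strip()}

# model_args / training_args / data_args are assumed to be ModelArguments /
# TrainingArguments / DataArguments instances parsed elsewhere.
dh = MyDataHelper(model_args, training_args, data_args)
tokenizer, config = dh.load_tokenizer_and_config()   # defaults assumed
dh.make_dataset_with_args(["train.json"], mode="train", shuffle=True,
                          num_process_worker=4, overwrite=False)
```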
171 changes: 171 additions & 0 deletions src/nlp/layers/lora_v2/layers.py
@@ -6,6 +6,8 @@
import math
import sys
import warnings
from typing import Union, Tuple

import torch
from torch import nn
from torch.nn import functional as F
@@ -60,6 +62,7 @@ def __init__(
self,
in_features: int,
out_features: int,
**kwargs
):
self.r = {}
self.lora_alpha = {}
@@ -75,6 +78,7 @@ def __init__(
self.disable_adapters = False
self.in_features = in_features
self.out_features = out_features
self.kwargs = kwargs

def update_layer(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weights,dtype=None):
self.r[adapter_name] = r
@@ -116,6 +120,31 @@ def update_layer_embedding(self, adapter_name, r, lora_alpha, lora_dropout, init
self.reset_lora_parameters(adapter_name)
self.to(self.weight.device)

def update_layer_conv2d(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weights,dtype=None):
self.r[adapter_name] = r
self.lora_alpha[adapter_name] = lora_alpha
if lora_dropout > 0.0:
lora_dropout_layer = nn.Dropout(p=lora_dropout)
else:
lora_dropout_layer = nn.Identity()

self.lora_dropout.update(nn.ModuleDict({adapter_name: lora_dropout_layer}))
# Actual trainable parameters
if r > 0:
kernel_size = self.kwargs["kernel_size"]
stride = self.kwargs["stride"]
padding = self.kwargs["padding"]
self.lora_A.update(
nn.ModuleDict({adapter_name: nn.Conv2d(self.in_features, r, kernel_size, stride, padding, bias=False,dtype=dtype)})
)
self.lora_B.update(
nn.ModuleDict({adapter_name: nn.Conv2d(r, self.out_features, (1, 1), (1, 1), bias=False,dtype=dtype)})
)
self.scaling[adapter_name] = lora_alpha / r
if init_lora_weights:
self.reset_lora_parameters(adapter_name)
self.to(self.weight.device)

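To put numbers on what update_layer_conv2d adds: lora_A is a k×k convolution from in_features down to rank r and lora_B is a 1×1 convolution from r back to out_features, so the trainable parameters shrink from out·in·k² to r·in·k² + out·r. A quick arithmetic sketch (the sizes are arbitrary examples, not library defaults):

```python
# Parameter-count illustration for the conv2d LoRA decomposition above.
in_ch, out_ch, k, r = 256, 512, 3, 8          # arbitrary example sizes

base_params   = out_ch * in_ch * k * k        # frozen nn.Conv2d weight
lora_a_params = r * in_ch * k * k             # nn.Conv2d(in_ch, r, k, ...)
lora_b_params = out_ch * r                    # nn.Conv2d(r, out_ch, (1, 1), ...)

print(base_params, lora_a_params + lora_b_params)   # 1179648 vs 22528
```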
def reset_lora_parameters(self, adapter_name):
if adapter_name in self.lora_A.keys():
# initialize A the same way as the default for nn.Linear and B to zero
@@ -297,6 +326,148 @@ def forward(self, x: torch.Tensor):
return nn.Embedding.forward(self, x)



class Conv2d(nn.Conv2d, LoraLayer):
# Lora implemented in a conv2d layer
def __init__(
self,
adapter_name: str,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int]],
stride: Union[int, Tuple[int]] = 1,
padding: Union[int, Tuple[int]] = 0,
r: int = 0,
lora_alpha: int = 1,
lora_dropout: float = 0.0,
**kwargs,
):
init_lora_weights = kwargs.pop("init_lora_weights", True)

nn.Conv2d.__init__(self, in_channels, out_channels, kernel_size, stride, padding)
LoraLayer.__init__(
self,
in_features=in_channels,
out_features=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
)
# Freezing the pre-trained weight matrix
self.weight.requires_grad = False

nn.Conv2d.reset_parameters(self)
self.update_layer_conv2d(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights,dtype=kwargs.get('dtype',None))
self.active_adapter = adapter_name

def merge(self):
if self.active_adapter not in self.lora_A.keys():
return
if self.merged:
warnings.warn("Already merged. Nothing to do.")
return
if self.r[self.active_adapter] > 0:
# https://github.com/bmaltais/kohya_ss/blob/feb6728762a8f463d15ba936d189d4c3abfaa1ab/networks/lora.py#L117
if self.weight.size()[2:4] == (1, 1):
# conv2d 1x1
self.weight.data += (
self.lora_B[self.active_adapter].weight.squeeze(3).squeeze(2)
@ self.lora_A[self.active_adapter].weight.squeeze(3).squeeze(2)
).unsqueeze(2).unsqueeze(3) * self.scaling[self.active_adapter]
else:
# conv2d 3x3
self.weight.data += (
F.conv2d(
self.lora_A[self.active_adapter].weight.permute(1, 0, 2, 3),
self.lora_B[self.active_adapter].weight,
).permute(1, 0, 2, 3)
* self.scaling[self.active_adapter]
)
self.merged = True

def unmerge(self):
if self.active_adapter not in self.lora_A.keys():
return
if not self.merged:
warnings.warn("Already unmerged. Nothing to do.")
return
if self.r[self.active_adapter] > 0:
if self.weight.size()[2:4] == (1, 1):
# conv2d 1x1
self.weight.data -= (
self.lora_B[self.active_adapter].weight.squeeze(3).squeeze(2)
@ self.lora_A[self.active_adapter].weight.squeeze(3).squeeze(2)
).unsqueeze(2).unsqueeze(3) * self.scaling[self.active_adapter]
else:
# conv2d 3x3: subtract the same delta that merge() added
self.weight.data -= (
F.conv2d(
self.lora_A[self.active_adapter].weight.permute(1, 0, 2, 3),
self.lora_B[self.active_adapter].weight,
).permute(1, 0, 2, 3)
* self.scaling[self.active_adapter]
)
self.merged = False

def forward(self, x: torch.Tensor):
previous_dtype = x.dtype

if self.active_adapter not in self.lora_A.keys():
return F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
)
if self.disable_adapters:
if self.r[self.active_adapter] > 0 and self.merged:
self.unmerge()
result = F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
)
elif self.r[self.active_adapter] > 0 and not self.merged:
result = F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
)

x = x.to(self.lora_A[self.active_adapter].weight.dtype)

result += (
self.lora_B[self.active_adapter](
self.lora_A[self.active_adapter](self.lora_dropout[self.active_adapter](x))
)
* self.scaling[self.active_adapter]
)
else:
result = F.conv2d(
x,
self.weight,
bias=self.bias,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
groups=self.groups,
)

result = result.to(previous_dtype)

return result

if is_bnb_available():

class Linear8bitLt(bnb.nn.Linear8bitLt, LoraLayer):
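The Conv2d LoRA layer added above can be exercised directly. A hedged sketch, assuming the import path below (path, adapter name, and sizes are assumptions, not taken from the repository docs):

```python
# Hedged usage sketch of the Conv2d LoRA layer from this diff.
import torch
from deep_training.nlp.layers.lora_v2.layers import Conv2d  # assumed path

layer = Conv2d("default", in_channels=64, out_channels=128,
               kernel_size=3, stride=1, padding=1,
               r=8, lora_alpha=16, lora_dropout=0.05)

x = torch.randn(2, 64, 32, 32)
y = layer(x)          # frozen conv output + scaled lora_B(lora_A(x))
print(y.shape)        # torch.Size([2, 128, 32, 32])

layer.merge()         # fold the LoRA delta into the frozen kernel
layer.unmerge()       # restore the original kernel
```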