-
Notifications
You must be signed in to change notification settings - Fork 67
/
Copy pathrun_pretrain.py
143 lines (130 loc) · 6.48 KB
/
run_pretrain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
import argparse
import torch
from exp.exp_pretrain import Exp_All_Task as Exp_All_Task_SSL
import random
import numpy as np
import wandb
from utils.ddp import is_main_process, init_distributed_mode
def build_parser():
    """Build the command-line parser for the UniTS pretraining script.

    Returns:
        argparse.ArgumentParser: parser exposing every option this script
        reads (or forwards, through the parsed namespace, to the experiment
        class, the distributed-init helper and wandb).
    """
    parser = argparse.ArgumentParser(description='UniTS Pretrain')

    parser.add_argument('--fix_seed', type=int, default=None, help='seed')

    # basic config
    parser.add_argument('--task_name', type=str, required=False, default='ALL_task',
                        help='task name')
    # NOTE: the three options below are required, so their defaults are never
    # used; both are kept so that the CLI contract stays unchanged.
    parser.add_argument('--is_training', type=int,
                        required=True, default=1, help='status')
    parser.add_argument('--model_id', type=str, required=True,
                        default='test', help='model id')
    parser.add_argument('--model', type=str, required=True, default='UniTS',
                        help='model name')

    # data loader
    parser.add_argument('--data', type=str, required=False,
                        default='All', help='dataset type')
    parser.add_argument('--features', type=str, default='M',
                        help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
    parser.add_argument('--target', type=str, default='OT',
                        help='target feature in S or MS task')
    parser.add_argument('--freq', type=str, default='h',
                        help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
    parser.add_argument('--task_data_config_path', type=str,
                        default='exp/all_task_pretrain.yaml', help='root path of the task and data yaml file')
    parser.add_argument('--subsample_pct', type=float,
                        default=None, help='subsample percent')

    # pretrain
    parser.add_argument('--right_prob', type=float,
                        default=1.0, help='right mask prob')
    parser.add_argument('--min_mask_ratio', type=float,
                        default=0.5, help='min mask ratio')
    parser.add_argument('--max_mask_ratio', type=float,
                        default=0.8, help='max mask ratio')
    parser.add_argument('--min_keep_ratio', type=float, default=None,
                        help='min crop ratio for various length in pretraining')

    # ddp
    # argparse maps the hyphenated flag to the attribute ``args.local_rank``.
    parser.add_argument('--local-rank', type=int, help='local rank')
    parser.add_argument("--dist_url", default="env://", type=str, help="""url used to set up
                        distributed training; see https://pytorch.org/docs/stable/distributed.html""")
    parser.add_argument('--num_workers', type=int, default=0,
                        help='data loader num workers')

    # optimization
    parser.add_argument('--itr', type=int, default=1, help='experiments times')
    parser.add_argument('--train_epochs', type=int,
                        default=10, help='train epochs')
    parser.add_argument('--warmup_epochs', type=int,
                        default=0, help='warmup epochs')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch size of train input data')
    parser.add_argument('--acc_it', type=int, default=32,
                        help='acc iteration to enlarge batch size')
    parser.add_argument('--learning_rate', type=float,
                        default=0.0001, help='optimizer learning rate')
    parser.add_argument('--min_lr', type=float, default=1e-6,
                        help='minimum learning rate')
    parser.add_argument('--beta2', type=float,
                        default=0.999, help='optimizer beta2')
    parser.add_argument('--weight_decay', type=float,
                        default=0.0, help='optimizer weight decay')
    parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
    parser.add_argument('--eps', type=float, default=1e-08,
                        help='eps for optimizer')
    parser.add_argument('--des', type=str, default='test',
                        help='exp description')
    # The value is forwarded verbatim as ``mode`` to ``wandb.init``.
    parser.add_argument('--debug', type=str,
                        default='enabled',
                        help='wandb run mode passed to wandb.init (e.g. online, offline, disabled)')
    parser.add_argument('--clip_grad', type=float, default=None, help="""Maximal parameter
                        gradient norm if using gradient clipping.""")
    parser.add_argument('--checkpoints', type=str,
                        default='./checkpoints/', help='location of model checkpoints')
    # NOTE: ``store_true`` combined with ``default=True`` means these two
    # flags are always True, whether or not they appear on the command line.
    parser.add_argument("--memory_check", action="store_true", default=True)
    parser.add_argument("--large_model", action="store_true", default=True)

    # model settings
    parser.add_argument('--d_model', type=int, default=512,
                        help='dimension of model')
    parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
    parser.add_argument('--e_layers', type=int, default=2,
                        help='num of encoder layers')
    parser.add_argument("--patch_len", type=int, default=16)
    parser.add_argument("--stride", type=int, default=8)
    parser.add_argument("--prompt_num", type=int, default=10)

    return parser


def experiment_name(args):
    """Return the run name derived from the parsed arguments.

    The name joins task name, model id, model, data, features, model
    dimension, encoder layer count and description; it is used as the wandb
    run name and as the prefix of each per-iteration setting string.
    """
    return '{}_{}_{}_{}_ft{}_dm{}_el{}_{}'.format(
        args.task_name,
        args.model_id,
        args.model,
        args.data,
        args.features,
        args.d_model,
        args.e_layers,
        args.des)


def seed_everything(seed):
    """Seed the Python, PyTorch and NumPy random number generators."""
    random.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)


def main(argv=None):
    """Parse the CLI, set up distributed mode and logging, then pretrain.

    Args:
        argv: optional list of argument strings; ``None`` (the default)
            makes argparse read ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)
    init_distributed_mode(args)

    print('Args in experiment:')
    print(args)

    if args.fix_seed is not None:
        seed_everything(args.fix_seed)

    exp_name = experiment_name(args)

    # Only the main process opens a wandb run.
    if is_main_process():
        wandb.init(
            name=exp_name,
            # set the wandb project where this run will be logged
            project="pretrain",
            # track hyperparameters and run metadata
            config=args,
            mode=args.debug,
        )

    if args.is_training:
        for ii in range(args.itr):
            # setting record of experiments: run name plus iteration index
            setting = '{}_{}'.format(exp_name, ii)

            exp = Exp_All_Task_SSL(args)  # set experiments
            print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
            exp.train(setting)
            torch.cuda.empty_cache()


if __name__ == '__main__':
    main()