# Data and model paths
data_dir = '.'
task_name = 'berts'
output_dir = 'bert_f1'
bert_dir = '/data/jliu/data/BertModel/bert-base-cased/'
bert_config_file = bert_dir + 'config.json'
vocab_file = bert_dir + 'vocab.txt'
do_lower_case = False  # bert-base-cased is a cased model

# Training hyperparameters
num_train_epochs = 22
train_batch_size = 10
eval_batch_size = 24
max_seq_length = 512
learning_rate = 5e-5
warmup_proportion = 0.1
gradient_accumulation_steps = 1
seed = 1234

# Run mode flags
do_train = True
do_eval = True
resume = False
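# A minimal sketch (an assumption, not code from this repo) of how a training
# script might consume the module-level settings above; it is kept commented
# out so importing this module stays side-effect free. `per_step_batch_size`
# is a hypothetical name; dividing the batch size by the number of gradient
# accumulation steps mirrors the reference BERT scripts, which keep the
# effective batch size constant while accumulating gradients.
#
#   import config
#
#   assert config.train_batch_size % config.gradient_accumulation_steps == 0
#   per_step_batch_size = (config.train_batch_size
#                          // config.gradient_accumulation_steps)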
# python run_classifier.py
# --task_name bert
# --do_train
# --do_eval
# --data_dir .
# --vocab_file $BERT_BASE_DIR/vocab.txt
# --bert_config_file $BERT_BASE_DIR/bert_config.json
# --init_checkpoint $BERT_BASE_DIR/pytorch_model.bin
# --max_seq_length 512
# --train_batch_size 24
# --learning_rate 3e-5
# --num_train_epochs 20.0
# --output_dir bert_f1
# --gradient_accumulation_steps 2
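# A hedged illustration (an assumption, not part of the original project) of
# how the values in this module line up with the flags of the reference
# command above; the string is assembled purely for documentation.
#
#   import config
#
#   cmd = (
#       f"python run_classifier.py --task_name {config.task_name}"
#       f" --do_train --do_eval --data_dir {config.data_dir}"
#       f" --vocab_file {config.vocab_file}"
#       f" --bert_config_file {config.bert_config_file}"
#       f" --max_seq_length {config.max_seq_length}"
#       f" --train_batch_size {config.train_batch_size}"
#       f" --learning_rate {config.learning_rate}"
#       f" --num_train_epochs {config.num_train_epochs}"
#       f" --output_dir {config.output_dir}"
#       f" --gradient_accumulation_steps {config.gradient_accumulation_steps}"
#   )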
# parser.add_argument("
# --data_dir",
# default=None,
# type=str,
# required=True,
# help="The input data dir. Should contain the .tsv files (or other data files) for the task.")
# parser.add_argument("
## --bert_config_file",
# default=None,
# type=str,
# required=True,
# help="The config json file corresponding to the pre-trained BERT model. \n"
# "This specifies the model architecture.")
# parser.add_argument("--task_name",
# default=None,
# type=str,
# required=True,
# help="The name of the task to train.")
# parser.add_argument("--vocab_file",
# default=None,
# type=str,
# required=True,
# help="The vocabulary file that the BERT model was trained on.")
# parser.add_argument("--output_dir",
# default=None,
# type=str,
# required=True,
# help="The output directory where the model checkpoints will be written.")
# parser.add_argument("--init_checkpoint",
# default=None,
# type=str,
# help="Initial checkpoint (usually from a pre-trained BERT model).")
# parser.add_argument("--do_lower_case",
# default=False,
# action='store_true',
# help="Whether to lower case the input text. True for uncased models, False for cased models.")
# parser.add_argument("--max_seq_length",
# default=128,
# type=int,
# help="The maximum total input sequence length after WordPiece tokenization. \n"
# "Sequences longer than this will be truncated, and sequences shorter \n"
# "than this will be padded.")
# parser.add_argument("--do_train",
# default=False,
# action='store_true',
# help="Whether to run training.")
# parser.add_argument("--do_eval",
# default=False,
# action='store_true',
# help="Whether to run eval on the dev set.")
# parser.add_argument("--train_batch_size",
# default=32,
# type=int,
# help="Total batch size for training.")
# parser.add_argument("--eval_batch_size",
# default=32,
# type=int,
# help="Total batch size for eval.")
# parser.add_argument("--learning_rate",
# default=5e-5,
# type=float,
# help="The initial learning rate for Adam.")
# parser.add_argument("--num_train_epochs",
# default=3.0,
# type=float,
# help="Total number of training epochs to perform.")
# parser.add_argument("--warmup_proportion",
# default=0.1,
# type=float,
# help="Proportion of training to perform linear learning rate warmup for. "
# "E.g., 0.1 = 10%% of training.")
# parser.add_argument("--save_checkpoints_steps",
# default=1000,
# type=int,
# help="How often to save the model checkpoint.")
# parser.add_argument("--no_cuda",
# default=False,
# action='store_true',
# help="Whether not to use CUDA when available")
# parser.add_argument("--local_rank",
# type=int,
# default=-1,
# help="local_rank for distributed training on gpus")
# parser.add_argument('--seed',
# type=int,
# default=666,
# help="random seed for initialization")
# parser.add_argument('--gradient_accumulation_steps',
# type=int,
# default=2,
# help="Number of updates steps to accumualte before performing a backward/update pass.")
# parser.add_argument('--optimize_on_cpu',
# default=False,
# action='store_true',
# help="Whether to perform optimization and keep the optimizer averages on CPU")
# parser.add_argument('--fp16',
# default=False,
# action='store_true',
# help="Whether to use 16-bit float precision instead of 32-bit")
# parser.add_argument('--loss_scale',
# type=float, default=128,
# help='Loss scaling, positive power of 2 values can improve fp16 convergence.')
# parser.add_argument("--resume",
# default=False,
# action='store_true',
# help="Whether to resume the training.")
# parser.add_argument("--f1eval",
# default=True,
# action='store_true',
# help="Whether to use f1 for dev evaluation during training.")
# args = parser.parse_args()
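# A minimal sketch (an assumption, not taken from run_classifier.py) of how
# `warmup_proportion` translates into an absolute warmup step count for a
# linear warmup schedule; `num_train_examples` is a hypothetical placeholder
# for the size of the training set.
#
#   import math
#   import config
#
#   num_train_examples = 10000  # hypothetical dataset size
#   steps_per_epoch = math.ceil(num_train_examples / config.train_batch_size)
#   num_train_steps = int(steps_per_epoch * config.num_train_epochs
#                         / config.gradient_accumulation_steps)
#   num_warmup_steps = int(num_train_steps * config.warmup_proportion)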