# petr_vovnet_gridmask_p4_800x320_amp.yml
# Forked from PaddlePaddle/Paddle3D (PETR, VoVNet backbone, GridMask, 800x320, AMP).
# Global training schedule: per-GPU batch size and total epochs.
batch_size: 1
epochs: 24

# Automatic mixed precision (AMP) settings.
amp_cfg:
  use_amp: true
  # only enable backbone and fpn
  enable: false
  level: O1
  scaler:
    init_loss_scaling: 512.0
# Training split: multi-view nuScenes images with geometric/photometric augmentation.
train_dataset:
  type: NuscenesMVDataset
  dataset_root: data/nuscenes/
  ann_file: data/nuscenes/petr_nuscenes_annotation_train.pkl
  mode: train
  class_names: [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
    'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
  ]
  transforms:
    - type: LoadMultiViewImageFromFiles
      to_float32: true
    - type: LoadAnnotations3D
      with_bbox_3d: true
      with_label_3d: true
    - type: SampleRangeFilter
      point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
    - type: SampleNameFilter
      classes: [
        'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
        'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
      ]
    # Random resize/crop/flip; final_dim is (H, W) = (320, 800).
    - type: ResizeCropFlipImage
      sample_aug_cfg:
        resize_lim: [0.47, 0.625]
        final_dim: [320, 800]
        bot_pct_lim: [0.0, 0.0]
        rot_lim: [0.0, 0.0]
        H: 900
        W: 1600
        rand_flip: true
      training: true
    - type: GlobalRotScaleTransImage
      rot_range: [-0.3925, 0.3925]
      translation_std: [0, 0, 0]
      scale_ratio_range: [0.95, 1.05]
      reverse_angle: true
      training: true
    - type: NormalizeMultiviewImage
      mean: [103.530, 116.280, 123.675]
      std: [57.375, 57.120, 58.395]
    - type: PadMultiViewImage
      size_divisor: 32
    - type: SampleFilerByKey
      keys: ['gt_bboxes_3d', 'gt_labels_3d', 'img']
# Validation split: same preprocessing as training but with augmentation disabled
# (training: false) and no 3D annotations loaded.
val_dataset:
  type: NuscenesMVDataset
  dataset_root: data/nuscenes/
  ann_file: data/nuscenes/petr_nuscenes_annotation_val.pkl
  mode: val
  class_names: ['car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                'traffic_cone']
  transforms:
    - type: LoadMultiViewImageFromFiles
      to_float32: true
    - type: ResizeCropFlipImage
      sample_aug_cfg:
        resize_lim: [0.47, 0.625]
        final_dim: [320, 800]
        bot_pct_lim: [0.0, 0.0]
        rot_lim: [0.0, 0.0]
        H: 900
        W: 1600
        rand_flip: true
      training: false
    - type: NormalizeMultiviewImage
      mean: [103.530, 116.280, 123.675]
      std: [57.375, 57.120, 58.395]
    - type: PadMultiViewImage
      size_divisor: 32
    - type: SampleFilerByKey
      keys: ['img']
# PETR 3D detector: VoVNet backbone -> CPFPN neck -> PETR transformer head.
model:
  type: Petr3D
  # use recompute to save memory
  use_recompute: true
  use_grid_mask: true
  backbone:
    type: VoVNetCP
    spec_name: V-99-eSE
    norm_eval: true
    frozen_stages: -1
    input_ch: 3
    # NOTE(review): original had the Python-tuple literal ('stage4','stage5',),
    # which YAML reads as a plain string; a YAML list is what a sequence of
    # stage names requires — confirm against the consumer.
    out_features: ['stage4', 'stage5']
  neck:
    # remove unused parameters
    type: CPFPN
    in_channels: [768, 1024]
    out_channels: 256
    num_outs: 2
  pts_bbox_head:
    type: PETRHead
    num_classes: 10
    in_channels: 256
    num_query: 900
    LID: true
    with_multiview: true
    position_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
    normedlinear: false
    transformer:
      type: PETRTransformer
      decoder_embed_dims: 256
      decoder:
        type: PETRTransformerDecoder
        return_intermediate: true
        num_layers: 6
        transformerlayers:
          type: PETRTransformerDecoderLayer
          # Two attentions per layer: self-attention then image cross-attention,
          # matching the 'self_attn' / 'cross_attn' slots in operation_order.
          attns:
            - type: MultiHeadAttention
              embed_dims: 256
              num_heads: 8
              attn_drop: 0.1
              drop_prob: 0.1
            - type: PETRMultiheadAttention
              embed_dims: 256
              num_heads: 8
              attn_drop: 0.1
              drop_prob: 0.1
              batch_first: true
          feedforward_channels: 2048
          ffn_dropout: 0.1
          operation_order: ['self_attn', 'norm', 'cross_attn', 'norm', 'ffn', 'norm']
    positional_encoding:
      type: SinePositionalEncoding3D
      num_feats: 128
      normalize: true
    bbox_coder:
      type: NMSFreeCoder
      post_center_range: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]
      point_cloud_range: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
      max_num: 300
      voxel_size: [0.2, 0.2, 8]
      num_classes: 10
    loss_cls:
      type: WeightedFocalLoss
      gamma: 2.0
      alpha: 0.25
      loss_weight: 2.0
      reduction: sum
    loss_bbox:
      type: WeightedL1Loss
      loss_weight: 0.25
      reduction: sum
# AdamW with global-norm gradient clipping.
optimizer:
  type: AdamW
  weight_decay: 0.01
  grad_clip:
    type: ClipGradByGlobalNorm
    clip_norm: 35
# Linear warmup (500 steps, 0 -> 2e-4) wrapping a cosine-annealing decay
# over the full training run.
lr_scheduler:
  type: LinearWarmup
  learning_rate:
    type: CosineAnnealingDecay
    learning_rate: 0.0002
    T_max: 84408 # 3517 * 24e
    eta_min: 0.0000002
  warmup_steps: 500
  start_lr: 0.0
  end_lr: 0.0002