From b10c39acfbaf4fbfc0563746c5dd9ceef3b7a17c Mon Sep 17 00:00:00 2001 From: JingweiZhang12 Date: Tue, 20 Jun 2023 15:09:32 +0800 Subject: [PATCH 01/25] support training dsvt --- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 97 ++++- projects/DSVT/dsvt/dsvt.py | 6 +- projects/DSVT/dsvt/dsvt_head.py | 367 +++++++++++++++++- projects/DSVT/dsvt/utils.py | 141 ++++++- 4 files changed, 594 insertions(+), 17 deletions(-) diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index ac7a38b9bc..f9288f09e3 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -88,25 +88,28 @@ loss_cls=dict( type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0), loss_bbox=dict(type='mmdet.L1Loss', reduction='mean', loss_weight=2.0), + loss_iou=dict(type='mmdet.L1Loss', reduction='none', loss_weight=1.0), + loss_reg_iou=dict( + type='mmdet3d.DIoU3DLoss', reduction='mean', loss_weight=2.0), norm_bbox=True), # model training and testing settings train_cfg=dict( - pts=dict( - grid_size=grid_size, - voxel_size=voxel_size, - out_size_factor=4, - dense_reg=1, - gaussian_overlap=0.1, - max_objs=500, - min_radius=2, - code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])), + grid_size=grid_size, + voxel_size=voxel_size, + point_cloud_range=point_cloud_range, + out_size_factor=1, + dense_reg=1, + gaussian_overlap=0.1, + max_objs=500, + min_radius=2, + code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]), test_cfg=dict( max_per_img=500, max_pool_nms=False, min_radius=[4, 12, 10, 1, 0.85, 0.175], iou_rectifier=[[0.68, 0.71, 0.65]], pc_range=[-80, -80], - out_size_factor=4, + out_size_factor=1, voxel_size=voxel_size[:2], nms_type='rotate', multi_class_nms=True, @@ -149,11 +152,16 @@ # remove_close=True), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler), + dict( + type='RandomFlip3D', + sync_2d=False, + flip_ratio_bev_horizontal=0.5, + flip_ratio_bev_vertical=0.5), dict( type='GlobalRotScaleTrans', rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05], - translation_std=[0.5, 0.5, 0]), + translation_std=[0.5, 0.5, 0.5]), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectNameFilter', classes=class_names), @@ -191,6 +199,26 @@ ] dataset_type = 'WaymoDataset' +train_dataloader = dict( + batch_size=1, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='waymo_infos_train.pkl', + data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), + pipeline=train_pipeline, + modality=input_modality, + test_mode=False, + metainfo=metainfo, + # we use box_type_3d='LiDAR' in kitti and nuscenes dataset + # and box_type_3d='Depth' in sunrgbd and scannet dataset. 
+ box_type_3d='LiDAR', + # load one frame every five frames + load_interval=5, + backend_args=backend_args)) val_dataloader = dict( batch_size=4, num_workers=4, @@ -223,6 +251,52 @@ vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') +lr = 1e-5 +# This schedule is mainly used by models on nuScenes dataset +# max_norm=10 is better for SECOND +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='Adam', lr=lr, weight_decay=0.05, betas=(0.9, 0.999)), + clip_grad=dict(max_norm=10, norm_type=2)) +# learning rate +param_scheduler = [ + dict( + type='CosineAnnealingLR', + T_max=1.2, + eta_min=lr * 100, + begin=0, + end=1.2, + by_epoch=True, + convert_to_iter_based=True), + dict( + type='CosineAnnealingLR', + T_max=10.8, + eta_min=lr * 1e-4, + begin=1.2, + end=12, + by_epoch=True, + convert_to_iter_based=True), + # momentum scheduler + dict( + type='CosineAnnealingMomentum', + T_max=1.2, + eta_min=0.85, + begin=0, + end=1.2, + by_epoch=True, + convert_to_iter_based=True), + dict( + type='CosineAnnealingMomentum', + T_max=10.8, + eta_min=0.95, + begin=1.2, + end=12, + by_epoch=True, + convert_to_iter_based=True) +] + +# runtime settings +train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=12) # runtime settings val_cfg = dict() @@ -237,3 +311,4 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=50), checkpoint=dict(type='CheckpointHook', interval=5)) +custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)] diff --git a/projects/DSVT/dsvt/dsvt.py b/projects/DSVT/dsvt/dsvt.py index 3c5405679f..e6f6ceffbe 100644 --- a/projects/DSVT/dsvt/dsvt.py +++ b/projects/DSVT/dsvt/dsvt.py @@ -103,7 +103,11 @@ def loss(self, batch_inputs_dict: Dict[List, torch.Tensor], Returns: dict[str, Tensor]: A dictionary of loss components. """ - pass + pts_feats = self.extract_feat(batch_inputs_dict) + losses = dict() + loss = self.bbox_head.loss(pts_feats, batch_data_samples) + losses.update(loss) + return losses def predict(self, batch_inputs_dict: Dict[str, Optional[Tensor]], batch_data_samples: List[Det3DDataSample], diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 35167919f0..2d7d8d03db 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -1,12 +1,15 @@ from typing import Dict, List, Tuple import torch +from mmcv.ops import boxes_iou3d from mmdet.models.utils import multi_apply from mmengine.structures import InstanceData from torch import Tensor from mmdet3d.models import CenterHead from mmdet3d.models.layers import circle_nms, nms_bev +from mmdet3d.models.utils import (clip_sigmoid, draw_heatmap_gaussian, + gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr @@ -18,8 +21,16 @@ class DSVTCenterHead(CenterHead): This head adds IoU prediction branch based on the original CenterHead. """ - def __init__(self, *args, **kwargs): + def __init__(self, + *args, + loss_iou=dict( + type='mmdet.L1Loss', reduction='mean', loss_weight=1), + loss_reg_iou=None, + **kwargs): super(DSVTCenterHead, self).__init__(*args, **kwargs) + self.loss_iou = MODELS.build(loss_iou) + self.loss_iou_reg = MODELS.build( + loss_reg_iou) if loss_reg_iou is not None else None def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. @@ -66,7 +77,296 @@ def loss(self, pts_feats: List[Tensor], Returns: dict: Losses of each branch. 
""" - pass + outs = self(pts_feats) + batch_gt_instance_3d = [] + for data_sample in batch_data_samples: + batch_gt_instance_3d.append(data_sample.gt_instances_3d) + losses = self.loss_by_feat(outs, batch_gt_instance_3d) + return losses + + def _decode_all_preds(self, + pred_dict, + point_cloud_range=None, + voxel_size=None): + batch_size, _, H, W = pred_dict['reg'].shape + + batch_center = pred_dict['reg'].permute(0, 2, 3, 1).contiguous().view( + batch_size, H * W, 2) # (B, H, W, 2) + batch_center_z = pred_dict['height'].permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_dim = pred_dict['dim'].exp().permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 3) # (B, H, W, 3) + batch_rot_cos = pred_dict['rot'][:, 0].unsqueeze(dim=1).permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_rot_sin = pred_dict['rot'][:, 1].unsqueeze(dim=1).permute( + 0, 2, 3, 1).contiguous().view(batch_size, H * W, 1) # (B, H, W, 1) + batch_vel = pred_dict['vel'].permute(0, 2, 3, 1).contiguous().view( + batch_size, H * W, 2) if 'vel' in pred_dict.keys() else None + + angle = torch.atan2(batch_rot_sin, batch_rot_cos) # (B, H*W, 1) + + ys, xs = torch.meshgrid([ + torch.arange( + 0, H, device=batch_center.device, dtype=batch_center.dtype), + torch.arange( + 0, W, device=batch_center.device, dtype=batch_center.dtype) + ]) + ys = ys.view(1, H, W).repeat(batch_size, 1, 1) + xs = xs.view(1, H, W).repeat(batch_size, 1, 1) + xs = xs.view(batch_size, -1, 1) + batch_center[:, :, 0:1] + ys = ys.view(batch_size, -1, 1) + batch_center[:, :, 1:2] + + xs = xs * voxel_size[0] + point_cloud_range[0] + ys = ys * voxel_size[1] + point_cloud_range[1] + + box_part_list = [xs, ys, batch_center_z, batch_dim, angle] + if batch_vel is not None: + box_part_list.append(batch_vel) + + box_preds = torch.cat((box_part_list), + dim=-1).view(batch_size, H, W, -1) + + return box_preds + + def _transpose_and_gather_feat(self, feat, ind): + feat = feat.permute(0, 2, 3, 1).contiguous() + feat = feat.view(feat.size(0), -1, feat.size(3)) + feat = self._gather_feat(feat, ind) + return feat + + def calc_iou_loss(self, iou_preds, batch_box_preds, mask, ind, gt_boxes): + """ + Args: + iou_preds: (batch x 1 x h x w) + batch_box_preds: (batch x (7 or 9) x h x w) + mask: (batch x max_objects) + ind: (batch x max_objects) + gt_boxes: List of batch groundtruth boxes. 
+ + Returns: + """ + if mask.sum() == 0: + return iou_preds.new_zeros((1)) + + mask = mask.bool() + selected_iou_preds = self._transpose_and_gather_feat(iou_preds, + ind)[mask] + + selected_box_preds = self._transpose_and_gather_feat( + batch_box_preds, ind)[mask] + gt_boxes = torch.cat(gt_boxes) + assert gt_boxes.size(0) == selected_box_preds.size(0) + iou_target = boxes_iou3d(selected_box_preds[:, 0:7], gt_boxes[:, 0:7]) + iou_target = torch.diag(iou_target).view(-1) + iou_target = iou_target * 2 - 1 # [0, 1] ==> [-1, 1] + + loss = self.loss_iou(selected_iou_preds.view(-1), iou_target) + return loss + + def calc_iou_reg_loss(self, batch_box_preds, mask, ind, gt_boxes): + if mask.sum() == 0: + return batch_box_preds.new_zeros((1)) + + mask = mask.bool() + + selected_box_preds = self._transpose_and_gather_feat( + batch_box_preds, ind)[mask] + gt_boxes = torch.cat(gt_boxes) + assert gt_boxes.size(0) == selected_box_preds.size(0) + loss = self.loss_iou_reg(selected_box_preds[:, 0:7], gt_boxes[:, 0:7]) + + return loss + + def get_targets( + self, + batch_gt_instances_3d: List[InstanceData], + ) -> Tuple[List[Tensor]]: + """Generate targets. + + How each output is transformed: + + Each nested list is transposed so that all same-index elements in + each sub-list (1, ..., N) become the new sub-lists. + [ [a0, a1, a2, ... ], [b0, b1, b2, ... ], ... ] + ==> [ [a0, b0, ... ], [a1, b1, ... ], [a2, b2, ... ] ] + + The new transposed nested list is converted into a list of N + tensors generated by concatenating tensors in the new sub-lists. + [ tensor0, tensor1, tensor2, ... ] + + Args: + batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of + gt_instances. It usually includes ``bboxes_3d`` and\ + ``labels_3d`` attributes. + + Returns: + Returns: + tuple[list[torch.Tensor]]: Tuple of target including + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the + position of the valid boxes. + - list[torch.Tensor]: Masks indicating which + boxes are valid. + """ + heatmaps, anno_boxes, inds, masks, task_gt_bboxes = multi_apply( + self.get_targets_single, batch_gt_instances_3d) + # Transpose heatmaps + heatmaps = list(map(list, zip(*heatmaps))) + heatmaps = [torch.stack(hms_) for hms_ in heatmaps] + # Transpose anno_boxes + anno_boxes = list(map(list, zip(*anno_boxes))) + anno_boxes = [torch.stack(anno_boxes_) for anno_boxes_ in anno_boxes] + # Transpose inds + inds = list(map(list, zip(*inds))) + inds = [torch.stack(inds_) for inds_ in inds] + # Transpose inds + masks = list(map(list, zip(*masks))) + masks = [torch.stack(masks_) for masks_ in masks] + # Transpose inds + task_gt_bboxes = list(map(list, zip(*task_gt_bboxes))) + return heatmaps, anno_boxes, inds, masks, task_gt_bboxes + + def get_targets_single(self, + gt_instances_3d: InstanceData) -> Tuple[Tensor]: + """Generate training targets for a single sample. + + Args: + gt_instances_3d (:obj:`InstanceData`): Gt_instances of + single data sample. It usually includes + ``bboxes_3d`` and ``labels_3d`` attributes. + + Returns: + tuple[list[torch.Tensor]]: Tuple of target including + the following results in order. + + - list[torch.Tensor]: Heatmap scores. + - list[torch.Tensor]: Ground truth boxes. + - list[torch.Tensor]: Indexes indicating the position + of the valid boxes. + - list[torch.Tensor]: Masks indicating which boxes + are valid. 
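+        Note:
+            Predictions are gathered at the ``ind`` positions and filtered
+            by ``mask`` so that they align one-to-one with the
+            concatenated ``gt_boxes`` before the IoU target is computed.
+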
+ """ + gt_labels_3d = gt_instances_3d.labels_3d + gt_bboxes_3d = gt_instances_3d.bboxes_3d + device = gt_labels_3d.device + gt_bboxes_3d = torch.cat( + (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]), + dim=1).to(device) + max_objs = self.train_cfg['max_objs'] * self.train_cfg['dense_reg'] + grid_size = torch.tensor(self.train_cfg['grid_size']).to(device) + pc_range = torch.tensor(self.train_cfg['point_cloud_range']) + voxel_size = torch.tensor(self.train_cfg['voxel_size']) + + feature_map_size = grid_size[:2] // self.train_cfg['out_size_factor'] + + # reorganize the gt_dict by tasks + task_masks = [] + flag = 0 + for class_name in self.class_names: + task_masks.append([ + torch.where(gt_labels_3d == class_name.index(i) + flag) + for i in class_name + ]) + flag += len(class_name) + + task_boxes = [] + task_classes = [] + flag2 = 0 + for idx, mask in enumerate(task_masks): + task_box = [] + task_class = [] + for m in mask: + task_box.append(gt_bboxes_3d[m]) + # 0 is background for each task, so we need to add 1 here. + task_class.append(gt_labels_3d[m] + 1 - flag2) + task_boxes.append(torch.cat(task_box, axis=0).to(device)) + task_classes.append(torch.cat(task_class).long().to(device)) + flag2 += len(mask) + draw_gaussian = draw_heatmap_gaussian + heatmaps, anno_boxes, inds, masks = [], [], [], [] + + for idx, task_head in enumerate(self.task_heads): + heatmap = gt_bboxes_3d.new_zeros( + (len(self.class_names[idx]), feature_map_size[1], + feature_map_size[0])) + + anno_box = gt_bboxes_3d.new_zeros((max_objs, 8), + dtype=torch.float32) + + ind = gt_labels_3d.new_zeros((max_objs), dtype=torch.int64) + mask = gt_bboxes_3d.new_zeros((max_objs), dtype=torch.uint8) + + num_objs = min(task_boxes[idx].shape[0], max_objs) + + for k in range(num_objs): + cls_id = task_classes[idx][k] - 1 + + length = task_boxes[idx][k][3] + width = task_boxes[idx][k][4] + length = length / voxel_size[0] / self.train_cfg[ + 'out_size_factor'] + width = width / voxel_size[1] / self.train_cfg[ + 'out_size_factor'] + + if width > 0 and length > 0: + radius = gaussian_radius( + (width, length), + min_overlap=self.train_cfg['gaussian_overlap']) + radius = max(self.train_cfg['min_radius'], int(radius)) + + # be really careful for the coordinate system of + # your box annotation. 
+ x, y, z = task_boxes[idx][k][0], task_boxes[idx][k][ + 1], task_boxes[idx][k][2] + + coor_x = ( + x - pc_range[0] + ) / voxel_size[0] / self.train_cfg['out_size_factor'] + coor_y = ( + y - pc_range[1] + ) / voxel_size[1] / self.train_cfg['out_size_factor'] + + center = torch.tensor([coor_x, coor_y], + dtype=torch.float32, + device=device) + center_int = center.to(torch.int32) + + # throw out not in range objects to avoid out of array + # area when creating the heatmap + if not (0 <= center_int[0] < feature_map_size[0] + and 0 <= center_int[1] < feature_map_size[1]): + continue + + draw_gaussian(heatmap[cls_id], center_int, radius) + + new_idx = k + x, y = center_int[0], center_int[1] + + assert (y * feature_map_size[0] + x < + feature_map_size[0] * feature_map_size[1]) + + ind[new_idx] = y * feature_map_size[0] + x + mask[new_idx] = 1 + # TODO: support other outdoor dataset + rot = task_boxes[idx][k][6] + box_dim = task_boxes[idx][k][3:6] + if self.norm_bbox: + box_dim = box_dim.log() + anno_box[new_idx] = torch.cat([ + center - torch.tensor([x, y], device=device), + z.unsqueeze(0), box_dim, + torch.sin(rot).unsqueeze(0), + torch.cos(rot).unsqueeze(0) + ]) + + heatmaps.append(heatmap) + anno_boxes.append(anno_box) + masks.append(mask) + inds.append(ind) + return heatmaps, anno_boxes, inds, masks, task_boxes def loss_by_feat(self, preds_dicts: Tuple[List[dict]], batch_gt_instances_3d: List[InstanceData], *args, @@ -79,13 +379,72 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], tasks head, and the internal list indicate different FPN level. batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of - gt_instances. It usually includes ``bboxes_3d`` and\ + gt_instances. It usually includes ``bboxes_3d`` and ``labels_3d`` attributes. Returns: dict[str,torch.Tensor]: Loss of heatmap and bbox of each task. 
""" - pass + heatmaps, anno_boxes, inds, masks, task_gt_bboxes = self.get_targets( + batch_gt_instances_3d) + loss_dict = dict() + for task_id, preds_dict in enumerate(preds_dicts): + # heatmap focal loss + preds_dict[0]['heatmap'] = clip_sigmoid(preds_dict[0]['heatmap']) + num_pos = heatmaps[task_id].eq(1).float().sum().item() + loss_heatmap = self.loss_cls( + preds_dict[0]['heatmap'], + heatmaps[task_id], + avg_factor=max(num_pos, 1)) + target_box = anno_boxes[task_id] + # reconstruct the anno_box from multiple reg heads + preds_dict[0]['anno_box'] = torch.cat( + (preds_dict[0]['reg'], preds_dict[0]['height'], + preds_dict[0]['dim'], preds_dict[0]['rot']), + dim=1) + + # Regression loss for dimension, offset, height, rotation + ind = inds[task_id] + num = masks[task_id].float().sum() + pred = preds_dict[0]['anno_box'].permute(0, 2, 3, 1).contiguous() + pred = pred.view(pred.size(0), -1, pred.size(3)) + pred = self._gather_feat(pred, ind) + mask = masks[task_id].unsqueeze(2).expand_as(target_box).float() + isnotnan = (~torch.isnan(target_box)).float() + mask *= isnotnan + + code_weights = self.train_cfg.get('code_weights', None) + bbox_weights = mask * mask.new_tensor(code_weights) + loss_bbox = self.loss_bbox( + pred, target_box, bbox_weights, avg_factor=(num + 1e-4)) + loss_dict[f'task{task_id}.loss_heatmap'] = loss_heatmap + loss_dict[f'task{task_id}.loss_bbox'] = loss_bbox + + if 'iou' in preds_dict[0]: + batch_box_preds = self._decode_all_preds( + pred_dict=preds_dict[0], + point_cloud_range=self.train_cfg['point_cloud_range'], + voxel_size=self.train_cfg['voxel_size'] + ) # (B, H, W, 7 or 9) + + batch_box_preds_for_iou = batch_box_preds.permute( + 0, 3, 1, 2) # (B, 7 or 9, H, W) + loss_dict[f'task{task_id}.loss_iou'] = self.calc_iou_loss( + iou_preds=preds_dict[0]['iou'], + batch_box_preds=batch_box_preds_for_iou.clone().detach(), + mask=mask.all(dim=-1), + ind=ind, + gt_boxes=task_gt_bboxes[task_id]) + + if self.loss_iou_reg is not None: + loss_dict[f'task{task_id}.loss_reg_iou'] = \ + self.calc_iou_reg_loss( + batch_box_preds=batch_box_preds_for_iou, + mask=mask.all(dim=-1), + ind=ind, + gt_boxes=task_gt_bboxes[task_id]) + + return loss_dict def predict(self, pts_feats: Tuple[torch.Tensor], diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 7c40383ce7..4d697d2dbb 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -3,10 +3,11 @@ import numpy as np import torch import torch.nn as nn +from mmdet.models.losses.utils import weighted_loss from torch import Tensor from mmdet3d.models.task_modules import CenterPointBBoxCoder -from mmdet3d.registry import TASK_UTILS +from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds get_inner_win_inds_cuda = ingroup_inds @@ -298,3 +299,141 @@ def decode(self, 'support post_center_range is not None for now!') return predictions_dicts + + +def center_to_corner2d(center, dim): + corners_norm = torch.tensor( + [[-0.5, -0.5], [-0.5, 0.5], [0.5, 0.5], [0.5, -0.5]], + device=dim.device).type_as(center) # (4, 2) + corners = dim.view([-1, 1, 2]) * corners_norm.view([1, 4, 2]) # (N, 4, 2) + corners = corners + center.view(-1, 1, 2) + return corners + + +@weighted_loss +def diou3d_loss(pred_boxes, gt_boxes, eps: float = 1e-7): + """ + https://github.com/agent-sgs/PillarNet/blob/master/det3d/core/utils/center_utils.py # noqa + Args: + pred_boxes (N, 7): + gt_boxes (N, 7): + + Returns: + _type_: _description_ + """ + assert pred_boxes.shape[0] == 
gt_boxes.shape[0] + + qcorners = center_to_corner2d(pred_boxes[:, :2], + pred_boxes[:, 3:5]) # (N, 4, 2) + gcorners = center_to_corner2d(gt_boxes[:, :2], gt_boxes[:, + 3:5]) # (N, 4, 2) + + inter_max_xy = torch.minimum(qcorners[:, 2], gcorners[:, 2]) + inter_min_xy = torch.maximum(qcorners[:, 0], gcorners[:, 0]) + out_max_xy = torch.maximum(qcorners[:, 2], gcorners[:, 2]) + out_min_xy = torch.minimum(qcorners[:, 0], gcorners[:, 0]) + + # calculate area + volume_pred_boxes = pred_boxes[:, 3] * pred_boxes[:, 4] * pred_boxes[:, 5] + volume_gt_boxes = gt_boxes[:, 3] * gt_boxes[:, 4] * gt_boxes[:, 5] + + inter_h = torch.minimum( + pred_boxes[:, 2] + 0.5 * pred_boxes[:, 5], + gt_boxes[:, 2] + 0.5 * gt_boxes[:, 5]) - torch.maximum( + pred_boxes[:, 2] - 0.5 * pred_boxes[:, 5], + gt_boxes[:, 2] - 0.5 * gt_boxes[:, 5]) + inter_h = torch.clamp(inter_h, min=0) + + inter = torch.clamp((inter_max_xy - inter_min_xy), min=0) + volume_inter = inter[:, 0] * inter[:, 1] * inter_h + volume_union = volume_gt_boxes + volume_pred_boxes - volume_inter + eps + + # boxes_iou3d_gpu(pred_boxes, gt_boxes) + inter_diag = torch.pow(gt_boxes[:, 0:3] - pred_boxes[:, 0:3], 2).sum(-1) + + outer_h = torch.maximum( + gt_boxes[:, 2] + 0.5 * gt_boxes[:, 5], + pred_boxes[:, 2] + 0.5 * pred_boxes[:, 5]) - torch.minimum( + gt_boxes[:, 2] - 0.5 * gt_boxes[:, 5], + pred_boxes[:, 2] - 0.5 * pred_boxes[:, 5]) + outer_h = torch.clamp(outer_h, min=0) + outer = torch.clamp((out_max_xy - out_min_xy), min=0) + outer_diag = outer[:, 0]**2 + outer[:, 1]**2 + outer_h**2 + eps + + dious = volume_inter / volume_union - inter_diag / outer_diag + dious = torch.clamp(dious, min=-1.0, max=1.0) + + loss = 1 - dious + + return loss + + +@MODELS.register_module() +class DIoU3DLoss(nn.Module): + r"""3D bboxes Implementation of `Distance-IoU Loss: Faster and Better + Learning for Bounding Box Regression https://arxiv.org/abs/1911.08287`_. + + Code is modified from https://github.com/Zzh-tju/DIoU. + + Args: + eps (float): Epsilon to avoid log(0). + reduction (str): Options are "none", "mean" and "sum". + loss_weight (float): Weight of loss. + """ + + def __init__(self, + eps: float = 1e-6, + reduction: str = 'mean', + loss_weight: float = 1.0) -> None: + super().__init__() + self.eps = eps + self.reduction = reduction + self.loss_weight = loss_weight + + def forward(self, + pred: Tensor, + target: Tensor, + weight: Optional[Tensor] = None, + avg_factor: Optional[int] = None, + reduction_override: Optional[str] = None, + **kwargs) -> Tensor: + """Forward function. + + Args: + pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2), + shape (n, 4). + target (Tensor): The learning target of the prediction, + shape (n, 4). + weight (Optional[Tensor], optional): The weight of loss for each + prediction. Defaults to None. + avg_factor (Optional[int], optional): Average factor that is used + to average the loss. Defaults to None. + reduction_override (Optional[str], optional): The reduction method + used to override the original reduction method of the loss. + Defaults to None. Options are "none", "mean" and "sum". + + Returns: + Tensor: Loss tensor. 
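+    # DIoU = IoU - d^2 / c^2, where d is the distance between the two box
+    # centers and c the diagonal of the smallest enclosing box. The BEV
+    # corners below carry no heading, so the planar overlap is computed
+    # axis-aligned, following the PillarNet reference implementation.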
+ """ + if weight is not None and not torch.any(weight > 0): + if pred.dim() == weight.dim() + 1: + weight = weight.unsqueeze(1) + return (pred * weight).sum() # 0 + assert reduction_override in (None, 'none', 'mean', 'sum') + reduction = ( + reduction_override if reduction_override else self.reduction) + if weight is not None and weight.dim() > 1: + # TODO: remove this in the future + # reduce the weight of shape (n, 4) to (n,) to match the + # giou_loss of shape (n,) + assert weight.shape == pred.shape + weight = weight.mean(-1) + loss = self.loss_weight * diou3d_loss( + pred, + target, + weight, + eps=self.eps, + reduction=reduction, + avg_factor=avg_factor, + **kwargs) + return loss From 7fecc0f2e5e98507a4c6486cff652ea2fe01541b Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 9 Aug 2023 13:33:27 +0800 Subject: [PATCH 02/25] fix batch_size --- convert_ckpt.py | 34 +++++++++++++++++++ mmdet3d/engine/hooks/visualization_hook.py | 4 +-- mmdet3d/structures/points/base_points.py | 6 ++-- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 20 ++--------- 4 files changed, 42 insertions(+), 22 deletions(-) create mode 100644 convert_ckpt.py diff --git a/convert_ckpt.py b/convert_ckpt.py new file mode 100644 index 0000000000..f5c3c9317e --- /dev/null +++ b/convert_ckpt.py @@ -0,0 +1,34 @@ +# from mmengine.config import Config +# cfg = Config.fromfile(path) +# print(cfg) +import torch + +mm3d_model = torch.load('checkpoints/dsvt_convert.pth') +dsvt_model = dict() +dsvt_model['model_state'] = dict() +for k, v in mm3d_model.items(): + if 'voxel_encoder' in k: + k = k.replace('voxel_encoder', 'vfe') + if 'middle_encoder' in k: + k = k.replace('middle_encoder', 'backbone_3d') + if 'backbone.' in k: + k = k.replace('backbone', 'backbone_2d') + if 'neck' in k: + k = k.replace('neck', 'backbone_2d') + if 'bbox_head.shared_conv' in k: + k = k.replace('bbox_head.shared_conv.conv', 'dense_head.shared_conv.0') + k = k.replace('bbox_head.shared_conv.bn', 'dense_head.shared_conv.1') + if 'bbox_head.task_heads' in k: + k = k.replace('bbox_head.task_heads', 'dense_head.heads_list') + if 'reg' in k: + k = k.replace('reg', 'center') + if 'height' in k: + k = k.replace('height', 'center_z') + if 'heatmap' in k: + k = k.replace('heatmap', 'hm') + if '0.conv' in k: + k = k.replace('0.conv', '0.0') + if '0.bn' in k: + k = k.replace('0.bn', '0.1') + dsvt_model['model_state'][k] = v +torch.save(dsvt_model, 'dsvt_ckpt.pth') diff --git a/mmdet3d/engine/hooks/visualization_hook.py b/mmdet3d/engine/hooks/visualization_hook.py index ffec1addc3..9de46d9692 100644 --- a/mmdet3d/engine/hooks/visualization_hook.py +++ b/mmdet3d/engine/hooks/visualization_hook.py @@ -78,11 +78,11 @@ def __init__(self, 'needs to be excluded.') self.vis_task = vis_task - if wait_time == -1: + if show and wait_time == -1: print_log( 'Manual control mode, press [Right] to next sample.', logger='current') - else: + elif show: print_log( 'Autoplay mode, press [SPACE] to pause.', logger='current') self.wait_time = wait_time diff --git a/mmdet3d/structures/points/base_points.py b/mmdet3d/structures/points/base_points.py index 4cb54ce895..188d20d270 100644 --- a/mmdet3d/structures/points/base_points.py +++ b/mmdet3d/structures/points/base_points.py @@ -247,10 +247,10 @@ def in_range_3d( """ in_range_flags = ((self.tensor[:, 0] > point_range[0]) & (self.tensor[:, 1] > point_range[1]) - & (self.tensor[:, 2] > point_range[2]) + # & (self.tensor[:, 2] > point_range[2]) & (self.tensor[:, 0] < point_range[3]) - & (self.tensor[:, 1] < 
point_range[4]) - & (self.tensor[:, 2] < point_range[5])) + & (self.tensor[:, 1] < point_range[4])) + # & (self.tensor[:, 2] < point_range[5])) return in_range_flags @property diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index f9288f09e3..0ac5c66c48 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -180,21 +180,7 @@ norm_intensity=True, norm_elongation=True, backend_args=backend_args), - dict( - type='MultiScaleFlipAug3D', - img_scale=(1333, 800), - pts_scale_ratio=1, - flip=False, - transforms=[ - dict( - type='GlobalRotScaleTrans', - rot_range=[0, 0], - scale_ratio_range=[1., 1.], - translation_std=[0, 0, 0]), - dict(type='RandomFlip3D'), - dict( - type='PointsRangeFilter', point_cloud_range=point_cloud_range) - ]), + dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='Pack3DDetInputs', keys=['points']) ] @@ -220,8 +206,8 @@ load_interval=5, backend_args=backend_args)) val_dataloader = dict( - batch_size=4, - num_workers=4, + batch_size=1, + num_workers=1, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), From 232c3b89132327eddb72f9d09a834771593b7031 Mon Sep 17 00:00:00 2001 From: sjh Date: Thu, 10 Aug 2023 14:57:59 +0800 Subject: [PATCH 03/25] chage cam to lidar --- .gitignore | 4 ++ mmdet3d/datasets/waymo_dataset.py | 14 +++-- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 9 +++ tools/create_data.py | 40 ++++++------- tools/dataset_converters/kitti_converter.py | 57 ++++++++++--------- tools/dataset_converters/kitti_data_utils.py | 7 ++- .../dataset_converters/update_infos_to_v2.py | 24 ++++---- tools/dataset_converters/waymo_converter.py | 53 +++++++++-------- 8 files changed, 119 insertions(+), 89 deletions(-) diff --git a/.gitignore b/.gitignore index 27cb9c7cb4..36817b21a6 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,7 @@ data/sunrgbd/OFFICIAL_SUNRGBD/ # Waymo evaluation mmdet3d/evaluation/functional/waymo_utils/compute_detection_metrics_main mmdet3d/evaluation/functional/waymo_utils/compute_detection_let_metrics_main +mmdet3d/evaluation/functional/waymo_utils/compute_segmentation_metrics_main + +check_waymo.py +remove_infos.py diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index 5b3a83824e..243630d006 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -5,7 +5,7 @@ import numpy as np from mmdet3d.registry import DATASETS -from mmdet3d.structures import CameraInstance3DBoxes +from mmdet3d.structures import LiDARInstance3DBoxes from .det3d_dataset import Det3DDataset from .kitti_dataset import KittiDataset @@ -165,11 +165,13 @@ def parse_ann_info(self, info: dict) -> dict: # in waymo, lidar2cam = R0_rect @ Tr_velo_to_cam # convert gt_bboxes_3d to velodyne coordinates with `lidar2cam` - lidar2cam = np.array(info['images'][self.default_cam_key]['lidar2cam']) - gt_bboxes_3d = CameraInstance3DBoxes( - ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d, - np.linalg.inv(lidar2cam)) - ann_info['gt_bboxes_3d'] = gt_bboxes_3d + # lidar2cam = np.array(info['images'][self.default_cam_key] + # ['lidar2cam']) + # gt_bboxes_3d = CameraInstance3DBoxes( + # ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d, + # np.linalg.inv(lidar2cam)) 
+ # ann_info['gt_bboxes_3d'] = gt_bboxes_3d + gt_bboxes_3d = LiDARInstance3DBoxes(ann_info['gt_bboxes_3d']) anns_results = dict( gt_bboxes_3d=gt_bboxes_3d, diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index ac7a38b9bc..9132b45cb6 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -5,6 +5,7 @@ voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] +# data_root = 'data/waymo_mini/kitti_format/' data_root = 'data/waymo/kitti_format/' class_names = ['Car', 'Pedestrian', 'Cyclist'] metainfo = dict(classes=class_names) @@ -210,6 +211,14 @@ backend_args=backend_args)) test_dataloader = val_dataloader +# val_evaluator = dict( +# type='WaymoMetric', +# ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', +# waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', +# data_root='./data/waymo_mini/waymo_format', +# backend_args=backend_args, +# convert_kitti_format=False, +# idx2metainfo='./data/waymo_mini/waymo_format/idx2metainfo.pkl') val_evaluator = dict( type='WaymoMetric', ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', diff --git a/tools/create_data.py b/tools/create_data.py index 34356c2a8f..eff511ef4a 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -7,8 +7,10 @@ from tools.dataset_converters import lyft_converter as lyft_converter from tools.dataset_converters import nuscenes_converter as nuscenes_converter from tools.dataset_converters import semantickitti_converter -from tools.dataset_converters.create_gt_database import ( - GTDatabaseCreater, create_groundtruth_database) +# from tools.dataset_converters.create_gt_database import ( +# GTDatabaseCreater, create_groundtruth_database) +from tools.dataset_converters.create_gt_database import \ + create_groundtruth_database from tools.dataset_converters.update_infos_to_v2 import update_pkl_infos @@ -185,9 +187,7 @@ def waymo_data_prep(root_path, """ from tools.dataset_converters import waymo_converter as waymo - splits = [ - 'training', 'validation', 'testing', 'testing_3d_camera_only_detection' - ] + splits = ['training', 'validation'] for i, split in enumerate(splits): load_dir = osp.join(root_path, 'waymo_format', split) if split == 'validation': @@ -203,29 +203,29 @@ def waymo_data_prep(root_path, in ['testing', 'testing_3d_camera_only_detection'])) converter.convert() - from tools.dataset_converters.waymo_converter import \ - create_ImageSets_img_ids - create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) + # from tools.dataset_converters.waymo_converter import \ + # create_ImageSets_img_ids + # create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) # Generate waymo infos out_dir = osp.join(out_dir, 'kitti_format') - kitti.create_waymo_info_file( - out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) + # kitti.create_waymo_info_file( + # out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') info_trainval_path = osp.join(out_dir, f'{info_prefix}_infos_trainval.pkl') - info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl') + # info_test_path = 
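+        # BC-breaking: the updated info files already store Waymo boxes in
+        # LiDAR coordinates, so they are wrapped directly instead of being
+        # converted from the front camera frame as above.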
osp.join(out_dir, f'{info_prefix}_infos_test.pkl') update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path) update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path) update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path) - update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_test_path) - GTDatabaseCreater( - 'WaymoDataset', - out_dir, - info_prefix, - f'{info_prefix}_infos_train.pkl', - relative_path=False, - with_mask=False, - num_worker=workers).create() + # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_test_path) + # GTDatabaseCreater( + # 'WaymoDataset', + # out_dir, + # info_prefix, + # f'{info_prefix}_infos_train.pkl', + # relative_path=False, + # with_mask=False, + # num_worker=workers).create() def semantickitti_data_prep(info_prefix, out_dir): diff --git a/tools/dataset_converters/kitti_converter.py b/tools/dataset_converters/kitti_converter.py index 367cfd7ba9..68a84329ab 100644 --- a/tools/dataset_converters/kitti_converter.py +++ b/tools/dataset_converters/kitti_converter.py @@ -109,8 +109,9 @@ def calculate_single(self, info): return info def calculate(self, infos): - ret_infos = mmengine.track_parallel_progress(self.calculate_single, - infos, self.num_worker) + # ret_infos = mmengine.track_parallel_progress(self.calculate_single, + # infos, self.num_worker) + ret_infos = [self.calculate_single(info) for info in infos] for i, ret_info in enumerate(ret_infos): infos[i] = ret_info @@ -252,7 +253,7 @@ def create_waymo_info_file(data_path, imageset_folder = Path(data_path) / 'ImageSets' train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt')) val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt')) - test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) + # test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) print('Generate info. 
this may take several minutes.') if save_path is None: @@ -268,40 +269,40 @@ def create_waymo_info_file(data_path, relative_path=relative_path, max_sweeps=max_sweeps, num_worker=workers) - waymo_infos_gatherer_test = WaymoInfoGatherer( - data_path, - training=False, - label_info=False, - velodyne=True, - calib=True, - pose=True, - relative_path=relative_path, - max_sweeps=max_sweeps, - num_worker=workers) - num_points_in_gt_calculater = _NumPointsInGTCalculater( - data_path, - relative_path, - num_features=6, - remove_outside=False, - num_worker=workers) + # waymo_infos_gatherer_test = WaymoInfoGatherer( + # data_path, + # training=False, + # label_info=False, + # velodyne=True, + # calib=True, + # pose=True, + # relative_path=relative_path, + # max_sweeps=max_sweeps, + # num_worker=workers) + # num_points_in_gt_calculater = _NumPointsInGTCalculater( + # data_path, + # relative_path, + # num_features=6, + # remove_outside=False, + # num_worker=workers) waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids) - num_points_in_gt_calculater.calculate(waymo_infos_train) + # num_points_in_gt_calculater.calculate(waymo_infos_train) filename = save_path / f'{pkl_prefix}_infos_train.pkl' print(f'Waymo info train file is saved to {filename}') mmengine.dump(waymo_infos_train, filename) waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids) - num_points_in_gt_calculater.calculate(waymo_infos_val) + # num_points_in_gt_calculater.calculate(waymo_infos_val) filename = save_path / f'{pkl_prefix}_infos_val.pkl' print(f'Waymo info val file is saved to {filename}') mmengine.dump(waymo_infos_val, filename) - filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' - print(f'Waymo info trainval file is saved to {filename}') - mmengine.dump(waymo_infos_train + waymo_infos_val, filename) - waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) - filename = save_path / f'{pkl_prefix}_infos_test.pkl' - print(f'Waymo info test file is saved to {filename}') - mmengine.dump(waymo_infos_test, filename) + # filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' + # print(f'Waymo info trainval file is saved to {filename}') + # mmengine.dump(waymo_infos_train + waymo_infos_val, filename) + # waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) + # filename = save_path / f'{pkl_prefix}_infos_test.pkl' + # print(f'Waymo info test file is saved to {filename}') + # mmengine.dump(waymo_infos_test, filename) def _create_reduced_point_cloud(data_path, diff --git a/tools/dataset_converters/kitti_data_utils.py b/tools/dataset_converters/kitti_data_utils.py index 64c3bc415b..b0e90bd8d3 100644 --- a/tools/dataset_converters/kitti_data_utils.py +++ b/tools/dataset_converters/kitti_data_utils.py @@ -148,10 +148,14 @@ def get_label_anno(label_path): for x in content]).reshape(-1, 3) annotations['rotation_y'] = np.array([float(x[14]) for x in content]).reshape(-1) - if len(content) != 0 and len(content[0]) == 16: # have score + if len(content) != 0 and len(content[0]) >= 16: # have score annotations['score'] = np.array([float(x[15]) for x in content]) else: annotations['score'] = np.zeros((annotations['bbox'].shape[0], )) + # have num_lidar_points_in_box, given in waymo + if len(content) != 0 and len(content[0]) == 17: + annotations['num_points_in_gt'] = np.array( + [int(x[16]) for x in content]) index = list(range(num_objects)) + [-1] * (num_gt - num_objects) annotations['index'] = np.array(index, dtype=np.int32) annotations['group_ids'] = np.arange(num_gt, dtype=np.int32) @@ 
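+    # NOTE: only the train/val splits are gathered in this setup; the
+    # test-split gathering above is commented out.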
-552,6 +556,7 @@ def gather(self, image_ids): image_infos = mmengine.track_parallel_progress(self.gather_single, image_ids, self.num_worker) + # image_infos = mmengine.track_progress(self.gather_single, image_ids) return list(image_infos) diff --git a/tools/dataset_converters/update_infos_to_v2.py b/tools/dataset_converters/update_infos_to_v2.py index a2ddbd0688..42d0f5e8ba 100644 --- a/tools/dataset_converters/update_infos_to_v2.py +++ b/tools/dataset_converters/update_infos_to_v2.py @@ -923,8 +923,8 @@ def update_waymo_infos(pkl_path, out_dir): base_img_path = Path(ori_info_dict['image']['image_path']).name for cam_idx, cam_key in enumerate(camera_types): - temp_data_info['images'][cam_key]['timestamp'] = ori_info_dict[ - 'timestamp'] + # temp_data_info['images'][cam_key]['timestamp'] = ori_info_dict[ + # 'timestamp'] temp_data_info['images'][cam_key]['img_path'] = base_img_path h, w = ori_info_dict['image']['image_shape'] @@ -934,16 +934,16 @@ def update_waymo_infos(pkl_path, out_dir): temp_data_info['images'][camera_types[0]]['width'] = w temp_data_info['lidar_points']['num_pts_feats'] = ori_info_dict[ 'point_cloud']['num_features'] - temp_data_info['lidar_points']['timestamp'] = ori_info_dict[ - 'timestamp'] + # temp_data_info['lidar_points']['timestamp'] = ori_info_dict[ + # 'timestamp'] velo_path = ori_info_dict['point_cloud'].get('velodyne_path') if velo_path is not None: temp_data_info['lidar_points']['lidar_path'] = Path(velo_path).name # TODO discuss the usage of Tr_velo_to_cam in lidar - Trv2c = ori_info_dict['calib']['Tr_velo_to_cam'].astype(np.float32) + # Trv2c = ori_info_dict['calib']['Tr_velo_to_cam'].astype(np.float32) - temp_data_info['lidar_points']['Tr_velo_to_cam'] = Trv2c.tolist() + # temp_data_info['lidar_points']['Tr_velo_to_cam'] = Trv2c.tolist() # for potential usage # temp_data_info['images']['R0_rect'] = ori_info_dict['calib'][ @@ -951,7 +951,8 @@ def update_waymo_infos(pkl_path, out_dir): # for the sweeps part: temp_data_info['timestamp'] = ori_info_dict['timestamp'] - temp_data_info['ego2global'] = ori_info_dict['pose'] + temp_data_info['ego2global'] = ori_info_dict['pose'].astype( + np.float32).tolist() for ori_sweep in ori_info_dict['sweeps']: # lidar sweeps @@ -961,6 +962,8 @@ def update_waymo_infos(pkl_path, out_dir): lidar_sweep['lidar_points']['lidar_path'] = Path( ori_sweep['velodyne_path']).name # image sweeps + # image_sweep = dict() + # image_sweep['sample_idx']= ori_sweep['image']['image_idx'] image_sweep = get_single_image_sweep(camera_types) image_sweep['ego2global'] = ori_sweep['pose'] image_sweep['timestamp'] = ori_sweep['timestamp'] @@ -1059,12 +1062,13 @@ def update_waymo_infos(pkl_path, out_dir): instance_list.append(empty_instance) temp_data_info['cam_sync_instances'] = instance_list - cam_instances = generate_waymo_camera_instances( - ori_info_dict, camera_types) - temp_data_info['cam_instances'] = cam_instances + # cam_instances = generate_waymo_camera_instances( + # ori_info_dict, camera_types) + # temp_data_info['cam_instances'] = cam_instances temp_data_info, _ = clear_data_info_unused_keys(temp_data_info) converted_list.append(temp_data_info) + del data_list pkl_name = Path(pkl_path).name out_path = osp.join(out_dir, pkl_name) print(f'Writing to output file: {out_path}.') diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index 87f9c54b54..edfff10ca3 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -109,6 +109,7 @@ def 
__init__(self, def convert(self): """Convert action.""" print('Start converting ...') + # mmengine.track_progress(self.convert_one, range(len(self))) mmengine.track_parallel_progress(self.convert_one, range(len(self)), self.workers) print('\nFinished ...') @@ -131,11 +132,11 @@ def convert_one(self, file_idx): not in self.selected_waymo_locations): continue - self.save_image(frame, file_idx, frame_idx) - self.save_calib(frame, file_idx, frame_idx) - self.save_lidar(frame, file_idx, frame_idx) - self.save_pose(frame, file_idx, frame_idx) - self.save_timestamp(frame, file_idx, frame_idx) + # self.save_image(frame, file_idx, frame_idx) + # self.save_calib(frame, file_idx, frame_idx) + # self.save_lidar(frame, file_idx, frame_idx) + # self.save_pose(frame, file_idx, frame_idx) + # self.save_timestamp(frame, file_idx, frame_idx) if not self.test_mode: # TODO save the depth image for waymo challenge solution. @@ -354,16 +355,17 @@ def save_label(self, frame, file_idx, frame_idx, cam_sync=False): width = box3d.width length = box3d.length + # TODO: Discuss if we should keep kitti_format x = box3d.center_x y = box3d.center_y z = box3d.center_z - height / 2 - # project bounding box to the virtual reference frame - pt_ref = self.T_velo_to_front_cam @ \ - np.array([x, y, z, 1]).reshape((4, 1)) - x, y, z, _ = pt_ref.flatten().tolist() + # # project bounding box to the virtual reference frame + # pt_ref = self.T_velo_to_front_cam @ \ + # np.array([x, y, z, 1]).reshape((4, 1)) + # x, y, z, _ = pt_ref.flatten().tolist() - rotation_y = -box3d.heading - np.pi / 2 + rotation_y = box3d.heading track_id = obj.id # not available @@ -373,25 +375,28 @@ def save_label(self, frame, file_idx, frame_idx, cam_sync=False): line = my_type + \ ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format( - round(truncated, 2), occluded, round(alpha, 2), - round(bounding_box[0], 2), round(bounding_box[1], 2), - round(bounding_box[2], 2), round(bounding_box[3], 2), - round(height, 2), round(width, 2), round(length, 2), - round(x, 2), round(y, 2), round(z, 2), - round(rotation_y, 2)) + truncated, occluded, alpha, + bounding_box[0], bounding_box[1], + bounding_box[2], bounding_box[3], + width, height, length, + x, y, z, rotation_y) if self.save_track_id: line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n' else: line_all = line[:-1] + ' ' + name + '\n' - label_path = f'{self.label_save_dir}{name}/{self.prefix}' + \ - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' - if cam_sync: - label_path = label_path.replace('label_', 'cam_sync_label_') - fp_label = open(label_path, 'a') - fp_label.write(line) - fp_label.close() + # Save num_lidar_points_in_box + line_all = line_all[:-1] + ' ' + str( + obj.num_lidar_points_in_box) + '\n' + + # label_path = f'{self.label_save_dir}{name}/{self.prefix}' + \ + # f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' + # if cam_sync: + # label_path = label_path.replace('label_', 'cam_sync_label_') + # fp_label = open(label_path, 'a') + # fp_label.write(line) + # fp_label.close() fp_label_all.write(line_all) @@ -627,6 +632,6 @@ def create_ImageSets_img_ids(root_dir, splits): open(save_dir + 'train.txt', 'w').writelines(idx_all[0]) open(save_dir + 'val.txt', 'w').writelines(idx_all[1]) open(save_dir + 'trainval.txt', 'w').writelines(idx_all[0] + idx_all[1]) - open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) + # open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) # open(save_dir+'test_cam_only.txt','w').writelines(idx_all[3]) print('created txt files indicating what to 
collect in ', splits) From a81529e5ac1a74e831d67c4293f84493ac1be7a4 Mon Sep 17 00:00:00 2001 From: sjh Date: Mon, 14 Aug 2023 14:34:12 +0800 Subject: [PATCH 04/25] add cam instances --- mmdet3d/datasets/convert_utils.py | 17 +++++++++++------ tools/create_data.py | 4 ++-- tools/create_data.sh | 2 ++ tools/dataset_converters/kitti_converter.py | 6 +++--- tools/dataset_converters/update_infos_to_v2.py | 6 +++--- 5 files changed, 21 insertions(+), 14 deletions(-) diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py index cb4d97e137..84a5c34fa9 100644 --- a/mmdet3d/datasets/convert_utils.py +++ b/mmdet3d/datasets/convert_utils.py @@ -10,7 +10,7 @@ from shapely.geometry import MultiPoint, box from shapely.geometry.polygon import Polygon -from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img +from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes, points_cam2img from mmdet3d.structures.ops import box_np_ops kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck', @@ -318,7 +318,7 @@ def get_kitti_style_2d_boxes(info: dict, def convert_annos(info: dict, cam_idx: int) -> dict: """Convert front-cam anns to i-th camera (KITTI-style info).""" rect = info['calib']['R0_rect'].astype(np.float32) - lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32) + # lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32) lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32) annos = info['annos'] converted_annos = copy.deepcopy(annos) @@ -328,11 +328,16 @@ def convert_annos(info: dict, cam_idx: int) -> dict: gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) # convert gt_bboxes_3d to velodyne coordinates - gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to( - Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True) + # BC-breaking: gt_bboxes_3d is already in lidar coordinates + # TODO: Discuss correcet_yaw if necessary + # gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to( + # Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True) # convert gt_bboxes_3d to cam coordinates - gt_bboxes_3d = gt_bboxes_3d.convert_to( - Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() + # gt_bboxes_3d = gt_bboxes_3d.convert_to( + # Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() + gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( + Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True) + converted_annos['location'] = gt_bboxes_3d[:, :3] converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6] converted_annos['rotation_y'] = gt_bboxes_3d[:, 6] diff --git a/tools/create_data.py b/tools/create_data.py index eff511ef4a..4543756193 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -208,8 +208,8 @@ def waymo_data_prep(root_path, # create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) # Generate waymo infos out_dir = osp.join(out_dir, 'kitti_format') - # kitti.create_waymo_info_file( - # out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) + kitti.create_waymo_info_file( + out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') info_trainval_path = osp.join(out_dir, f'{info_prefix}_infos_trainval.pkl') diff --git a/tools/create_data.sh b/tools/create_data.sh index 9a57852f71..9e1fe9c0ce 100755 --- a/tools/create_data.sh +++ 
b/tools/create_data.sh @@ -6,6 +6,7 @@ export PYTHONPATH=`pwd`:$PYTHONPATH PARTITION=$1 JOB_NAME=$2 DATASET=$3 +WORKERS=$4 GPUS=${GPUS:-1} GPUS_PER_NODE=${GPUS_PER_NODE:-1} SRUN_ARGS=${SRUN_ARGS:-""} @@ -21,4 +22,5 @@ srun -p ${PARTITION} \ python -u tools/create_data.py ${DATASET} \ --root-path ./data/${DATASET} \ --out-dir ./data/${DATASET} \ + --workers ${WORKERS} \ --extra-tag ${DATASET} diff --git a/tools/dataset_converters/kitti_converter.py b/tools/dataset_converters/kitti_converter.py index 68a84329ab..57453c5ddc 100644 --- a/tools/dataset_converters/kitti_converter.py +++ b/tools/dataset_converters/kitti_converter.py @@ -296,9 +296,9 @@ def create_waymo_info_file(data_path, filename = save_path / f'{pkl_prefix}_infos_val.pkl' print(f'Waymo info val file is saved to {filename}') mmengine.dump(waymo_infos_val, filename) - # filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' - # print(f'Waymo info trainval file is saved to {filename}') - # mmengine.dump(waymo_infos_train + waymo_infos_val, filename) + filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' + print(f'Waymo info trainval file is saved to {filename}') + mmengine.dump(waymo_infos_train + waymo_infos_val, filename) # waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) # filename = save_path / f'{pkl_prefix}_infos_test.pkl' # print(f'Waymo info test file is saved to {filename}') diff --git a/tools/dataset_converters/update_infos_to_v2.py b/tools/dataset_converters/update_infos_to_v2.py index 42d0f5e8ba..ec4831d80e 100644 --- a/tools/dataset_converters/update_infos_to_v2.py +++ b/tools/dataset_converters/update_infos_to_v2.py @@ -1062,9 +1062,9 @@ def update_waymo_infos(pkl_path, out_dir): instance_list.append(empty_instance) temp_data_info['cam_sync_instances'] = instance_list - # cam_instances = generate_waymo_camera_instances( - # ori_info_dict, camera_types) - # temp_data_info['cam_instances'] = cam_instances + cam_instances = generate_waymo_camera_instances( + ori_info_dict, camera_types) + temp_data_info['cam_instances'] = cam_instances temp_data_info, _ = clear_data_info_unused_keys(temp_data_info) converted_list.append(temp_data_info) From 84d9fc787ec35bba9e2bb6b77d1540569bdd74f6 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 16 Aug 2023 16:57:01 +0800 Subject: [PATCH 05/25] add cam_instances --- check_coor.py | 24 +++++++++++++ get_gt_mini.py | 11 ++++++ mmdet3d/datasets/convert_utils.py | 8 +++-- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 30 ++++++++-------- tools/create_data.py | 35 +++++++++++++------ .../dataset_converters/update_infos_to_v2.py | 22 +++++++----- tools/dataset_converters/waymo_converter.py | 8 ++--- 7 files changed, 97 insertions(+), 41 deletions(-) create mode 100644 check_coor.py create mode 100644 get_gt_mini.py diff --git a/check_coor.py b/check_coor.py new file mode 100644 index 0000000000..ed1df35a33 --- /dev/null +++ b/check_coor.py @@ -0,0 +1,24 @@ +import mmengine +import numpy as np + +from mmdet3d.structures import (Box3DMode, CameraInstance3DBoxes, + LiDARInstance3DBoxes) + +val_infos = mmengine.load('data/waymo_mini/kitti_format/waymo_infos_val.pkl') +old_val_infos = mmengine.load( + 'data/waymo_mini/kitti_format/old_pkl/waymo_infos_val.pkl') +instance = np.array( + val_infos['data_list'][0]['cam_sync_instances'][0]['bbox_3d'])[np.newaxis, + ...] 
+instance = LiDARInstance3DBoxes(instance) +lidar2cam = np.array( + val_infos['data_list'][0]['images']['CAM_FRONT']['lidar2cam']) +cam_instance = instance.convert_to(Box3DMode.CAM, lidar2cam) + +old_instance = np.array(old_val_infos['data_list'][0]['cam_sync_instances'][0] + ['bbox_3d'])[np.newaxis, ...] + +old_lidar2cam = np.array( + old_val_infos['data_list'][0]['images']['CAM_FRONT']['lidar2cam']) +old_instance = CameraInstance3DBoxes(old_instance) +pass diff --git a/get_gt_mini.py b/get_gt_mini.py new file mode 100644 index 0000000000..e300324ed4 --- /dev/null +++ b/get_gt_mini.py @@ -0,0 +1,11 @@ +from waymo_open_dataset.protos.metrics_pb2 import Objects + +objects = Objects() +with open('./data/waymo_mini/waymo_format/gt.bin', 'rb') as f: + objects.ParseFromString(bytearray(f.read())) +new = Objects() +for obj in objects.objects: + if obj.context_name == '10203656353524179475_7625_000_7645_000': + new.objects.append(obj) +with open('./data/waymo_mini/waymo_format/gt_mini.bin', 'wb') as f: + f.write(new.SerializeToString()) diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py index 84a5c34fa9..9d65369593 100644 --- a/mmdet3d/datasets/convert_utils.py +++ b/mmdet3d/datasets/convert_utils.py @@ -319,7 +319,11 @@ def convert_annos(info: dict, cam_idx: int) -> dict: """Convert front-cam anns to i-th camera (KITTI-style info).""" rect = info['calib']['R0_rect'].astype(np.float32) # lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32) - lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32) + if cam_idx == 0: + lidar2cami = info['calib']['Tr_velo_to_cam'].astype(np.float32) + else: + lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype( + np.float32) annos = info['annos'] converted_annos = copy.deepcopy(annos) loc = annos['location'] @@ -336,7 +340,7 @@ def convert_annos(info: dict, cam_idx: int) -> dict: # gt_bboxes_3d = gt_bboxes_3d.convert_to( # Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( - Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True) + Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() converted_annos['location'] = gt_bboxes_3d[:, :3] converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6] diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 9132b45cb6..1027942d9b 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -5,8 +5,8 @@ voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] -# data_root = 'data/waymo_mini/kitti_format/' -data_root = 'data/waymo/kitti_format/' +data_root = 'data/waymo_mini/kitti_format/' +# data_root = 'data/waymo/kitti_format/' class_names = ['Car', 'Pedestrian', 'Cyclist'] metainfo = dict(classes=class_names) input_modality = dict(use_lidar=True, use_camera=False) @@ -193,7 +193,7 @@ dataset_type = 'WaymoDataset' val_dataloader = dict( - batch_size=4, + batch_size=1, num_workers=4, persistent_workers=True, drop_last=False, @@ -211,22 +211,22 @@ backend_args=backend_args)) test_dataloader = val_dataloader -# val_evaluator = dict( -# type='WaymoMetric', -# ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', -# 
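+# Ad-hoc sanity check: wrap the new LiDAR-frame annotation, project it into
+# the camera frame via lidar2cam, and compare against the old camera-frame
+# pkl loaded above.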
waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', -# data_root='./data/waymo_mini/waymo_format', -# backend_args=backend_args, -# convert_kitti_format=False, -# idx2metainfo='./data/waymo_mini/waymo_format/idx2metainfo.pkl') val_evaluator = dict( type='WaymoMetric', - ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', - waymo_bin_file='./data/waymo/waymo_format/gt.bin', - data_root='./data/waymo/waymo_format', + ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', + waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', + data_root='./data/waymo_mini/waymo_format', backend_args=backend_args, convert_kitti_format=False, - idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') + idx2metainfo='./data/waymo_mini/waymo_format/idx2metainfo.pkl') +# val_evaluator = dict( +# type='WaymoMetric', +# ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', +# waymo_bin_file='./data/waymo/waymo_format/gt.bin', +# data_root='./data/waymo/waymo_format', +# backend_args=backend_args, +# convert_kitti_format=False, +# idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') test_evaluator = val_evaluator vis_backends = [dict(type='LocalVisBackend')] diff --git a/tools/create_data.py b/tools/create_data.py index 4543756193..5495d9700a 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -173,7 +173,7 @@ def waymo_data_prep(root_path, version, out_dir, workers, - max_sweeps=5): + max_sweeps=10): """Prepare the info file for waymo dataset. Args: @@ -275,11 +275,16 @@ def semantickitti_data_prep(info_prefix, out_dir): '--only-gt-database', action='store_true', help='Whether to only generate ground truth database.') +parser.add_argument( + '--skip-extract-kitti-format', + action='store_true', + help='''Whether to skip extracting kitti format image and point cloud. 
+ Only used when dataset is waymo!''') args = parser.parse_args() if __name__ == '__main__': - from mmdet3d.utils import register_all_modules - register_all_modules() + from mmengine.registry import init_default_scope + init_default_scope('mmdet3d') if args.dataset == 'kitti': if args.only_gt_database: @@ -334,6 +339,22 @@ def semantickitti_data_prep(info_prefix, out_dir): dataset_name='NuScenesDataset', out_dir=args.out_dir, max_sweeps=args.max_sweeps) + elif args.dataset == 'waymo' and args.version == 'v1.4': + waymo_data_prep( + root_path=args.root_path, + info_prefix=args.extra_tag, + version=args.version, + out_dir=args.out_dir, + workers=args.workers, + max_sweeps=args.max_sweeps) + elif args.dataset == 'waymo' and args.version == 'v1.4-mini': + waymo_data_prep( + root_path=args.root_path, + info_prefix=args.extra_tag, + version=args.version, + out_dir=args.out_dir, + workers=args.workers, + max_sweeps=args.max_sweeps) elif args.dataset == 'lyft': train_version = f'{args.version}-train' lyft_data_prep( @@ -347,14 +368,6 @@ def semantickitti_data_prep(info_prefix, out_dir): info_prefix=args.extra_tag, version=test_version, max_sweeps=args.max_sweeps) - elif args.dataset == 'waymo': - waymo_data_prep( - root_path=args.root_path, - info_prefix=args.extra_tag, - version=args.version, - out_dir=args.out_dir, - workers=args.workers, - max_sweeps=args.max_sweeps) elif args.dataset == 'scannet': scannet_data_prep( root_path=args.root_path, diff --git a/tools/dataset_converters/update_infos_to_v2.py b/tools/dataset_converters/update_infos_to_v2.py index ec4831d80e..a779d6b6c7 100644 --- a/tools/dataset_converters/update_infos_to_v2.py +++ b/tools/dataset_converters/update_infos_to_v2.py @@ -961,18 +961,22 @@ def update_waymo_infos(pkl_path, out_dir): lidar_sweep['timestamp'] = ori_sweep['timestamp'] lidar_sweep['lidar_points']['lidar_path'] = Path( ori_sweep['velodyne_path']).name + # delete the num_pts_feats and lidar2ego + del lidar_sweep['lidar_points']['num_pts_feats'] + del lidar_sweep['lidar_points']['lidar2ego'] + # TODO: refactor image_sweeps info when we really need it. 
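# With num_pts_feats and lidar2ego deleted above, each kept sweep entry
# reduces to roughly this shape (placeholder values):
#
#     lidar_sweep = {
#         'timestamp': 1553640000000000,                  # microseconds
#         'ego2global': [[...], [...], [...], [...]],     # 4 x 4 pose
#         'lidar_points': {'lidar_path': '1000000.bin'},
#     }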
# image sweeps # image_sweep = dict() # image_sweep['sample_idx']= ori_sweep['image']['image_idx'] - image_sweep = get_single_image_sweep(camera_types) - image_sweep['ego2global'] = ori_sweep['pose'] - image_sweep['timestamp'] = ori_sweep['timestamp'] - img_path = Path(ori_sweep['image_path']).name - for cam_idx, cam_key in enumerate(camera_types): - image_sweep['images'][cam_key]['img_path'] = img_path + # image_sweep = get_single_image_sweep(camera_types) + # image_sweep['ego2global'] = ori_sweep['pose'] + # image_sweep['timestamp'] = ori_sweep['timestamp'] + # img_path = Path(ori_sweep['image_path']).name + # for cam_idx, cam_key in enumerate(camera_types): + # image_sweep['images'][cam_key]['img_path'] = img_path temp_data_info['lidar_sweeps'].append(lidar_sweep) - temp_data_info['image_sweeps'].append(image_sweep) + # temp_data_info['image_sweeps'].append(image_sweep) anns = ori_info_dict.get('annos', None) ignore_class_name = set() @@ -1107,8 +1111,8 @@ def generate_waymo_camera_instances(ori_info_dict, cam_keys): for cam_idx, cam_key in enumerate(cam_keys): annos = copy.deepcopy(ori_info_dict['cam_sync_annos']) - if cam_idx != 0: - annos = convert_annos(ori_info_dict, cam_idx) + + annos = convert_annos(ori_info_dict, cam_idx) ann_infos = get_kitti_style_2d_boxes( ori_info_dict, cam_idx, occluded=[0], annos=annos, dataset='waymo') diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index edfff10ca3..c20db23685 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -92,7 +92,7 @@ def __init__(self, self.tfrecord_pathnames = sorted( glob(join(self.load_dir, '*.tfrecord'))) - self.label_save_dir = f'{self.save_dir}/label_' + # self.label_save_dir = f'{self.save_dir}/label_' self.label_all_save_dir = f'{self.save_dir}/label_all' self.image_save_dir = f'{self.save_dir}/image_' self.calib_save_dir = f'{self.save_dir}/calib' @@ -100,7 +100,7 @@ def __init__(self, self.pose_save_dir = f'{self.save_dir}/pose' self.timestamp_save_dir = f'{self.save_dir}/timestamp' if self.save_cam_sync_labels: - self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_' + # self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_' self.cam_sync_label_all_save_dir = \ f'{self.save_dir}/cam_sync_label_all' @@ -450,10 +450,10 @@ def create_folder(self): self.pose_save_dir, self.timestamp_save_dir, ] - dir_list2 = [self.label_save_dir, self.image_save_dir] + dir_list2 = [self.image_save_dir] if self.save_cam_sync_labels: dir_list1.append(self.cam_sync_label_all_save_dir) - dir_list2.append(self.cam_sync_label_save_dir) + # dir_list2.append(self.cam_sync_label_save_dir) else: dir_list1 = [ self.calib_save_dir, self.pose_save_dir, From b7b3b62a47b835e893d5c2226607ea17ce8ddd7f Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 16 Aug 2023 16:59:38 +0800 Subject: [PATCH 06/25] add description for skip --- tools/create_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/create_data.py b/tools/create_data.py index 5495d9700a..3d07cb6169 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -279,7 +279,7 @@ def semantickitti_data_prep(info_prefix, out_dir): '--skip-extract-kitti-format', action='store_true', help='''Whether to skip extracting kitti format image and point cloud. 
- Only used when dataset is waymo!''') + Only used when dataset is waymo!''') args = parser.parse_args() if __name__ == '__main__': From 1995b5a9c736d5bf02f800c0f29d68e861bfc131 Mon Sep 17 00:00:00 2001 From: sjh Date: Fri, 18 Aug 2023 16:35:32 +0800 Subject: [PATCH 07/25] fix num_ins_per_cat --- mmdet3d/datasets/det3d_dataset.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mmdet3d/datasets/det3d_dataset.py b/mmdet3d/datasets/det3d_dataset.py index 11caae4729..8e1570e794 100644 --- a/mmdet3d/datasets/det3d_dataset.py +++ b/mmdet3d/datasets/det3d_dataset.py @@ -113,7 +113,7 @@ def __init__(self, ori_label = self.METAINFO['classes'].index(name) self.label_mapping[ori_label] = label_idx - self.num_ins_per_cat = {name: 0 for name in metainfo['classes']} + self.num_ins_per_cat = [0] * len(metainfo['classes']) else: self.label_mapping = { i: i @@ -121,10 +121,7 @@ def __init__(self, } self.label_mapping[-1] = -1 - self.num_ins_per_cat = { - name: 0 - for name in self.METAINFO['classes'] - } + self.num_ins_per_cat = [0] * len(self.METAINFO['classes']) super().__init__( ann_file=ann_file, @@ -148,7 +145,8 @@ def __init__(self, print_log('-' * 30, 'current') print_log(f'The length of the dataset: {len(self)}', 'current') content_show = [['category', 'number']] - for cat_name, num in self.num_ins_per_cat.items(): + for label, num in enumerate(self.num_ins_per_cat): + cat_name = self.metainfo['classes'][label] content_show.append([cat_name, num]) table = AsciiTable(content_show) print_log( @@ -256,8 +254,7 @@ def parse_ann_info(self, info: dict) -> Union[dict, None]: for label in ann_info['gt_labels_3d']: if label != -1: - cat_name = self.metainfo['classes'][label] - self.num_ins_per_cat[cat_name] += 1 + self.num_ins_per_cat[label] += 1 return ann_info From a9c7cd3cd85b68d3448380ecdeb13ebe0fb85bb5 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 21 Aug 2023 23:13:15 +0800 Subject: [PATCH 08/25] refactor waymo create --- mp.py | 43 +++ tools/create_data.py | 63 ++-- tools/dataset_converters/waymo_converter.py | 388 ++++++++++++++++---- 3 files changed, 394 insertions(+), 100 deletions(-) create mode 100644 mp.py diff --git a/mp.py b/mp.py new file mode 100644 index 0000000000..3262dd5eac --- /dev/null +++ b/mp.py @@ -0,0 +1,43 @@ +import multiprocessing +import time + + +class A(object): + + def __init__(self): + self.a = [] + self.b = None + + def get_num_a(self, num): + time.sleep(3) + self.a = [0] + self.get_num_b(num) + print(self.a) + return self.a + + def get_num_b(self, num): + self.a.append(num) + + def sum(self): + print(self.a) + + def run(self): + # p1 = multiprocessing.Process(target=self.get_num_a) + # p2 = multiprocessing.Process(target=self.get_num_b) + # p1.start() + # p2.start() + # p1.join() + # p2.join() + p1 = multiprocessing.Pool(2) + list_a = p1.map(self.get_num_a, [1, 2]) + self.sum() + print(list_a) + + +if __name__ == '__main__': + + t1 = time.time() + a = A() + a.run() + t2 = time.time() + print('cost time :{}'.format(t2 - t1)) diff --git a/tools/create_data.py b/tools/create_data.py index 3d07cb6169..aa0288ae93 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -7,10 +7,8 @@ from tools.dataset_converters import lyft_converter as lyft_converter from tools.dataset_converters import nuscenes_converter as nuscenes_converter from tools.dataset_converters import semantickitti_converter -# from tools.dataset_converters.create_gt_database import ( -# GTDatabaseCreater, create_groundtruth_database) -from 
tools.dataset_converters.create_gt_database import \ - create_groundtruth_database +from tools.dataset_converters.create_gt_database import ( + GTDatabaseCreater, create_groundtruth_database) from tools.dataset_converters.update_infos_to_v2 import update_pkl_infos @@ -174,7 +172,13 @@ def waymo_data_prep(root_path, out_dir, workers, max_sweeps=10): - """Prepare the info file for waymo dataset. + """Prepare waymo dataset. There are 3 steps as follows: Step 1. Extract + camera images and lidar point clouds from waymo raw data in '*.tfreord' and + save as kitti format. Step 2. Generate waymo train/val/test infos and save + as pickle file. Step 3. Generate waymo ground truth database (point clouds + within each 3D bounding box) for data augmentation in training. Steps 1 and + 2 will be done in Waymo2KITTI, and step 3 will be done in + GTDatabaseCreater. Args: root_path (str): Path of dataset root. @@ -184,10 +188,12 @@ def waymo_data_prep(root_path, max_sweeps (int, optional): Number of input consecutive frames. Default: 5. Here we store pose information of these frames for later use. - """ + """ # noqa: E501 from tools.dataset_converters import waymo_converter as waymo - splits = ['training', 'validation'] + splits = [ + 'training', 'validation', 'testing', 'testing_3d_camera_only_detection' + ] for i, split in enumerate(splits): load_dir = osp.join(root_path, 'waymo_format', split) if split == 'validation': @@ -200,32 +206,33 @@ def waymo_data_prep(root_path, prefix=str(i), workers=workers, test_mode=(split - in ['testing', 'testing_3d_camera_only_detection'])) + in ['testing', 'testing_3d_camera_only_detection']), + info_prefix=info_prefix) converter.convert() - # from tools.dataset_converters.waymo_converter import \ - # create_ImageSets_img_ids - # create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) + from tools.dataset_converters.waymo_converter import \ + create_ImageSets_img_ids + create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) # Generate waymo infos - out_dir = osp.join(out_dir, 'kitti_format') - kitti.create_waymo_info_file( - out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) - info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') - info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') - info_trainval_path = osp.join(out_dir, f'{info_prefix}_infos_trainval.pkl') + # out_dir = osp.join(out_dir, 'kitti_format') + # kitti.create_waymo_info_file( + # out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) + # info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') + # info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') + # info_trainval_path = osp.join(out_d, f'{info_prefix}_infos_trainval.pkl') # info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl') - update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path) - update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path) - update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path) + # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path) + # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path) + # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path) # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_test_path) - # GTDatabaseCreater( - # 'WaymoDataset', - # out_dir, - # info_prefix, - # f'{info_prefix}_infos_train.pkl', - # relative_path=False, - # with_mask=False, - # num_worker=workers).create() + GTDatabaseCreater( + 
'WaymoDataset', + out_dir, + info_prefix, + f'{info_prefix}_infos_train.pkl', + relative_path=False, + with_mask=False, + num_worker=workers).create() def semantickitti_data_prep(info_prefix, out_dir): diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index c20db23685..e269274540 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -9,23 +9,30 @@ raise ImportError('Please run "pip install waymo-open-dataset-tf-2-6-0" ' '>1.4.5 to install the official devkit first.') +import copy import os +import os.path as osp from glob import glob from os.path import exists, join import mmengine import numpy as np import tensorflow as tf +from nuscenes.utils.geometry_utils import view_points +from PIL import Image from waymo_open_dataset.utils import range_image_utils, transform_utils from waymo_open_dataset.utils.frame_utils import \ parse_range_image_and_camera_projection +from mmdet3d.datasets.convert_utils import post_process_coords +from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes, points_cam2img -class Waymo2KITTI(object): - """Waymo to KITTI converter. - This class serves as the converter to change the waymo raw data to KITTI - format. +class Waymo2KITTI(object): + """Waymo to KITTI converter.There are 2 steps as follows: Step 1. Extract + camera images and lidar point clouds from waymo raw data in '*.tfreord' and + save as kitti format. Step 2. Generate waymo train/val/test infos and save + as pickle file. Args: load_dir (str): Directory to load waymo raw data. @@ -36,8 +43,10 @@ class Waymo2KITTI(object): Defaults to 64. test_mode (bool, optional): Whether in the test_mode. Defaults to False. - save_cam_sync_labels (bool, optional): Whether to save cam sync labels. - Defaults to True. + save_cam_sync_instances (bool, optional): Whether to save cam sync + instances. Defaults to True. + save_cam_instances (bool, optional): Whether to save cam instances. + Defaults to False. 
""" def __init__(self, @@ -46,18 +55,9 @@ def __init__(self, prefix, workers=64, test_mode=False, - save_cam_sync_labels=True): - self.filter_empty_3dboxes = True - self.filter_no_label_zone_points = True - - self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST'] - - # Only data collected in specific locations will be converted - # If set None, this filter is disabled - # Available options: location_sf (main dataset) - self.selected_waymo_locations = None - self.save_track_id = False - + save_cam_sync_instances=True, + save_cam_instances=False, + info_prefix='waymo'): # turn on eager execution for older tensorflow versions if int(tf.__version__.split('.')[0]) < 2: tf.enable_eager_execution() @@ -74,55 +74,82 @@ def __init__(self, self.type_list = [ 'UNKNOWN', 'VEHICLE', 'PEDESTRIAN', 'SIGN', 'CYCLIST' ] - self.waymo_to_kitti_class_map = { - 'UNKNOWN': 'DontCare', - 'PEDESTRIAN': 'Pedestrian', - 'VEHICLE': 'Car', - 'CYCLIST': 'Cyclist', - 'SIGN': 'Sign' # not in kitti - } + + # MMDetection3D unified camera keys & class names + self.camera_types = [ + 'CAM_FRONT', + 'CAM_FRONT_LEFT', + 'CAM_FRONT_RIGHT', + 'CAM_SIDE_LEFT', + 'CAM_SIDE_RIGHT', + ] + self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST'] self.load_dir = load_dir self.save_dir = save_dir self.prefix = prefix self.workers = int(workers) self.test_mode = test_mode - self.save_cam_sync_labels = save_cam_sync_labels + self.save_cam_sync_instances = save_cam_sync_instances + self.save_cam_instances = save_cam_instances + + # TODO: Discuss filter_empty_3dboxes and filter_no_label_zone_points + self.filter_empty_3dboxes = True + self.filter_no_label_zone_points = True + self.save_track_id = False self.tfrecord_pathnames = sorted( glob(join(self.load_dir, '*.tfrecord'))) - # self.label_save_dir = f'{self.save_dir}/label_' - self.label_all_save_dir = f'{self.save_dir}/label_all' self.image_save_dir = f'{self.save_dir}/image_' - self.calib_save_dir = f'{self.save_dir}/calib' self.point_cloud_save_dir = f'{self.save_dir}/velodyne' - self.pose_save_dir = f'{self.save_dir}/pose' - self.timestamp_save_dir = f'{self.save_dir}/timestamp' - if self.save_cam_sync_labels: - # self.cam_sync_label_save_dir = f'{self.save_dir}/cam_sync_label_' - self.cam_sync_label_all_save_dir = \ - f'{self.save_dir}/cam_sync_label_all' - self.create_folder() + # Create folder for saving KITTI format camera images and + # lidar point clouds. + if 'testing_3d_camera_only_detection' not in self.load_dir: + mmengine.mkdir_or_exist(self.point_cloud_save_dir) + for i in range(5): + mmengine.mkdir_or_exist(f'{self.image_save_dir}{str(i)}') def convert(self): """Convert action.""" print('Start converting ...') # mmengine.track_progress(self.convert_one, range(len(self))) - mmengine.track_parallel_progress(self.convert_one, range(len(self)), - self.workers) + data_infos = mmengine.track_parallel_progress(self.convert_one, + range(len(self)), + self.workers) + data_list = [] + for data_info in data_infos: + data_list.extend(data_info) + metainfo = dict() + metainfo['dataset'] = 'waymo' + metainfo['version'] = '1.4' + metainfo['info_version'] = '1.1' + waymo_infos = dict(data_infos=data_list, metainfo=metainfo) + filenames = osp.join( + osp.dirname(self.save_dir), + f'{self.prefix}_infos_f{osp.basename(self.save_dir)}.pkl') + mmengine.dump(waymo_infos, filenames) print('\nFinished ...') def convert_one(self, file_idx): - """Convert action for single file. + """Convert one '*.tfrecord' file to kitti format. 
Each file stores all + the frames (about 200 frames) in current scene. We treat each frame as + a sample, save their images and point clouds in kitti format, and then + create info for all frames. Args: file_idx (int): Index of the file to be converted. + + Returns: + file_infos (list): Waymo infos for all frames in current file. """ pathname = self.tfrecord_pathnames[file_idx] dataset = tf.data.TFRecordDataset(pathname, compression_type='') + # NOTE: file_infos is not shared between processes, only stores frame + # infos within the current file. + self.file_infos = [] for frame_idx, data in enumerate(dataset): frame = dataset_pb2.Frame() @@ -132,14 +159,15 @@ def convert_one(self, file_idx): not in self.selected_waymo_locations): continue - # self.save_image(frame, file_idx, frame_idx) - # self.save_calib(frame, file_idx, frame_idx) - # self.save_lidar(frame, file_idx, frame_idx) - # self.save_pose(frame, file_idx, frame_idx) - # self.save_timestamp(frame, file_idx, frame_idx) + # Step 1. + self.save_image(frame, file_idx, frame_idx) + self.save_lidar(frame, file_idx, frame_idx) + # Step 2. + # TODO save the depth image for waymo challenge solution. + self.create_waymo_info_file(frame, file_idx, frame_idx) if not self.test_mode: - # TODO save the depth image for waymo challenge solution. + self.save_label(frame, file_idx, frame_idx) if self.save_cam_sync_labels: self.save_label(frame, file_idx, frame_idx, cam_sync=True) @@ -441,33 +469,6 @@ def save_timestamp(self, frame, file_idx, frame_idx): 'w') as f: f.write(str(frame.timestamp_micros)) - def create_folder(self): - """Create folder for data preprocessing.""" - if not self.test_mode: - dir_list1 = [ - self.label_all_save_dir, - self.calib_save_dir, - self.pose_save_dir, - self.timestamp_save_dir, - ] - dir_list2 = [self.image_save_dir] - if self.save_cam_sync_labels: - dir_list1.append(self.cam_sync_label_all_save_dir) - # dir_list2.append(self.cam_sync_label_save_dir) - else: - dir_list1 = [ - self.calib_save_dir, self.pose_save_dir, - self.timestamp_save_dir - ] - dir_list2 = [self.image_save_dir] - if 'testing_3d_camera_only_detection' not in self.load_dir: - dir_list1.append(self.point_cloud_save_dir) - for d in dir_list1: - mmengine.mkdir_or_exist(d) - for d in dir_list2: - for i in range(5): - mmengine.mkdir_or_exist(f'{d}{str(i)}') - def convert_range_image_to_point_cloud(self, frame, range_images, @@ -609,6 +610,249 @@ def cart_to_homo(self, mat): raise ValueError(mat.shape) return ret + def create_waymo_info_file(self, frame, file_idx, frame_idx): + frame_infos = dict() + + # Gather frame infos + sample_idx = int( + f'{self.prefix}{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}') + frame_infos['sample_idx'] = sample_idx + frame_infos['timestamp'] = frame.timestamp_micros + frame_infos['ego2global'] = np.array(frame.pose.transform).reshape( + 4, 4).astype(np.float32).tolist() + frame_infos['context_name'] = frame.context_name + + # Gather camera infos + frame_infos['images'] = dict() + # waymo front camera to kitti reference camera + T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0], + [1.0, 0.0, 0.0]]) + camera_calibs = [] + Tr_velo_to_cams = [] + for camera in frame.context.camera_calibrations: + # extrinsic parameters + T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape( + 4, 4) + T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle) + Tr_velo_to_cam = \ + self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam + Tr_velo_to_cams.append(Tr_velo_to_cam) + + # intrinsic parameters + camera_calib = 
np.zeros((3, 4)) + camera_calib[0, 0] = camera.intrinsic[0] + camera_calib[1, 1] = camera.intrinsic[1] + camera_calib[0, 2] = camera.intrinsic[2] + camera_calib[1, 2] = camera.intrinsic[3] + camera_calib[2, 2] = 1 + camera_calib = list(camera_calib.reshape(12)) + camera_calib = [f'{i:e}' for i in camera_calib] + camera_calibs.append(camera_calib) + + for cam_key, camera_calib, Tr_velo_to_cam, img in zip( + self.camera_types, camera_calibs, Tr_velo_to_cams, + frame.images): + cam_infos = dict() + cam_infos['img_path'] = str(sample_idx) + '.jpg' + width, height = Image.open(img).size + cam_infos['height'] = height + cam_infos['width'] = width + cam_infos['lidar2cam'] = Tr_velo_to_cam.astype(np.float32).tolist() + cam_infos['cam2img'] = camera_calib.astype(np.float32).tolist() + cam_infos['lidar2img'] = (camera_calib @ Tr_velo_to_cam).astype( + np.float32).tolist() + frame_infos['images'][cam_key] = cam_infos + + # Gather lidar infos + lidar_infos = dict() + lidar_infos['lidar_path'] = str(sample_idx) + '.bin' + lidar_infos['num_pts_feats'] = 6 + frame_infos['lidar_points'] = lidar_infos + + # Gather lidar sweeps and camera sweeps infos + # TODO: Add lidar2img in image sweeps infos when we need it. + # TODO: Consider merging lidar sweeps infos and image sweeps infos. + lidar_sweeps_infos, image_sweeps_infos = [], [] + while len(lidar_sweeps_infos) < self.max_sweeps: + prev_lidar_infos = dict() + prev_image_infos = dict() + if frame_idx - len(lidar_sweeps_infos) - 1 >= 0: + prev_frame_infos = self.file_infos[-len(lidar_sweeps_infos) - + 1] + prev_lidar_infos['timestamp'] = prev_frame_infos['timestamp'] + prev_lidar_infos['ego2global'] = prev_frame_infos['ego2global'] + lidar_path = prev_frame_infos['lidar_points']['lidar_path'] + prev_lidar_infos['lidar_path'] = lidar_path + lidar_sweeps_infos.append(prev_lidar_infos) + + prev_image_infos['timestamp'] = prev_frame_infos['timestamp'] + prev_image_infos['ego2global'] = prev_frame_infos['ego2global'] + prev_image_infos['images'] = dict() + for cam_key in self.camera_types: + prev_image_infos['images'][cam_key] = dict() + img_path = prev_frame_infos['images'][cam_key]['img_path'] + prev_image_infos['images'][cam_key]['img_path'] = img_path + image_sweeps_infos.append(prev_image_infos[cam_key]) + frame_infos['lidar_sweeps'] = lidar_sweeps_infos + frame_infos['images_sweeps'] = image_sweeps_infos + + if not self.test_mode: + # Gather instances infos which is used for lidar-based 3D detection + frame_infos['instances'] = self.gather_instance_info(frame) + # Gather cam_sync_instances infos which is used for image-based + # (multi-view) 3D detection. + if self.save_cam_sync_instances: + frame_infos['cam_sync_instances'] = self.gather_instance_info( + frame, cam_sync=True) + # Gather cam_instances infos which is used for image-based + # (monocular) 3D detection (optional). 
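# Roughly, the entry assembled by create_waymo_info_file ends up shaped as
# below (abbreviated; all values are placeholders):
#
#     frame_infos = {
#         'sample_idx': 1000023,            # prefix + file idx + frame idx
#         'timestamp': 1553640000000000,
#         'ego2global': [[...], ...],       # 4 x 4 pose
#         'context_name': '...',
#         'images': {'CAM_FRONT': {'img_path': '1000023.jpg', 'height': 1280,
#                                  'width': 1920, 'lidar2cam': [[...], ...],
#                                  'cam2img': [[...], ...],
#                                  'lidar2img': [[...], ...]}, ...},
#         'lidar_points': {'lidar_path': '1000023.bin', 'num_pts_feats': 6},
#         'lidar_sweeps': [...],            # only when previous frames exist
#         'instances': [...],               # train/val splits only
#     }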
+ if self.save_cam_instances: + frame_infos['cam_instances'] = self.gather_cam_instance_info( + copy.deepcopy(frame_infos['cam_sync_instances']), + frame_infos['images']) + + def gather_instance_info(self, frame, cam_sync=False): + id_to_bbox = dict() + id_to_name = dict() + for labels in frame.projected_lidar_labels: + name = labels.name + for label in labels.labels: + # TODO: need a workaround as bbox may not belong to front cam + bbox = [ + label.box.center_x - label.box.length / 2, + label.box.center_y - label.box.width / 2, + label.box.center_x + label.box.length / 2, + label.box.center_y + label.box.width / 2 + ] + id_to_bbox[label.id] = bbox + id_to_name[label.id] = name - 1 + + group_id = 0 + instance_infos = [] + for obj in frame.laser_labels: + instance_info = dict() + bounding_box = None + name = None + id = obj.id + for proj_cam in self.cam_list: + if id + proj_cam in id_to_bbox: + bounding_box = id_to_bbox.get(id + proj_cam) + name = id_to_name.get(id + proj_cam) + break + + # NOTE: the 2D labels do not have strict correspondence with + # the projected 2D lidar labels + # e.g.: the projected 2D labels can be in camera 2 + # while the most_visible_camera can have id 4 + if cam_sync: + if obj.most_visible_camera_name: + name = self.cam_list.index( + f'_{obj.most_visible_camera_name}') + box3d = obj.camera_synced_box + else: + continue + else: + box3d = obj.box + + if bounding_box is None or name is None: + name = 0 + bounding_box = [0.0, 0.0, 0.0, 0.0] + + my_type = self.type_list[obj.type] + + if my_type not in self.selected_waymo_classes: + continue + + if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1: + continue + + group_id += 1 + instance_info['group_id'] = group_id + instance_info['camera_id'] = name + instance_info['bbox'] = bounding_box + instance_info['bbox_label'] = obj.type - 1 + + height = box3d.height + width = box3d.width + length = box3d.length + + # NOTE: We save the bottom center of 3D bboxes. + x = box3d.center_x + y = box3d.center_y + z = box3d.center_z - height / 2 + + rotation_y = box3d.heading + + instance_info['bbox3d'] = np.array( + [x, y, z, length, width, height, + rotation_y]).astype(np.float32).tolist() + instance_info['bbox_label_3d'] = obj.type - 1 + instance_info['num_lidar_pts'] = obj.num_lidar_points_in_box + + if self.save_track_id: + instance_info['track_id'] = obj.id + instance_infos.append(instance_info) + return instance_infos + + def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): + + cam_instances = dict() + for cam_type in self.camera_types: + lidar2cam = images[cam_type]['lidar2cam'] + cam2img = images[cam_type]['cam2img'] + cam_instances[cam_type] = [] + for instance in cam_sync_instances: + cam_instance = dict() + gt_bboxes_3d = instance['bbox3d'] + # Convert lidar coordinates to camera coordinates + gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( + Box3DMode.CAM, lidar2cam, correct_yaw=True).numpy() + corners_3d = gt_bboxes_3d.corners + corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8) + in_camera = np.argwhere(corners_3d[2, :] > 0).flatten() + corners_3d = corners_3d[:, in_camera] + # Project 3d box to 2d. + corner_coords = view_points(corners_3d, cam2img, + True).T[:, :2].tolist() + + # Keep only corners that fall within the image. + final_coords = post_process_coords( + corner_coords, + imsize=(images[cam_type]['weight'], + images[cam_type]['height'])) + + # Skip if the convex hull of the re-projected corners + # does not intersect the image canvas. 
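# The projection/clipping pair used above can be exercised in isolation; the
# corner coordinates and the intrinsic matrix below are made up for
# illustration:
#
#     import numpy as np
#     from nuscenes.utils.geometry_utils import view_points
#     from mmdet3d.datasets.convert_utils import post_process_coords
#
#     corners_3d = np.array([[-1., 1., 1., -1., -1., 1., 1., -1.],
#                            [-1., -1., 1., 1., -1., -1., 1., 1.],
#                            [10., 10., 10., 10., 12., 12., 12., 12.]])
#     cam2img = np.array([[2000., 0., 960.], [0., 2000., 640.], [0., 0., 1.]])
#     coords = view_points(corners_3d, cam2img, normalize=True).T[:, :2]
#     print(post_process_coords(coords.tolist(), imsize=(1920, 1280)))
#     # (min_x, min_y, max_x, max_y), or None when fully off-screen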
+ if final_coords is None: + continue + else: + min_x, min_y, max_x, max_y = final_coords + + cam_instance['bbox'] = [min_x, min_y, max_x, max_y] + cam_instance['bbox_label'] = instance['bbox_label'] + cam_instance['bbox3d'] = gt_bboxes_3d.astype( + np.float32).tolist() + cam_instance['bbox_label_3d'] = instance['bbox_label_3d'] + + center_3d = gt_bboxes_3d.gravity_center.numpy() + center_2d_with_depth = points_cam2img( + center_3d, lidar2cam, with_depth=True) + center_2d_with_depth = center_2d_with_depth.squeeze().tolist() + + # normalized center2D + depth + # if samples with depth < 0 will be removed + if center_2d_with_depth[2] <= 0: + continue + cam_instance['center_2d'] = center_2d_with_depth[:2] + cam_instance['depth'] = center_2d_with_depth[2] + + # TODO: Discuss whether following info is necessary + cam_instance['bbox_3d_isvalid'] = True + cam_instance['velocity'] = -1 + cam_instances[cam_type].append(cam_instance) + + return cam_instances + def create_ImageSets_img_ids(root_dir, splits): save_dir = join(root_dir, 'ImageSets/') From 257bcb3c346ece67126acf754cc7e3d5b8ccc68a Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 23 Aug 2023 15:58:22 +0800 Subject: [PATCH 09/25] fix waymo create --- tools/create_data.py | 51 +++++++------ tools/dataset_converters/waymo_converter.py | 85 +++++++++++---------- 2 files changed, 73 insertions(+), 63 deletions(-) diff --git a/tools/create_data.py b/tools/create_data.py index aa0288ae93..afb75ef484 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -171,13 +171,14 @@ def waymo_data_prep(root_path, version, out_dir, workers, - max_sweeps=10): + max_sweeps=10, + skip_gather_cam_instances_info=False): """Prepare waymo dataset. There are 3 steps as follows: Step 1. Extract camera images and lidar point clouds from waymo raw data in '*.tfreord' and save as kitti format. Step 2. Generate waymo train/val/test infos and save - as pickle file. Step 3. Generate waymo ground truth database (point clouds - within each 3D bounding box) for data augmentation in training. Steps 1 and - 2 will be done in Waymo2KITTI, and step 3 will be done in + as pickle file. Step 3. Generate waymo ground truth database (point + cloudswithin each 3D bounding box) for data augmentation in training. Steps + 1 and 2 will be done in Waymo2KITTI, and step 3 will be done in GTDatabaseCreater. Args: @@ -186,14 +187,22 @@ def waymo_data_prep(root_path, out_dir (str): Output directory of the generated info file. workers (int): Number of threads to be used. max_sweeps (int, optional): Number of input consecutive frames. - Default: 5. Here we store pose information of these frames - for later use. - """ # noqa: E501 + Default to 10. Here we store ego2global information of these + frames for later use. + skip_gather_cam_instances_info (bool, optional): Whether to skip + gather cam_instances infos in Step 2. Default to False. 
+ """ # noqa from tools.dataset_converters import waymo_converter as waymo - splits = [ - 'training', 'validation', 'testing', 'testing_3d_camera_only_detection' - ] + if version == 'v1.4': + splits = [ + 'training', 'validation', 'testing', + 'testing_3d_camera_only_detection' + ] + elif version == 'v1.4-mini': + splits = ['training', 'validation'] + else: + raise NotImplementedError(f'Unsupported Waymo version {version}!') for i, split in enumerate(splits): load_dir = osp.join(root_path, 'waymo_format', split) if split == 'validation': @@ -207,7 +216,10 @@ def waymo_data_prep(root_path, workers=workers, test_mode=(split in ['testing', 'testing_3d_camera_only_detection']), - info_prefix=info_prefix) + info_prefix=info_prefix, + max_sweeps=max_sweeps, + split=split, + save_cam_instances=not skip_gather_cam_instances_info) converter.convert() from tools.dataset_converters.waymo_converter import \ @@ -283,9 +295,9 @@ def semantickitti_data_prep(info_prefix, out_dir): action='store_true', help='Whether to only generate ground truth database.') parser.add_argument( - '--skip-extract-kitti-format', + '--skip-gather-cam_instances-info', action='store_true', - help='''Whether to skip extracting kitti format image and point cloud. + help='''Whether to skip gathering cam_instances info. Only used when dataset is waymo!''') args = parser.parse_args() @@ -346,22 +358,15 @@ def semantickitti_data_prep(info_prefix, out_dir): dataset_name='NuScenesDataset', out_dir=args.out_dir, max_sweeps=args.max_sweeps) - elif args.dataset == 'waymo' and args.version == 'v1.4': + elif args.dataset == 'waymo': waymo_data_prep( root_path=args.root_path, info_prefix=args.extra_tag, version=args.version, out_dir=args.out_dir, workers=args.workers, - max_sweeps=args.max_sweeps) - elif args.dataset == 'waymo' and args.version == 'v1.4-mini': - waymo_data_prep( - root_path=args.root_path, - info_prefix=args.extra_tag, - version=args.version, - out_dir=args.out_dir, - workers=args.workers, - max_sweeps=args.max_sweeps) + max_sweeps=args.max_sweeps, + skip_gather_cam_instances_info=args.skip_gather_cam_instances_info) elif args.dataset == 'lyft': train_version = f'{args.version}-train' lyft_data_prep( diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index e269274540..a8c63815de 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -13,6 +13,7 @@ import os import os.path as osp from glob import glob +from io import BytesIO from os.path import exists, join import mmengine @@ -47,7 +48,11 @@ class Waymo2KITTI(object): instances. Defaults to True. save_cam_instances (bool, optional): Whether to save cam instances. Defaults to False. - """ + info_prefix (str, optional): Prefix of info filename. + Defaults to 'waymo'. + max_sweeps (int, optional): Max length of sweeps. Defaults to 10. + split (str, optional): Split of the data. Defaults to 'training'. 
+ """ # noqa def __init__(self, load_dir, @@ -56,8 +61,10 @@ def __init__(self, workers=64, test_mode=False, save_cam_sync_instances=True, - save_cam_instances=False, - info_prefix='waymo'): + save_cam_instances=True, + info_prefix='waymo', + max_sweeps=10, + split='training'): # turn on eager execution for older tensorflow versions if int(tf.__version__.split('.')[0]) < 2: tf.enable_eager_execution() @@ -92,6 +99,9 @@ def __init__(self, self.test_mode = test_mode self.save_cam_sync_instances = save_cam_sync_instances self.save_cam_instances = save_cam_instances + self.info_prefix = info_prefix + self.max_sweeps = max_sweeps + self.split = split # TODO: Discuss filter_empty_3dboxes and filter_no_label_zone_points self.filter_empty_3dboxes = True @@ -128,7 +138,7 @@ def convert(self): waymo_infos = dict(data_infos=data_list, metainfo=metainfo) filenames = osp.join( osp.dirname(self.save_dir), - f'{self.prefix}_infos_f{osp.basename(self.save_dir)}.pkl') + f'{self.info_prefix}_infos_{self.split}.pkl') mmengine.dump(waymo_infos, filenames) print('\nFinished ...') @@ -149,15 +159,11 @@ def convert_one(self, file_idx): # NOTE: file_infos is not shared between processes, only stores frame # infos within the current file. - self.file_infos = [] + file_infos = [] for frame_idx, data in enumerate(dataset): frame = dataset_pb2.Frame() frame.ParseFromString(bytearray(data.numpy())) - if (self.selected_waymo_locations is not None - and frame.context.stats.location - not in self.selected_waymo_locations): - continue # Step 1. self.save_image(frame, file_idx, frame_idx) @@ -165,12 +171,8 @@ def convert_one(self, file_idx): # Step 2. # TODO save the depth image for waymo challenge solution. - self.create_waymo_info_file(frame, file_idx, frame_idx) - if not self.test_mode: - - self.save_label(frame, file_idx, frame_idx) - if self.save_cam_sync_labels: - self.save_label(frame, file_idx, frame_idx, cam_sync=True) + self.create_waymo_info_file(frame, file_idx, frame_idx, file_infos) + return file_infos def __len__(self): """Length of the filename list.""" @@ -610,17 +612,17 @@ def cart_to_homo(self, mat): raise ValueError(mat.shape) return ret - def create_waymo_info_file(self, frame, file_idx, frame_idx): + def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): frame_infos = dict() # Gather frame infos - sample_idx = int( - f'{self.prefix}{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}') - frame_infos['sample_idx'] = sample_idx + sample_idx = \ + f'{self.prefix}{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}' + frame_infos['sample_idx'] = int(sample_idx) frame_infos['timestamp'] = frame.timestamp_micros frame_infos['ego2global'] = np.array(frame.pose.transform).reshape( 4, 4).astype(np.float32).tolist() - frame_infos['context_name'] = frame.context_name + frame_infos['context_name'] = frame.context.name # Gather camera infos frame_infos['images'] = dict() @@ -645,8 +647,6 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx): camera_calib[0, 2] = camera.intrinsic[2] camera_calib[1, 2] = camera.intrinsic[3] camera_calib[2, 2] = 1 - camera_calib = list(camera_calib.reshape(12)) - camera_calib = [f'{i:e}' for i in camera_calib] camera_calibs.append(camera_calib) for cam_key, camera_calib, Tr_velo_to_cam, img in zip( @@ -654,7 +654,7 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx): frame.images): cam_infos = dict() cam_infos['img_path'] = str(sample_idx) + '.jpg' - width, height = Image.open(img).size + width, height = Image.open(BytesIO(img.image)).size 
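# The BytesIO decode above reads the JPEG dimensions without touching disk;
# the same pattern with a synthetic image (PIL only, for illustration):
#
#     from io import BytesIO
#     from PIL import Image
#
#     buf = BytesIO()
#     Image.new('RGB', (1920, 1280)).save(buf, format='JPEG')
#     width, height = Image.open(BytesIO(buf.getvalue())).size  # 1920, 1280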
cam_infos['height'] = height cam_infos['width'] = width cam_infos['lidar2cam'] = Tr_velo_to_cam.astype(np.float32).tolist() @@ -673,12 +673,11 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx): # TODO: Add lidar2img in image sweeps infos when we need it. # TODO: Consider merging lidar sweeps infos and image sweeps infos. lidar_sweeps_infos, image_sweeps_infos = [], [] - while len(lidar_sweeps_infos) < self.max_sweeps: + for prev_offset in range(-1, -self.max_sweeps - 1, -1): prev_lidar_infos = dict() prev_image_infos = dict() - if frame_idx - len(lidar_sweeps_infos) - 1 >= 0: - prev_frame_infos = self.file_infos[-len(lidar_sweeps_infos) - - 1] + if frame_idx + prev_offset >= 0: + prev_frame_infos = file_infos[prev_offset] prev_lidar_infos['timestamp'] = prev_frame_infos['timestamp'] prev_lidar_infos['ego2global'] = prev_frame_infos['ego2global'] lidar_path = prev_frame_infos['lidar_points']['lidar_path'] @@ -692,9 +691,11 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx): prev_image_infos['images'][cam_key] = dict() img_path = prev_frame_infos['images'][cam_key]['img_path'] prev_image_infos['images'][cam_key]['img_path'] = img_path - image_sweeps_infos.append(prev_image_infos[cam_key]) - frame_infos['lidar_sweeps'] = lidar_sweeps_infos - frame_infos['images_sweeps'] = image_sweeps_infos + image_sweeps_infos.append(prev_image_infos) + if lidar_sweeps_infos: + frame_infos['lidar_sweeps'] = lidar_sweeps_infos + if image_sweeps_infos: + frame_infos['images_sweeps'] = image_sweeps_infos if not self.test_mode: # Gather instances infos which is used for lidar-based 3D detection @@ -710,6 +711,7 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx): frame_infos['cam_instances'] = self.gather_cam_instance_info( copy.deepcopy(frame_infos['cam_sync_instances']), frame_infos['images']) + file_infos.append(frame_infos) def gather_instance_info(self, frame, cam_sync=False): id_to_bbox = dict() @@ -762,6 +764,8 @@ def gather_instance_info(self, frame, cam_sync=False): if my_type not in self.selected_waymo_classes: continue + else: + label = self.selected_waymo_classes.index(my_type) if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1: continue @@ -770,7 +774,7 @@ def gather_instance_info(self, frame, cam_sync=False): instance_info['group_id'] = group_id instance_info['camera_id'] = name instance_info['bbox'] = bounding_box - instance_info['bbox_label'] = obj.type - 1 + instance_info['bbox_label'] = label height = box3d.height width = box3d.width @@ -786,7 +790,7 @@ def gather_instance_info(self, frame, cam_sync=False): instance_info['bbox3d'] = np.array( [x, y, z, length, width, height, rotation_y]).astype(np.float32).tolist() - instance_info['bbox_label_3d'] = obj.type - 1 + instance_info['bbox_label_3d'] = label instance_info['num_lidar_pts'] = obj.num_lidar_points_in_box if self.save_track_id: @@ -798,16 +802,17 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): cam_instances = dict() for cam_type in self.camera_types: - lidar2cam = images[cam_type]['lidar2cam'] - cam2img = images[cam_type]['cam2img'] + lidar2cam = np.array(images[cam_type]['lidar2cam']) + cam2img = np.array(images[cam_type]['cam2img']) cam_instances[cam_type] = [] for instance in cam_sync_instances: cam_instance = dict() - gt_bboxes_3d = instance['bbox3d'] + gt_bboxes_3d = np.array(instance['bbox3d']) # Convert lidar coordinates to camera coordinates - gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( - Box3DMode.CAM, lidar2cam, 
correct_yaw=True).numpy() - corners_3d = gt_bboxes_3d.corners + gt_bboxes_3d = LiDARInstance3DBoxes( + gt_bboxes_3d[None, :]).convert_to( + Box3DMode.CAM, lidar2cam, correct_yaw=True) + corners_3d = gt_bboxes_3d.corners.numpy() corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8) in_camera = np.argwhere(corners_3d[2, :] > 0).flatten() corners_3d = corners_3d[:, in_camera] @@ -818,7 +823,7 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): # Keep only corners that fall within the image. final_coords = post_process_coords( corner_coords, - imsize=(images[cam_type]['weight'], + imsize=(images[cam_type]['width'], images[cam_type]['height'])) # Skip if the convex hull of the re-projected corners @@ -830,7 +835,7 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): cam_instance['bbox'] = [min_x, min_y, max_x, max_y] cam_instance['bbox_label'] = instance['bbox_label'] - cam_instance['bbox3d'] = gt_bboxes_3d.astype( + cam_instance['bbox3d'] = gt_bboxes_3d.numpy().astype( np.float32).tolist() cam_instance['bbox_label_3d'] = instance['bbox_label_3d'] From 53620aab56f8da7a8e8a159391f4702db6698c26 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Thu, 24 Aug 2023 14:53:03 +0800 Subject: [PATCH 10/25] remove some function not use --- tools/create_data.py | 31 +-- tools/dataset_converters/waymo_converter.py | 274 +++----------------- 2 files changed, 52 insertions(+), 253 deletions(-) diff --git a/tools/create_data.py b/tools/create_data.py index afb75ef484..409aa5cc0b 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -173,12 +173,14 @@ def waymo_data_prep(root_path, workers, max_sweeps=10, skip_gather_cam_instances_info=False): - """Prepare waymo dataset. There are 3 steps as follows: Step 1. Extract - camera images and lidar point clouds from waymo raw data in '*.tfreord' and - save as kitti format. Step 2. Generate waymo train/val/test infos and save - as pickle file. Step 3. Generate waymo ground truth database (point - cloudswithin each 3D bounding box) for data augmentation in training. Steps - 1 and 2 will be done in Waymo2KITTI, and step 3 will be done in + """Prepare waymo dataset. There are 3 steps as follows: + + Step 1. Extract camera images and lidar point clouds from waymo raw + data in '*.tfreord' and save as kitti format. + Step 2. Generate waymo train/val/test infos and save as pickle file. + Step 3. Generate waymo ground truth database (point clouds within + each 3D bounding box) for data augmentation in training. + Steps 1 and 2 will be done in Waymo2KITTI, and step 3 will be done in GTDatabaseCreater. Args: @@ -191,7 +193,7 @@ def waymo_data_prep(root_path, frames for later use. skip_gather_cam_instances_info (bool, optional): Whether to skip gather cam_instances infos in Step 2. Default to False. 
- """ # noqa + """ from tools.dataset_converters import waymo_converter as waymo if version == 'v1.4': @@ -221,22 +223,13 @@ def waymo_data_prep(root_path, split=split, save_cam_instances=not skip_gather_cam_instances_info) converter.convert() + if split == 'validation': + converter.merge_trainval_infos() from tools.dataset_converters.waymo_converter import \ create_ImageSets_img_ids create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) - # Generate waymo infos - # out_dir = osp.join(out_dir, 'kitti_format') - # kitti.create_waymo_info_file( - # out_dir, info_prefix, max_sweeps=max_sweeps, workers=workers) - # info_train_path = osp.join(out_dir, f'{info_prefix}_infos_train.pkl') - # info_val_path = osp.join(out_dir, f'{info_prefix}_infos_val.pkl') - # info_trainval_path = osp.join(out_d, f'{info_prefix}_infos_trainval.pkl') - # info_test_path = osp.join(out_dir, f'{info_prefix}_infos_test.pkl') - # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_train_path) - # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_val_path) - # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_trainval_path) - # update_pkl_infos('waymo', out_dir=out_dir, pkl_path=info_test_path) + GTDatabaseCreater( 'WaymoDataset', out_dir, diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index a8c63815de..a91780d78e 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -30,10 +30,11 @@ class Waymo2KITTI(object): - """Waymo to KITTI converter.There are 2 steps as follows: Step 1. Extract - camera images and lidar point clouds from waymo raw data in '*.tfreord' and - save as kitti format. Step 2. Generate waymo train/val/test infos and save - as pickle file. + """Waymo to KITTI converter. There are 2 steps as follows: + + Step 1. Extract camera images and lidar point clouds from waymo raw data in + '*.tfreord' and save as kitti format. + Step 2. Generate waymo train/val/test infos and save as pickle file. Args: load_dir (str): Directory to load waymo raw data. @@ -52,7 +53,7 @@ class Waymo2KITTI(object): Defaults to 'waymo'. max_sweeps (int, optional): Max length of sweeps. Defaults to 10. split (str, optional): Split of the data. Defaults to 'training'. 
- """ # noqa + """ def __init__(self, load_dir, @@ -91,6 +92,12 @@ def __init__(self, 'CAM_SIDE_RIGHT', ] self.selected_waymo_classes = ['VEHICLE', 'PEDESTRIAN', 'CYCLIST'] + self.info_map = { + 'training': '_infos_train.pkl', + 'validation': '_infos_val.pkl', + 'testing': '_infos_test.pkl', + 'testing_3d_camera_only_detection': '_infos_test_cam_only.pkl' + } self.load_dir = load_dir self.save_dir = save_dir @@ -124,10 +131,12 @@ def __init__(self, def convert(self): """Convert action.""" print('Start converting ...') - # mmengine.track_progress(self.convert_one, range(len(self))) - data_infos = mmengine.track_parallel_progress(self.convert_one, - range(len(self)), - self.workers) + if self.workers == 0: + data_infos = mmengine.track_progress(self.convert_one, + range(len(self))) + else: + data_infos = mmengine.track_parallel_progress( + self.convert_one, range(len(self)), self.workers) data_list = [] for data_info in data_infos: data_list.extend(data_info) @@ -138,7 +147,7 @@ def convert(self): waymo_infos = dict(data_infos=data_list, metainfo=metainfo) filenames = osp.join( osp.dirname(self.save_dir), - f'{self.info_prefix}_infos_{self.split}.pkl') + f'{self.info_prefix + self.info_map[self.split]}') mmengine.dump(waymo_infos, filenames) print('\nFinished ...') @@ -193,62 +202,6 @@ def save_image(self, frame, file_idx, frame_idx): with open(img_path, 'wb') as fp: fp.write(img.image) - def save_calib(self, frame, file_idx, frame_idx): - """Parse and save the calibration data. - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - """ - # waymo front camera to kitti reference camera - T_front_cam_to_ref = np.array([[0.0, -1.0, 0.0], [0.0, 0.0, -1.0], - [1.0, 0.0, 0.0]]) - camera_calibs = [] - R0_rect = [f'{i:e}' for i in np.eye(3).flatten()] - Tr_velo_to_cams = [] - calib_context = '' - - for camera in frame.context.camera_calibrations: - # extrinsic parameters - T_cam_to_vehicle = np.array(camera.extrinsic.transform).reshape( - 4, 4) - T_vehicle_to_cam = np.linalg.inv(T_cam_to_vehicle) - Tr_velo_to_cam = \ - self.cart_to_homo(T_front_cam_to_ref) @ T_vehicle_to_cam - if camera.name == 1: # FRONT = 1, see dataset.proto for details - self.T_velo_to_front_cam = Tr_velo_to_cam.copy() - Tr_velo_to_cam = Tr_velo_to_cam[:3, :].reshape((12, )) - Tr_velo_to_cams.append([f'{i:e}' for i in Tr_velo_to_cam]) - - # intrinsic parameters - camera_calib = np.zeros((3, 4)) - camera_calib[0, 0] = camera.intrinsic[0] - camera_calib[1, 1] = camera.intrinsic[1] - camera_calib[0, 2] = camera.intrinsic[2] - camera_calib[1, 2] = camera.intrinsic[3] - camera_calib[2, 2] = 1 - camera_calib = list(camera_calib.reshape(12)) - camera_calib = [f'{i:e}' for i in camera_calib] - camera_calibs.append(camera_calib) - - # all camera ids are saved as id-1 in the result because - # camera 0 is unknown in the proto - for i in range(5): - calib_context += 'P' + str(i) + ': ' + \ - ' '.join(camera_calibs[i]) + '\n' - calib_context += 'R0_rect' + ': ' + ' '.join(R0_rect) + '\n' - for i in range(5): - calib_context += 'Tr_velo_to_cam_' + str(i) + ': ' + \ - ' '.join(Tr_velo_to_cams[i]) + '\n' - - with open( - f'{self.calib_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt', - 'w+') as fp_calib: - fp_calib.write(calib_context) - fp_calib.close() - def save_lidar(self, frame, file_idx, frame_idx): """Parse and save the lidar data in psd format. 
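The workers == 0 branch added to convert() above keeps everything in the main
process, so breakpoints and tracebacks behave normally. A minimal sketch of the
same dispatch pattern, with a toy task in place of convert_one:

import mmengine


def square(i):
    return i * i


workers = 0  # 0 = run sequentially in the main process, handy for debugging
if workers == 0:
    results = mmengine.track_progress(square, range(8))
else:
    results = mmengine.track_parallel_progress(square, range(8), workers)
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49]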
@@ -306,171 +259,6 @@ def save_lidar(self, frame, file_idx, frame_idx): f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.bin' point_cloud.astype(np.float32).tofile(pc_path) - def save_label(self, frame, file_idx, frame_idx, cam_sync=False): - """Parse and save the label data in txt format. - The relation between waymo and kitti coordinates is noteworthy: - 1. x, y, z correspond to l, w, h (waymo) -> l, h, w (kitti) - 2. x-y-z: front-left-up (waymo) -> right-down-front(kitti) - 3. bbox origin at volumetric center (waymo) -> bottom center (kitti) - 4. rotation: +x around y-axis (kitti) -> +x around z-axis (waymo) - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - cam_sync (bool, optional): Whether to save the cam sync labels. - Defaults to False. - """ - label_all_path = f'{self.label_all_save_dir}/{self.prefix}' + \ - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' - if cam_sync: - label_all_path = label_all_path.replace('label_', - 'cam_sync_label_') - fp_label_all = open(label_all_path, 'w+') - id_to_bbox = dict() - id_to_name = dict() - for labels in frame.projected_lidar_labels: - name = labels.name - for label in labels.labels: - # TODO: need a workaround as bbox may not belong to front cam - bbox = [ - label.box.center_x - label.box.length / 2, - label.box.center_y - label.box.width / 2, - label.box.center_x + label.box.length / 2, - label.box.center_y + label.box.width / 2 - ] - id_to_bbox[label.id] = bbox - id_to_name[label.id] = name - 1 - - for obj in frame.laser_labels: - bounding_box = None - name = None - id = obj.id - for proj_cam in self.cam_list: - if id + proj_cam in id_to_bbox: - bounding_box = id_to_bbox.get(id + proj_cam) - name = str(id_to_name.get(id + proj_cam)) - break - - # NOTE: the 2D labels do not have strict correspondence with - # the projected 2D lidar labels - # e.g.: the projected 2D labels can be in camera 2 - # while the most_visible_camera can have id 4 - if cam_sync: - if obj.most_visible_camera_name: - name = str( - self.cam_list.index( - f'_{obj.most_visible_camera_name}')) - box3d = obj.camera_synced_box - else: - continue - else: - box3d = obj.box - - if bounding_box is None or name is None: - name = '0' - bounding_box = (0, 0, 0, 0) - - my_type = self.type_list[obj.type] - - if my_type not in self.selected_waymo_classes: - continue - - if self.filter_empty_3dboxes and obj.num_lidar_points_in_box < 1: - continue - - my_type = self.waymo_to_kitti_class_map[my_type] - - height = box3d.height - width = box3d.width - length = box3d.length - - # TODO: Discuss if we should keep kitti_format - x = box3d.center_x - y = box3d.center_y - z = box3d.center_z - height / 2 - - # # project bounding box to the virtual reference frame - # pt_ref = self.T_velo_to_front_cam @ \ - # np.array([x, y, z, 1]).reshape((4, 1)) - # x, y, z, _ = pt_ref.flatten().tolist() - - rotation_y = box3d.heading - track_id = obj.id - - # not available - truncated = 0 - occluded = 0 - alpha = -10 - - line = my_type + \ - ' {} {} {} {} {} {} {} {} {} {} {} {} {} {}\n'.format( - truncated, occluded, alpha, - bounding_box[0], bounding_box[1], - bounding_box[2], bounding_box[3], - width, height, length, - x, y, z, rotation_y) - - if self.save_track_id: - line_all = line[:-1] + ' ' + name + ' ' + track_id + '\n' - else: - line_all = line[:-1] + ' ' + name + '\n' - - # Save num_lidar_points_in_box - line_all = line_all[:-1] + ' ' + str( - obj.num_lidar_points_in_box) + '\n' - - # label_path = 
f'{self.label_save_dir}{name}/{self.prefix}' + \ - # f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt' - # if cam_sync: - # label_path = label_path.replace('label_', 'cam_sync_label_') - # fp_label = open(label_path, 'a') - # fp_label.write(line) - # fp_label.close() - - fp_label_all.write(line_all) - - fp_label_all.close() - - def save_pose(self, frame, file_idx, frame_idx): - """Parse and save the pose data. - - Note that SDC's own pose is not included in the regular training - of KITTI dataset. KITTI raw dataset contains ego motion files - but are not often used. Pose is important for algorithms that - take advantage of the temporal information. - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - """ - pose = np.array(frame.pose.transform).reshape(4, 4) - np.savetxt( - join(f'{self.pose_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'), - pose) - - def save_timestamp(self, frame, file_idx, frame_idx): - """Save the timestamp data in a separate file instead of the - pointcloud. - - Note that SDC's own pose is not included in the regular training - of KITTI dataset. KITTI raw dataset contains ego motion files - but are not often used. Pose is important for algorithms that - take advantage of the temporal information. - - Args: - frame (:obj:`Frame`): Open dataset frame proto. - file_idx (int): Current file index. - frame_idx (int): Current frame index. - """ - with open( - join(f'{self.timestamp_save_dir}/{self.prefix}' + - f'{str(file_idx).zfill(3)}{str(frame_idx).zfill(3)}.txt'), - 'w') as f: - f.write(str(frame.timestamp_micros)) - def convert_range_image_to_point_cloud(self, frame, range_images, @@ -858,15 +646,33 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): return cam_instances + def merge_trainval_infos(self): + """Merge training and validation infos into a single file.""" + train_infos_path = osp.join( + osp.dirname(self.save_dir), f'{self.info_prefix}_infos_train.pkl') + val_infos_path = osp.join( + osp.dirname(self.save_dir), f'{self.info_prefix}_infos_val.pkl') + train_infos = mmengine.load(train_infos_path) + val_infos = mmengine.load(val_infos_path) + trainval_infos = dict( + metainfo=train_infos['metainfo'], + data_list=train_infos['data_list'] + val_infos['data_list']) + mmengine.dump( + trainval_infos, + osp.join( + osp.dirname(self.save_dir), + f'{self.info_prefix}_infos_trainval.pkl')) + def create_ImageSets_img_ids(root_dir, splits): + """Create txt files indicating what to collect in each split.""" save_dir = join(root_dir, 'ImageSets/') if not exists(save_dir): os.mkdir(save_dir) idx_all = [[] for i in splits] for i, split in enumerate(splits): - path = join(root_dir, splits[i], 'calib') + path = join(root_dir, splits[i], 'image_0') if not exists(path): RawNames = [] else: @@ -881,6 +687,6 @@ def create_ImageSets_img_ids(root_dir, splits): open(save_dir + 'train.txt', 'w').writelines(idx_all[0]) open(save_dir + 'val.txt', 'w').writelines(idx_all[1]) open(save_dir + 'trainval.txt', 'w').writelines(idx_all[0] + idx_all[1]) - # open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) - # open(save_dir+'test_cam_only.txt','w').writelines(idx_all[3]) + open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) + open(save_dir + 'test_cam_only.txt', 'w').writelines(idx_all[3]) print('created txt files indicating what to collect in ', splits) From 061d9e6dd1e5a9cc276e2ad8503e4315379ac0e9 Mon Sep 17 00:00:00 2001 
From: sunjiahao1999 <578431509@qq.com> Date: Thu, 24 Aug 2023 20:15:33 +0800 Subject: [PATCH 11/25] fix cam_instances after refactor --- tools/create_data.py | 4 +- .../dataset_converters/create_gt_database.py | 26 ++++++---- tools/dataset_converters/waymo_converter.py | 52 +++++++++++-------- 3 files changed, 49 insertions(+), 33 deletions(-) diff --git a/tools/create_data.py b/tools/create_data.py index 409aa5cc0b..d4358bb376 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -225,10 +225,10 @@ def waymo_data_prep(root_path, converter.convert() if split == 'validation': converter.merge_trainval_infos() - + out_dir = osp.join(out_dir, 'kitti_format') from tools.dataset_converters.waymo_converter import \ create_ImageSets_img_ids - create_ImageSets_img_ids(osp.join(out_dir, 'kitti_format'), splits) + create_ImageSets_img_ids(out_dir, splits) GTDatabaseCreater( 'WaymoDataset', diff --git a/tools/dataset_converters/create_gt_database.py b/tools/dataset_converters/create_gt_database.py index ae452eb543..bbf357e6cb 100644 --- a/tools/dataset_converters/create_gt_database.py +++ b/tools/dataset_converters/create_gt_database.py @@ -7,7 +7,7 @@ import numpy as np from mmcv.ops import roi_align from mmdet.evaluation import bbox_overlaps -from mmengine import track_iter_progress +from mmengine import print_log, track_iter_progress from pycocotools import mask as maskUtils from pycocotools.coco import COCO @@ -504,7 +504,9 @@ def create_single(self, input_dict): return single_db_infos def create(self): - print(f'Create GT Database of {self.dataset_class_name}') + print_log( + f'Create GT Database of {self.dataset_class_name}', + logger='current') dataset_cfg = dict( type=self.dataset_class_name, data_root=self.data_path, @@ -610,12 +612,18 @@ def loop_dataset(i): input_dict['box_mode_3d'] = self.dataset.box_mode_3d return input_dict - multi_db_infos = mmengine.track_parallel_progress( - self.create_single, - ((loop_dataset(i) - for i in range(len(self.dataset))), len(self.dataset)), - self.num_worker) - print('Make global unique group id') + if self.num_worker == 0: + multi_db_infos = mmengine.track_progress( + self.create_single, + ((loop_dataset(i) + for i in range(len(self.dataset))), len(self.dataset))) + else: + multi_db_infos = mmengine.track_parallel_progress( + self.create_single, + ((loop_dataset(i) + for i in range(len(self.dataset))), len(self.dataset)), + self.num_worker) + print_log('Make global unique group id', logger='current') group_counter_offset = 0 all_db_infos = dict() for single_db_infos in track_iter_progress(multi_db_infos): @@ -630,7 +638,7 @@ def loop_dataset(i): group_counter_offset += (group_id + 1) for k, v in all_db_infos.items(): - print(f'load {len(v)} {k} database infos') + print_log(f'load {len(v)} {k} database infos', logger='current') with open(self.db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f) diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index a91780d78e..fc6f5a9114 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -19,6 +19,7 @@ import mmengine import numpy as np import tensorflow as tf +from mmengine import print_log from nuscenes.utils.geometry_utils import view_points from PIL import Image from waymo_open_dataset.utils import range_image_utils, transform_utils @@ -130,7 +131,7 @@ def __init__(self, def convert(self): """Convert action.""" - print('Start converting ...') + print_log(f'Start converting {self.split} 
dataset', logger='current') if self.workers == 0: data_infos = mmengine.track_progress(self.convert_one, range(len(self))) @@ -144,12 +145,11 @@ def convert(self): metainfo['dataset'] = 'waymo' metainfo['version'] = '1.4' metainfo['info_version'] = '1.1' - waymo_infos = dict(data_infos=data_list, metainfo=metainfo) + waymo_infos = dict(data_list=data_list, metainfo=metainfo) filenames = osp.join( osp.dirname(self.save_dir), f'{self.info_prefix + self.info_map[self.split]}') mmengine.dump(waymo_infos, filenames) - print('\nFinished ...') def convert_one(self, file_idx): """Convert one '*.tfrecord' file to kitti format. Each file stores all @@ -437,12 +437,14 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): camera_calib[2, 2] = 1 camera_calibs.append(camera_calib) - for cam_key, camera_calib, Tr_velo_to_cam, img in zip( - self.camera_types, camera_calibs, Tr_velo_to_cams, - frame.images): + for i, (cam_key, camera_calib, Tr_velo_to_cam) in enumerate( + zip(self.camera_types, camera_calibs, Tr_velo_to_cams)): cam_infos = dict() cam_infos['img_path'] = str(sample_idx) + '.jpg' - width, height = Image.open(BytesIO(img.image)).size + # NOTE: frames.images order is different + for img in frame.images: + if img.name == i + 1: + width, height = Image.open(BytesIO(img.image)).size cam_infos['height'] = height cam_infos['width'] = width cam_infos['lidar2cam'] = Tr_velo_to_cam.astype(np.float32).tolist() @@ -468,8 +470,9 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): prev_frame_infos = file_infos[prev_offset] prev_lidar_infos['timestamp'] = prev_frame_infos['timestamp'] prev_lidar_infos['ego2global'] = prev_frame_infos['ego2global'] + prev_lidar_infos['lidar_points'] = dict() lidar_path = prev_frame_infos['lidar_points']['lidar_path'] - prev_lidar_infos['lidar_path'] = lidar_path + prev_lidar_infos['lidar_points']['lidar_path'] = lidar_path lidar_sweeps_infos.append(prev_lidar_infos) prev_image_infos['timestamp'] = prev_frame_infos['timestamp'] @@ -483,7 +486,7 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): if lidar_sweeps_infos: frame_infos['lidar_sweeps'] = lidar_sweeps_infos if image_sweeps_infos: - frame_infos['images_sweeps'] = image_sweeps_infos + frame_infos['image_sweeps'] = image_sweeps_infos if not self.test_mode: # Gather instances infos which is used for lidar-based 3D detection @@ -495,9 +498,10 @@ def create_waymo_info_file(self, frame, file_idx, frame_idx, file_infos): frame, cam_sync=True) # Gather cam_instances infos which is used for image-based # (monocular) 3D detection (optional). + # TODO: Should we use cam_sync_instances to generate cam_instances? 
if self.save_cam_instances: frame_infos['cam_instances'] = self.gather_cam_instance_info( - copy.deepcopy(frame_infos['cam_sync_instances']), + copy.deepcopy(frame_infos['instances']), frame_infos['images']) file_infos.append(frame_infos) @@ -575,7 +579,7 @@ def gather_instance_info(self, frame, cam_sync=False): rotation_y = box3d.heading - instance_info['bbox3d'] = np.array( + instance_info['bbox_3d'] = np.array( [x, y, z, length, width, height, rotation_y]).astype(np.float32).tolist() instance_info['bbox_label_3d'] = label @@ -586,16 +590,16 @@ def gather_instance_info(self, frame, cam_sync=False): instance_infos.append(instance_info) return instance_infos - def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): + def gather_cam_instance_info(self, instances: dict, images: dict): cam_instances = dict() for cam_type in self.camera_types: lidar2cam = np.array(images[cam_type]['lidar2cam']) cam2img = np.array(images[cam_type]['cam2img']) cam_instances[cam_type] = [] - for instance in cam_sync_instances: + for instance in instances: cam_instance = dict() - gt_bboxes_3d = np.array(instance['bbox3d']) + gt_bboxes_3d = np.array(instance['bbox_3d']) # Convert lidar coordinates to camera coordinates gt_bboxes_3d = LiDARInstance3DBoxes( gt_bboxes_3d[None, :]).convert_to( @@ -609,10 +613,14 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): True).T[:, :2].tolist() # Keep only corners that fall within the image. + # TODO: imsize should be determined by the current image size + # CAM_FRONT: (1920, 1280) + # CAM_FRONT_LEFT: (1920, 1280) + # CAM_SIDE_LEFT: (1920, 886) final_coords = post_process_coords( corner_coords, - imsize=(images[cam_type]['width'], - images[cam_type]['height'])) + imsize=(images['CAM_FRONT']['width'], + images['CAM_FRONT']['height'])) # Skip if the convex hull of the re-projected corners # does not intersect the image canvas. 
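A note on the hunk above: `post_process_coords` clips the convex hull of the re-projected corners against the image canvas, and the TODO records that the canvas size is still hard-coded to the `CAM_FRONT` resolution (1920x1280) even though the side cameras are 1920x886. Below is a rough numpy sketch of the clipping idea, simplified to the axis-aligned extent of the corners in front of the camera rather than the exact convex-hull intersection; the helper name and the intrinsic values are made up.

import numpy as np


def project_and_clip(corners_cam, cam2img, imsize):
    """corners_cam: (8, 3) box corners in camera coords (z forward);
    cam2img: (3, 3) pinhole intrinsics; imsize: (width, height)."""
    # Keep only corners in front of the image plane; points behind the
    # camera have no valid projection.
    corners_cam = corners_cam[corners_cam[:, 2] > 1e-6]
    if corners_cam.shape[0] == 0:
        return None
    # Pinhole projection to pixel coordinates.
    uvw = corners_cam @ cam2img.T
    uv = uvw[:, :2] / uvw[:, 2:3]
    # Axis-aligned extent of the projected corners, clipped to the canvas.
    min_xy = np.clip(uv.min(axis=0), 0, imsize)
    max_xy = np.clip(uv.max(axis=0), 0, imsize)
    if (max_xy <= min_xy).any():
        return None  # the box misses this camera's canvas entirely
    return min_xy.tolist() + max_xy.tolist()  # [min_x, min_y, max_x, max_y]


# A made-up box roughly 10 m ahead of a camera with made-up intrinsics;
# note the side-camera canvas height of 886 used for clipping.
cam2img = np.array([[2055., 0., 960.], [0., 2055., 640.], [0., 0., 1.]])
corners = np.array([[x, y, z] for x in (-2.2, 2.2) for y in (-0.8, 0.8)
                    for z in (9.0, 13.5)])
print(project_and_clip(corners, cam2img, imsize=(1920, 886)))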
@@ -623,13 +631,13 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): cam_instance['bbox'] = [min_x, min_y, max_x, max_y] cam_instance['bbox_label'] = instance['bbox_label'] - cam_instance['bbox3d'] = gt_bboxes_3d.numpy().astype( + cam_instance['bbox_3d'] = gt_bboxes_3d.numpy().astype( np.float32).tolist() cam_instance['bbox_label_3d'] = instance['bbox_label_3d'] center_3d = gt_bboxes_3d.gravity_center.numpy() center_2d_with_depth = points_cam2img( - center_3d, lidar2cam, with_depth=True) + center_3d, cam2img, with_depth=True) center_2d_with_depth = center_2d_with_depth.squeeze().tolist() # normalized center2D + depth @@ -642,7 +650,7 @@ def gather_cam_instance_info(self, cam_sync_instances: dict, images: dict): # TODO: Discuss whether following info is necessary cam_instance['bbox_3d_isvalid'] = True cam_instance['velocity'] = -1 - cam_instances[cam_type].append(cam_instance) + cam_instances[cam_type].append(cam_instance) return cam_instances @@ -670,7 +678,7 @@ def create_ImageSets_img_ids(root_dir, splits): if not exists(save_dir): os.mkdir(save_dir) - idx_all = [[] for i in splits] + idx_all = [[] for _ in range(4)] for i, split in enumerate(splits): path = join(root_dir, splits[i], 'image_0') if not exists(path): @@ -679,8 +687,8 @@ def create_ImageSets_img_ids(root_dir, splits): RawNames = os.listdir(path) for name in RawNames: - if name.endswith('.txt'): - idx = name.replace('.txt', '\n') + if name.endswith('.jpg'): + idx = name.replace('.jpg', '\n') idx_all[int(idx[0])].append(idx) idx_all[i].sort() From 3c7d4e9f527fb66b874058babde67e4cfb2a1492 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Thu, 24 Aug 2023 20:41:23 +0800 Subject: [PATCH 12/25] remove unused .py --- .gitignore | 3 --- check_coor.py | 24 ------------------------ get_gt_mini.py | 11 ----------- mp.py | 43 ------------------------------------------- 4 files changed, 81 deletions(-) delete mode 100644 check_coor.py delete mode 100644 get_gt_mini.py delete mode 100644 mp.py diff --git a/.gitignore b/.gitignore index 36817b21a6..2fefc6a904 100644 --- a/.gitignore +++ b/.gitignore @@ -135,6 +135,3 @@ data/sunrgbd/OFFICIAL_SUNRGBD/ mmdet3d/evaluation/functional/waymo_utils/compute_detection_metrics_main mmdet3d/evaluation/functional/waymo_utils/compute_detection_let_metrics_main mmdet3d/evaluation/functional/waymo_utils/compute_segmentation_metrics_main - -check_waymo.py -remove_infos.py diff --git a/check_coor.py b/check_coor.py deleted file mode 100644 index ed1df35a33..0000000000 --- a/check_coor.py +++ /dev/null @@ -1,24 +0,0 @@ -import mmengine -import numpy as np - -from mmdet3d.structures import (Box3DMode, CameraInstance3DBoxes, - LiDARInstance3DBoxes) - -val_infos = mmengine.load('data/waymo_mini/kitti_format/waymo_infos_val.pkl') -old_val_infos = mmengine.load( - 'data/waymo_mini/kitti_format/old_pkl/waymo_infos_val.pkl') -instance = np.array( - val_infos['data_list'][0]['cam_sync_instances'][0]['bbox_3d'])[np.newaxis, - ...] -instance = LiDARInstance3DBoxes(instance) -lidar2cam = np.array( - val_infos['data_list'][0]['images']['CAM_FRONT']['lidar2cam']) -cam_instance = instance.convert_to(Box3DMode.CAM, lidar2cam) - -old_instance = np.array(old_val_infos['data_list'][0]['cam_sync_instances'][0] - ['bbox_3d'])[np.newaxis, ...] 
- -old_lidar2cam = np.array( - old_val_infos['data_list'][0]['images']['CAM_FRONT']['lidar2cam']) -old_instance = CameraInstance3DBoxes(old_instance) -pass diff --git a/get_gt_mini.py b/get_gt_mini.py deleted file mode 100644 index e300324ed4..0000000000 --- a/get_gt_mini.py +++ /dev/null @@ -1,11 +0,0 @@ -from waymo_open_dataset.protos.metrics_pb2 import Objects - -objects = Objects() -with open('./data/waymo_mini/waymo_format/gt.bin', 'rb') as f: - objects.ParseFromString(bytearray(f.read())) -new = Objects() -for obj in objects.objects: - if obj.context_name == '10203656353524179475_7625_000_7645_000': - new.objects.append(obj) -with open('./data/waymo_mini/waymo_format/gt_mini.bin', 'wb') as f: - f.write(new.SerializeToString()) diff --git a/mp.py b/mp.py deleted file mode 100644 index 3262dd5eac..0000000000 --- a/mp.py +++ /dev/null @@ -1,43 +0,0 @@ -import multiprocessing -import time - - -class A(object): - - def __init__(self): - self.a = [] - self.b = None - - def get_num_a(self, num): - time.sleep(3) - self.a = [0] - self.get_num_b(num) - print(self.a) - return self.a - - def get_num_b(self, num): - self.a.append(num) - - def sum(self): - print(self.a) - - def run(self): - # p1 = multiprocessing.Process(target=self.get_num_a) - # p2 = multiprocessing.Process(target=self.get_num_b) - # p1.start() - # p2.start() - # p1.join() - # p2.join() - p1 = multiprocessing.Pool(2) - list_a = p1.map(self.get_num_a, [1, 2]) - self.sum() - print(list_a) - - -if __name__ == '__main__': - - t1 = time.time() - a = A() - a.run() - t2 = time.time() - print('cost time :{}'.format(t2 - t1)) From 38e47b1d9637efb86245f80adc5e6b9b11d650ac Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Mon, 28 Aug 2023 15:14:07 +0800 Subject: [PATCH 13/25] add fast eval --- .../waymo_utils/prediction_to_waymo.py | 265 +----------------- mmdet3d/evaluation/metrics/waymo_metric.py | 73 +++-- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 9 +- 3 files changed, 52 insertions(+), 295 deletions(-) diff --git a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py index b9da8043d2..7b47d85075 100644 --- a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py +++ b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py @@ -4,7 +4,6 @@ """ try: - from waymo_open_dataset import dataset_pb2 as open_dataset from waymo_open_dataset import label_pb2 from waymo_open_dataset.protos import metrics_pb2 from waymo_open_dataset.protos.metrics_pb2 import Objects @@ -14,13 +13,10 @@ 'Please run "pip install waymo-open-dataset-tf-2-1-0==1.2.0" ' 'to install the official devkit first.') -from glob import glob -from os.path import join from typing import List, Optional import mmengine import numpy as np -import tensorflow as tf class Prediction2Waymo(object): @@ -45,39 +41,22 @@ class Prediction2Waymo(object): corresponding backend. Defaults to None. from_kitti_format (bool, optional): Whether the reuslts are kitti format. Defaults to False. - idx2metainfo (Optional[dict], optional): The mapping from sample_idx to - metainfo. The metainfo must contain the keys: 'idx2contextname' and - 'idx2timestamp'. Defaults to None. 
""" def __init__(self, results: List[dict], - waymo_tfrecords_dir: str, waymo_results_save_dir: str, waymo_results_final_path: str, - prefix: str, classes: dict, workers: int = 2, - backend_args: Optional[dict] = None, - from_kitti_format: bool = False, - idx2metainfo: Optional[dict] = None): + backend_args: Optional[dict] = None): self.results = results - self.waymo_tfrecords_dir = waymo_tfrecords_dir self.waymo_results_save_dir = waymo_results_save_dir self.waymo_results_final_path = waymo_results_final_path - self.prefix = prefix self.classes = classes self.workers = int(workers) self.backend_args = backend_args - self.from_kitti_format = from_kitti_format - if idx2metainfo is not None: - self.idx2metainfo = idx2metainfo - # If ``fast_eval``, the metainfo does not need to be read from - # original data online. It's preprocessed offline. - self.fast_eval = True - else: - self.fast_eval = False self.name2idx = {} @@ -88,192 +67,12 @@ def __init__(self, 'Cyclist': label_pb2.Label.TYPE_CYCLIST, } - if self.from_kitti_format: - self.T_ref_to_front_cam = np.array([[0.0, 0.0, 1.0, 0.0], - [-1.0, 0.0, 0.0, 0.0], - [0.0, -1.0, 0.0, 0.0], - [0.0, 0.0, 0.0, 1.0]]) - # ``sample_idx`` of the sample in kitti-format is an array - for idx, result in enumerate(results): - if len(result['sample_idx']) > 0: - self.name2idx[str(result['sample_idx'][0])] = idx - else: - # ``sample_idx`` of the sample in the original prediction - # is an int value. - for idx, result in enumerate(results): - self.name2idx[str(result['sample_idx'])] = idx - - if not self.fast_eval: - # need to read original '.tfrecord' file - self.get_file_names() - # turn on eager execution for older tensorflow versions - if int(tf.__version__.split('.')[0]) < 2: - tf.enable_eager_execution() - self.create_folder() - def get_file_names(self): - """Get file names of waymo raw data.""" - if 'path_mapping' in self.backend_args: - for path in self.backend_args['path_mapping'].keys(): - if path in self.waymo_tfrecords_dir: - self.waymo_tfrecords_dir = \ - self.waymo_tfrecords_dir.replace( - path, self.backend_args['path_mapping'][path]) - from petrel_client.client import Client - client = Client() - contents = client.list(self.waymo_tfrecords_dir) - self.waymo_tfrecord_pathnames = list() - for content in sorted(list(contents)): - if content.endswith('tfrecord'): - self.waymo_tfrecord_pathnames.append( - join(self.waymo_tfrecords_dir, content)) - else: - self.waymo_tfrecord_pathnames = sorted( - glob(join(self.waymo_tfrecords_dir, '*.tfrecord'))) - print(len(self.waymo_tfrecord_pathnames), 'tfrecords found.') - def create_folder(self): """Create folder for data conversion.""" mmengine.mkdir_or_exist(self.waymo_results_save_dir) - def parse_objects(self, kitti_result, T_k2w, context_name, - frame_timestamp_micros): - """Parse one prediction with several instances in kitti format and - convert them to `Object` proto. - - Args: - kitti_result (dict): Predictions in kitti format. - - - name (np.ndarray): Class labels of predictions. - - dimensions (np.ndarray): Height, width, length of boxes. - - location (np.ndarray): Bottom center of boxes (x, y, z). - - rotation_y (np.ndarray): Orientation of boxes. - - score (np.ndarray): Scores of predictions. - T_k2w (np.ndarray): Transformation matrix from kitti to waymo. - context_name (str): Context name of the frame. - frame_timestamp_micros (int): Frame timestamp. - - Returns: - :obj:`Object`: Predictions in waymo dataset Object proto. 
- """ - - def parse_one_object(instance_idx): - """Parse one instance in kitti format and convert them to `Object` - proto. - - Args: - instance_idx (int): Index of the instance to be converted. - - Returns: - :obj:`Object`: Predicted instance in waymo dataset - Object proto. - """ - cls = kitti_result['name'][instance_idx] - length = round(kitti_result['dimensions'][instance_idx, 0], 4) - height = round(kitti_result['dimensions'][instance_idx, 1], 4) - width = round(kitti_result['dimensions'][instance_idx, 2], 4) - x = round(kitti_result['location'][instance_idx, 0], 4) - y = round(kitti_result['location'][instance_idx, 1], 4) - z = round(kitti_result['location'][instance_idx, 2], 4) - rotation_y = round(kitti_result['rotation_y'][instance_idx], 4) - score = round(kitti_result['score'][instance_idx], 4) - - # y: downwards; move box origin from bottom center (kitti) to - # true center (waymo) - y -= height / 2 - # frame transformation: kitti -> waymo - x, y, z = self.transform(T_k2w, x, y, z) - - # different conventions - heading = -(rotation_y + np.pi / 2) - while heading < -np.pi: - heading += 2 * np.pi - while heading > np.pi: - heading -= 2 * np.pi - - box = label_pb2.Label.Box() - box.center_x = x - box.center_y = y - box.center_z = z - box.length = length - box.width = width - box.height = height - box.heading = heading - - o = metrics_pb2.Object() - o.object.box.CopyFrom(box) - o.object.type = self.k2w_cls_map[cls] - o.score = score - - o.context_name = context_name - o.frame_timestamp_micros = frame_timestamp_micros - - return o - - objects = metrics_pb2.Objects() - - for instance_idx in range(len(kitti_result['name'])): - o = parse_one_object(instance_idx) - objects.objects.append(o) - - return objects - - def convert_one(self, file_idx): - """Convert action for single file. - - Args: - file_idx (int): Index of the file to be converted. - """ - file_pathname = self.waymo_tfrecord_pathnames[file_idx] - if 's3://' in file_pathname and tf.__version__ >= '2.6.0': - try: - import tensorflow_io as tfio # noqa: F401 - except ImportError: - raise ImportError( - "Please run 'pip install tensorflow-io' to install tensorflow_io first." # noqa: E501 - ) - file_data = tf.data.TFRecordDataset(file_pathname, compression_type='') - - for frame_num, frame_data in enumerate(file_data): - frame = open_dataset.Frame() - frame.ParseFromString(bytearray(frame_data.numpy())) - - filename = f'{self.prefix}{file_idx:03d}{frame_num:03d}' - - context_name = frame.context.name - frame_timestamp_micros = frame.timestamp_micros - - if filename in self.name2idx: - if self.from_kitti_format: - for camera in frame.context.camera_calibrations: - # FRONT = 1, see dataset.proto for details - if camera.name == 1: - T_front_cam_to_vehicle = np.array( - camera.extrinsic.transform).reshape(4, 4) - - T_k2w = T_front_cam_to_vehicle @ self.T_ref_to_front_cam - - kitti_result = \ - self.results[self.name2idx[filename]] - objects = self.parse_objects(kitti_result, T_k2w, - context_name, - frame_timestamp_micros) - else: - index = self.name2idx[filename] - objects = self.parse_objects_from_origin( - self.results[index], context_name, - frame_timestamp_micros) - - else: - print(filename, 'not found.') - objects = metrics_pb2.Objects() - - with open( - join(self.waymo_results_save_dir, f'{filename}.bin'), - 'wb') as f: - f.write(objects.SerializeToString()) - def convert_one_fast(self, res_index: int): """Convert action for single file. It read the metainfo from the preprocessed file offline and will be faster. 
@@ -285,16 +84,13 @@ def convert_one_fast(self, res_index: int): if len(self.results[res_index]['pred_instances_3d']) > 0: objects = self.parse_objects_from_origin( self.results[res_index], - self.idx2metainfo[str(sample_idx)]['contextname'], - self.idx2metainfo[str(sample_idx)]['timestamp']) + self.results[res_index]['context_name'], + self.results[res_index]['timestamp']) else: print(sample_idx, 'not found.') objects = metrics_pb2.Objects() - with open( - join(self.waymo_results_save_dir, f'{sample_idx}.bin'), - 'wb') as f: - f.write(objects.SerializeToString()) + return objects def parse_objects_from_origin(self, result: dict, contextname: str, timestamp: str) -> Objects: @@ -350,8 +146,6 @@ def parse_one_object(index): def convert(self): """Convert action.""" print('Start converting ...') - convert_func = self.convert_one_fast if self.fast_eval else \ - self.convert_one # from torch.multiprocessing import set_sharing_strategy # # Force using "file_system" sharing strategy for stability @@ -363,57 +157,22 @@ def convert(self): # TODO: Support multiprocessing. Now, multiprocessing evaluation will # cause shared memory error in torch-1.10 and torch-1.11. Details can # be seen in https://github.com/pytorch/pytorch/issues/67864. + objects_list = [] prog_bar = mmengine.ProgressBar(len(self)) for i in range(len(self)): - convert_func(i) + objects = self.convert_one_fast(i) + objects_list.append(objects) prog_bar.update() + combined = metrics_pb2.Objects() + for objects in objects_list: + for o in objects.objects: + combined.objects.append(o) print('\nFinished ...') - # combine all files into one .bin - pathnames = sorted(glob(join(self.waymo_results_save_dir, '*.bin'))) - combined = self.combine(pathnames) - with open(self.waymo_results_final_path, 'wb') as f: f.write(combined.SerializeToString()) def __len__(self): """Length of the filename list.""" - return len(self.results) if self.fast_eval else len( - self.waymo_tfrecord_pathnames) - - def transform(self, T, x, y, z): - """Transform the coordinates with matrix T. - - Args: - T (np.ndarray): Transformation matrix. - x(float): Coordinate in x axis. - y(float): Coordinate in y axis. - z(float): Coordinate in z axis. - - Returns: - list: Coordinates after transformation. - """ - pt_bef = np.array([x, y, z, 1.0]).reshape(4, 1) - pt_aft = np.matmul(T, pt_bef) - return pt_aft[:3].flatten().tolist() - - def combine(self, pathnames): - """Combine predictions in waymo format for each sample together. - - Args: - pathnames (str): Paths to save predictions. - - Returns: - :obj:`Objects`: Combined predictions in Objects proto. - """ - combined = metrics_pb2.Objects() - - for pathname in pathnames: - objects = metrics_pb2.Objects() - with open(pathname, 'rb') as f: - objects.ParseFromString(f.read()) - for o in objects.objects: - combined.objects.append(o) - - return combined + return len(self.results) diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py index 0dd69a5c24..b923b6566a 100644 --- a/mmdet3d/evaluation/metrics/waymo_metric.py +++ b/mmdet3d/evaluation/metrics/waymo_metric.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import tempfile from os import path as osp -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Sequence, Tuple, Union import mmengine import numpy as np @@ -79,7 +79,6 @@ class WaymoMetric(KittiMetric): def __init__(self, ann_file: str, waymo_bin_file: str, - data_root: str, split: str = 'training', metric: Union[str, List[str]] = 'mAP', pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5], @@ -92,20 +91,13 @@ def __init__(self, default_cam_key: str = 'CAM_FRONT', use_pred_sample_idx: bool = False, collect_device: str = 'cpu', - backend_args: Optional[dict] = None, - idx2metainfo: Optional[str] = None) -> None: + backend_args: Optional[dict] = None) -> None: self.waymo_bin_file = waymo_bin_file - self.data_root = data_root self.split = split self.load_type = load_type self.use_pred_sample_idx = use_pred_sample_idx self.convert_kitti_format = convert_kitti_format - if idx2metainfo is not None: - self.idx2metainfo = mmengine.load(idx2metainfo) - else: - self.idx2metainfo = None - super(WaymoMetric, self).__init__( ann_file=ann_file, metric=metric, @@ -124,6 +116,32 @@ def __init__(self, self.default_prefix = 'Waymo metric' + def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: + """Process one batch of data samples and predictions. + + The processed results should be stored in ``self.results``, which will + be used to compute the metrics when all batches have been processed. + + Args: + data_batch (dict): A batch of data from the dataloader. + data_samples (Sequence[dict]): A batch of outputs from the model. + """ + + for data_sample in data_samples: + result = dict() + pred_3d = data_sample['pred_instances_3d'] + pred_2d = data_sample['pred_instances'] + for attr_name in pred_3d: + pred_3d[attr_name] = pred_3d[attr_name].to('cpu') + result['pred_instances_3d'] = pred_3d + for attr_name in pred_2d: + pred_2d[attr_name] = pred_2d[attr_name].to('cpu') + result['pred_instances'] = pred_2d + result['sample_idx'] = data_sample['sample_idx'] + result['context_name'] = data_sample['context_name'] + result['timestamp'] = data_sample['timestamp'] + self.results.append(result) + def compute_metrics(self, results: List[dict]) -> Dict[str, float]: """Compute the metrics from processed results. 
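The `process`/`compute_metrics` split added in the hunk above follows mmengine's `BaseMetric` contract: `process` appends small CPU-side records to `self.results` for each batch, and `compute_metrics` receives the list gathered from all ranks. A toy sketch of that contract; the class name and dictionary keys are illustrative, not the real `WaymoMetric`.

from typing import Sequence

from mmengine.evaluator import BaseMetric


class ToyDetMetric(BaseMetric):
    """Counts predictions per sample; purely illustrative."""

    default_prefix = 'toy'

    def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
        for data_sample in data_samples:
            # Keep only small CPU tensors/ndarrays here: `self.results` is
            # pickled and gathered across ranks before `compute_metrics`.
            self.results.append({
                'sample_idx': data_sample['sample_idx'],
                'num_preds':
                len(data_sample['pred_instances_3d']['scores_3d']),
            })

    def compute_metrics(self, results: list) -> dict:
        # `results` is the concatenation of every rank's `self.results`.
        total = sum(r['num_preds'] for r in results)
        return {'mean_preds_per_sample': total / max(len(results), 1)}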
@@ -137,13 +155,13 @@ def compute_metrics(self, results: List[dict]) -> Dict[str, float]: logger: MMLogger = MMLogger.get_current_instance() self.classes = self.dataset_meta['classes'] - # load annotations - self.data_infos = load(self.ann_file)['data_list'] - assert len(results) == len(self.data_infos), \ - 'invalid list length of network outputs' # different from kitti, waymo do not need to convert the ann file # handle the mv_image_based load_mode if self.load_type == 'mv_image_based': + # load annotations + self.data_infos = load(self.ann_file)['data_list'] + assert len(results) == len(self.data_infos), \ + 'invalid list length of network outputs' new_data_infos = [] for info in self.data_infos: height = info['images'][self.default_cam_key]['height'] @@ -179,7 +197,7 @@ def compute_metrics(self, results: List[dict]) -> Dict[str, float]: eval_tmp_dir = None pklfile_prefix = self.pklfile_prefix - result_dict, tmp_dir = self.format_results( + self.format_results( results, pklfile_prefix=pklfile_prefix, submission_prefix=self.submission_prefix, @@ -199,8 +217,6 @@ def compute_metrics(self, results: List[dict]) -> Dict[str, float]: if eval_tmp_dir is not None: eval_tmp_dir.cleanup() - if tmp_dir is not None: - tmp_dir.cleanup() return metric_dict def waymo_evaluate(self, @@ -359,40 +375,21 @@ def format_results( final_results = results_kitti_format['pred_instances_3d'] else: final_results = results - for i, res in enumerate(final_results): - # Actually, `sample_idx` here is the filename without suffix. - # It's for identitying the sample in formating. - res['sample_idx'] = self.data_infos[i]['sample_idx'] + for res in final_results: res['pred_instances_3d']['bboxes_3d'].limit_yaw( offset=0.5, period=np.pi * 2) - waymo_root = self.data_root - if self.split == 'training': - waymo_tfrecords_dir = osp.join(waymo_root, 'validation') - prefix = '1' - elif self.split == 'testing': - waymo_tfrecords_dir = osp.join(waymo_root, 'testing') - prefix = '2' - else: - raise ValueError('Not supported split value.') - from ..functional.waymo_utils.prediction_to_waymo import \ Prediction2Waymo converter = Prediction2Waymo( final_results, - waymo_tfrecords_dir, waymo_results_save_dir, waymo_results_final_path, - prefix, classes, - backend_args=self.backend_args, - from_kitti_format=self.convert_kitti_format, - idx2metainfo=self.idx2metainfo) + backend_args=self.backend_args) converter.convert() waymo_save_tmp_dir.cleanup() - return final_results, waymo_save_tmp_dir - def merge_multi_view_boxes(self, box_dict_per_frame: List[dict], cam0_info: dict) -> dict: """Merge bounding boxes predicted from multi-view images. 
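Taken together, the fast-eval path that this and the following patches converge on reduces each predicted LiDAR box to one `metrics_pb2.Object` tagged with `context_name` and `frame_timestamp_micros`, with everything serialized into a single submission .bin. A self-contained sketch using the same protos as the diffs above; the box values, context name, timestamp, and output path are dummies.

import numpy as np
from waymo_open_dataset import label_pb2
from waymo_open_dataset.protos import metrics_pb2


def to_waymo_object(bbox_3d, score, type_enum, context_name, timestamp):
    # bbox_3d: (x, y, z_bottom, length, width, height, heading), LiDAR frame.
    box = label_pb2.Label.Box()
    box.center_x = float(bbox_3d[0])
    box.center_y = float(bbox_3d[1])
    # mmdet3d boxes use the bottom center; Waymo wants the volumetric center.
    box.center_z = float(bbox_3d[2]) + float(bbox_3d[5]) / 2
    box.length, box.width, box.height = map(float, bbox_3d[3:6])
    box.heading = float(bbox_3d[6])

    obj = metrics_pb2.Object()
    obj.object.box.CopyFrom(box)
    obj.object.type = type_enum
    obj.score = float(score)
    obj.context_name = context_name
    obj.frame_timestamp_micros = timestamp
    return obj


combined = metrics_pb2.Objects()
combined.objects.append(
    to_waymo_object(
        np.array([10.0, -2.0, 0.0, 4.5, 1.9, 1.6, 0.3]), 0.9,
        label_pb2.Label.TYPE_VEHICLE, 'dummy_context_name', 1510593618979124))
with open('toy_results.bin', 'wb') as f:
    f.write(combined.SerializeToString())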
diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
index 1027942d9b..ec734d8f48 100644
--- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
+++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py
@@ -188,7 +188,10 @@
         dict(
             type='PointsRangeFilter', point_cloud_range=point_cloud_range)
     ]),
-    dict(type='Pack3DDetInputs', keys=['points'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points'],
+        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp']),
 ]
 
 dataset_type = 'WaymoDataset'
@@ -215,10 +218,8 @@
     type='WaymoMetric',
     ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl',
     waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin',
-    data_root='./data/waymo_mini/waymo_format',
     backend_args=backend_args,
-    convert_kitti_format=False,
-    idx2metainfo='./data/waymo_mini/waymo_format/idx2metainfo.pkl')
+    convert_kitti_format=False)
 # val_evaluator = dict(
 #     type='WaymoMetric',
 #     ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',

From 8c6e8c034ea433abee6ccb8a64cd964f4a8a5b10 Mon Sep 17 00:00:00 2001
From: sunjiahao1999 <578431509@qq.com>
Date: Tue, 29 Aug 2023 11:58:19 +0800
Subject: [PATCH 14/25] add parallel eval

---
 .../waymo_utils/prediction_to_waymo.py        | 73 ++++++-----------
 mmdet3d/evaluation/metrics/waymo_metric.py    | 26 +++---
 tools/create_data.py                          | 80 ++++++++++++-------
 tools/dataset_converters/waymo_converter.py   |  9 ++-
 4 files changed, 92 insertions(+), 96 deletions(-)

diff --git a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py
index 7b47d85075..765b1bfa02 100644
--- a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py
+++ b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py
@@ -16,7 +16,7 @@
 from typing import List, Optional
 
 import mmengine
-import numpy as np
+from mmengine import print_log
 
 
 class Prediction2Waymo(object):
@@ -48,7 +48,7 @@ def __init__(self,
                  waymo_results_save_dir: str,
                  waymo_results_final_path: str,
                  classes: dict,
-                 workers: int = 2,
+                 workers: int = 4,
                  backend_args: Optional[dict] = None):
 
         self.results = results
@@ -104,71 +104,48 @@ def parse_objects_from_origin(self, result: dict, contextname: str,
 
         Returns:
             metrics_pb2.Objects: The parsed object.
""" - lidar_boxes = result['pred_instances_3d']['bboxes_3d'].tensor + lidar_boxes = result['pred_instances_3d']['bboxes_3d'] scores = result['pred_instances_3d']['scores_3d'] labels = result['pred_instances_3d']['labels_3d'] - def parse_one_object(index): - class_name = self.classes[labels[index].item()] - + objects = metrics_pb2.Objects() + for lidar_box, score, label in zip(lidar_boxes, scores, labels): + # Parse one object box = label_pb2.Label.Box() - height = lidar_boxes[index][5].item() - heading = lidar_boxes[index][6].item() - - while heading < -np.pi: - heading += 2 * np.pi - while heading > np.pi: - heading -= 2 * np.pi - - box.center_x = lidar_boxes[index][0].item() - box.center_y = lidar_boxes[index][1].item() - box.center_z = lidar_boxes[index][2].item() + height / 2 - box.length = lidar_boxes[index][3].item() - box.width = lidar_boxes[index][4].item() + height = lidar_box[5] + heading = lidar_box[6] + + box.center_x = lidar_box[0] + box.center_y = lidar_box[1] + box.center_z = lidar_box[2] + height / 2 + box.length = lidar_box[3] + box.width = lidar_box[4] box.height = height box.heading = heading - o = metrics_pb2.Object() - o.object.box.CopyFrom(box) - o.object.type = self.k2w_cls_map[class_name] - o.score = scores[index].item() - o.context_name = contextname - o.frame_timestamp_micros = timestamp - - return o + object = metrics_pb2.Object() + object.object.box.CopyFrom(box) - objects = metrics_pb2.Objects() - for i in range(len(lidar_boxes)): - objects.objects.append(parse_one_object(i)) + class_name = self.classes[label] + object.object.type = self.k2w_cls_map[class_name] + object.score = score + object.context_name = contextname + object.frame_timestamp_micros = timestamp + objects.objects.append(object) return objects def convert(self): """Convert action.""" - print('Start converting ...') - - # from torch.multiprocessing import set_sharing_strategy - # # Force using "file_system" sharing strategy for stability - # set_sharing_strategy("file_system") - - # mmengine.track_parallel_progress(convert_func, range(len(self)), - # self.workers) + print_log('Start converting ...', logger='current') - # TODO: Support multiprocessing. Now, multiprocessing evaluation will - # cause shared memory error in torch-1.10 and torch-1.11. Details can - # be seen in https://github.com/pytorch/pytorch/issues/67864. 
- objects_list = [] - prog_bar = mmengine.ProgressBar(len(self)) - for i in range(len(self)): - objects = self.convert_one_fast(i) - objects_list.append(objects) - prog_bar.update() + objects_list = mmengine.track_parallel_progress( + self.convert_one_fast, range(len(self)), self.workers) combined = metrics_pb2.Objects() for objects in objects_list: for o in objects.objects: combined.objects.append(o) - print('\nFinished ...') with open(self.waymo_results_final_path, 'wb') as f: f.write(combined.SerializeToString()) diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py index b923b6566a..fb8ac495e6 100644 --- a/mmdet3d/evaluation/metrics/waymo_metric.py +++ b/mmdet3d/evaluation/metrics/waymo_metric.py @@ -129,14 +129,15 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: for data_sample in data_samples: result = dict() - pred_3d = data_sample['pred_instances_3d'] - pred_2d = data_sample['pred_instances'] - for attr_name in pred_3d: - pred_3d[attr_name] = pred_3d[attr_name].to('cpu') - result['pred_instances_3d'] = pred_3d - for attr_name in pred_2d: - pred_2d[attr_name] = pred_2d[attr_name].to('cpu') - result['pred_instances'] = pred_2d + result['pred_instances_3d'] = dict() + data_sample['pred_instances_3d']['bboxes_3d'].limit_yaw( + offset=0.5, period=np.pi * 2) + result['pred_instances_3d']['bboxes_3d'] = data_sample[ + 'pred_instances_3d']['bboxes_3d'].tensor.cpu().numpy() + result['pred_instances_3d']['scores_3d'] = data_sample[ + 'pred_instances_3d']['scores_3d'].cpu().numpy() + result['pred_instances_3d']['labels_3d'] = data_sample[ + 'pred_instances_3d']['labels_3d'].cpu().numpy() result['sample_idx'] = data_sample['sample_idx'] result['context_name'] = data_sample['context_name'] result['timestamp'] = data_sample['timestamp'] @@ -372,17 +373,12 @@ def format_results( if self.convert_kitti_format: results_kitti_format, tmp_dir = super().format_results( results, pklfile_prefix, submission_prefix, classes) - final_results = results_kitti_format['pred_instances_3d'] - else: - final_results = results - for res in final_results: - res['pred_instances_3d']['bboxes_3d'].limit_yaw( - offset=0.5, period=np.pi * 2) + results = results_kitti_format['pred_instances_3d'] from ..functional.waymo_utils.prediction_to_waymo import \ Prediction2Waymo converter = Prediction2Waymo( - final_results, + results, waymo_results_save_dir, waymo_results_final_path, classes, diff --git a/tools/create_data.py b/tools/create_data.py index d4358bb376..1eadaeb6af 100644 --- a/tools/create_data.py +++ b/tools/create_data.py @@ -172,7 +172,9 @@ def waymo_data_prep(root_path, out_dir, workers, max_sweeps=10, - skip_gather_cam_instances_info=False): + only_gt_database=False, + skip_image_and_lidar=False, + skip_cam_instances_infos=False): """Prepare waymo dataset. There are 3 steps as follows: Step 1. Extract camera images and lidar point clouds from waymo raw @@ -191,8 +193,12 @@ def waymo_data_prep(root_path, max_sweeps (int, optional): Number of input consecutive frames. Default to 10. Here we store ego2global information of these frames for later use. - skip_gather_cam_instances_info (bool, optional): Whether to skip - gather cam_instances infos in Step 2. Default to False. + only_gt_database (bool, optional): Whether to only generate ground + truth database. Default to False. + skip_image_and_lidar (bool, optional): Whether to skip saving + image and lidar. Default to False. 
+ skip_cam_instances_infos (bool, optional): Whether to skip + gathering cam_instances infos in Step 2. Default to False. """ from tools.dataset_converters import waymo_converter as waymo @@ -205,30 +211,34 @@ def waymo_data_prep(root_path, splits = ['training', 'validation'] else: raise NotImplementedError(f'Unsupported Waymo version {version}!') - for i, split in enumerate(splits): - load_dir = osp.join(root_path, 'waymo_format', split) - if split == 'validation': - save_dir = osp.join(out_dir, 'kitti_format', 'training') - else: - save_dir = osp.join(out_dir, 'kitti_format', split) - converter = waymo.Waymo2KITTI( - load_dir, - save_dir, - prefix=str(i), - workers=workers, - test_mode=(split - in ['testing', 'testing_3d_camera_only_detection']), - info_prefix=info_prefix, - max_sweeps=max_sweeps, - split=split, - save_cam_instances=not skip_gather_cam_instances_info) - converter.convert() - if split == 'validation': - converter.merge_trainval_infos() out_dir = osp.join(out_dir, 'kitti_format') - from tools.dataset_converters.waymo_converter import \ - create_ImageSets_img_ids - create_ImageSets_img_ids(out_dir, splits) + + if not only_gt_database: + for i, split in enumerate(splits): + load_dir = osp.join(root_path, 'waymo_format', split) + if split == 'validation': + save_dir = osp.join(out_dir, 'training') + else: + save_dir = osp.join(out_dir, split) + converter = waymo.Waymo2KITTI( + load_dir, + save_dir, + prefix=str(i), + workers=workers, + test_mode=(split + in ['testing', 'testing_3d_camera_only_detection']), + info_prefix=info_prefix, + max_sweeps=max_sweeps, + split=split, + save_image_and_lidar=not skip_image_and_lidar, + save_cam_instances=not skip_cam_instances_infos) + converter.convert() + if split == 'validation': + converter.merge_trainval_infos() + + from tools.dataset_converters.waymo_converter import \ + create_ImageSets_img_ids + create_ImageSets_img_ids(out_dir, splits) GTDatabaseCreater( 'WaymoDataset', @@ -286,12 +296,18 @@ def semantickitti_data_prep(info_prefix, out_dir): parser.add_argument( '--only-gt-database', action='store_true', - help='Whether to only generate ground truth database.') + help='''Whether to only generate ground truth database. + Only used when dataset is NuScenes or Waymo!''') +parser.add_argument( + '--skip-cam_instances-infos', + action='store_true', + help='''Whether to skip gathering cam_instances infos. + Only used when dataset is Waymo!''') parser.add_argument( - '--skip-gather-cam_instances-info', + '--skip-image-and-lidar', action='store_true', - help='''Whether to skip gathering cam_instances info. - Only used when dataset is waymo!''') + help='''Whether to skip saving image and lidar. + Only used when dataset is Waymo!''') args = parser.parse_args() if __name__ == '__main__': @@ -359,7 +375,9 @@ def semantickitti_data_prep(info_prefix, out_dir): out_dir=args.out_dir, workers=args.workers, max_sweeps=args.max_sweeps, - skip_gather_cam_instances_info=args.skip_gather_cam_instances_info) + only_gt_database=args.only_gt_database, + skip_image_and_lidar=args.skip_image_and_lidar, + skip_cam_instances_infos=args.skip_cam_instances_infos) elif args.dataset == 'lyft': train_version = f'{args.version}-train' lyft_data_prep( diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index fc6f5a9114..10cc15fc40 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -46,6 +46,8 @@ class Waymo2KITTI(object): Defaults to 64. 
test_mode (bool, optional): Whether in the test_mode. Defaults to False. + save_image_and_lidar (bool, optional): Whether to save image and lidar + data. Defaults to True. save_cam_sync_instances (bool, optional): Whether to save cam sync instances. Defaults to True. save_cam_instances (bool, optional): Whether to save cam instances. @@ -62,6 +64,7 @@ def __init__(self, prefix, workers=64, test_mode=False, + save_image_and_lidar=True, save_cam_sync_instances=True, save_cam_instances=True, info_prefix='waymo', @@ -105,6 +108,7 @@ def __init__(self, self.prefix = prefix self.workers = int(workers) self.test_mode = test_mode + self.save_image_and_lidar = save_image_and_lidar self.save_cam_sync_instances = save_cam_sync_instances self.save_cam_instances = save_cam_instances self.info_prefix = info_prefix @@ -175,8 +179,9 @@ def convert_one(self, file_idx): frame.ParseFromString(bytearray(data.numpy())) # Step 1. - self.save_image(frame, file_idx, frame_idx) - self.save_lidar(frame, file_idx, frame_idx) + if self.save_image_and_lidar: + self.save_image(frame, file_idx, frame_idx) + self.save_lidar(frame, file_idx, frame_idx) # Step 2. # TODO save the depth image for waymo challenge solution. From eadd6fc629fe26e8e4669aaa6b60c099fa1519ec Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 30 Aug 2023 13:26:19 +0800 Subject: [PATCH 15/25] fail use parallel --- .../functional/waymo_utils/prediction_to_waymo.py | 6 ++++-- ...ond_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 14 ++++++++++---- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py index 765b1bfa02..7cb1b6efeb 100644 --- a/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py +++ b/mmdet3d/evaluation/functional/waymo_utils/prediction_to_waymo.py @@ -139,8 +139,10 @@ def convert(self): """Convert action.""" print_log('Start converting ...', logger='current') - objects_list = mmengine.track_parallel_progress( - self.convert_one_fast, range(len(self)), self.workers) + # objects_list = mmengine.track_parallel_progress( + # self.convert_one_fast, range(len(self)), self.workers) + objects_list = mmengine.track_progress(self.convert_one_fast, + range(len(self))) combined = metrics_pb2.Objects() for objects in objects_list: diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index ec734d8f48..87c9c3da16 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -5,8 +5,8 @@ voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] -data_root = 'data/waymo_mini/kitti_format/' -# data_root = 'data/waymo/kitti_format/' +# data_root = 'data/waymo_mini/kitti_format/' +data_root = 'data/waymo/kitti_format/' class_names = ['Car', 'Pedestrian', 'Cyclist'] metainfo = dict(classes=class_names) input_modality = dict(use_lidar=True, use_camera=False) @@ -214,10 +214,16 @@ backend_args=backend_args)) test_dataloader = val_dataloader +# val_evaluator = dict( +# type='WaymoMetric', +# ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', +# waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', +# backend_args=backend_args, +# 
convert_kitti_format=False) val_evaluator = dict( type='WaymoMetric', - ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', - waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', + ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', + waymo_bin_file='./data/waymo/waymo_format/gt.bin', backend_args=backend_args, convert_kitti_format=False) # val_evaluator = dict( From 170554052629bfdd417d9b7e498554f7270f02c6 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 30 Aug 2023 15:55:58 +0800 Subject: [PATCH 16/25] remove unused code --- mmdet3d/datasets/convert_utils.py | 7 --- mmdet3d/datasets/waymo_dataset.py | 8 --- tools/create_data.sh | 5 +- tools/dataset_converters/kitti_converter.py | 58 ++++++++++--------- .../dataset_converters/update_infos_to_v2.py | 40 +++++-------- tools/dataset_converters/waymo_converter.py | 10 ++-- 6 files changed, 57 insertions(+), 71 deletions(-) diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py index 9d65369593..66dba29117 100644 --- a/mmdet3d/datasets/convert_utils.py +++ b/mmdet3d/datasets/convert_utils.py @@ -318,7 +318,6 @@ def get_kitti_style_2d_boxes(info: dict, def convert_annos(info: dict, cam_idx: int) -> dict: """Convert front-cam anns to i-th camera (KITTI-style info).""" rect = info['calib']['R0_rect'].astype(np.float32) - # lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32) if cam_idx == 0: lidar2cami = info['calib']['Tr_velo_to_cam'].astype(np.float32) else: @@ -331,14 +330,8 @@ def convert_annos(info: dict, cam_idx: int) -> dict: rots = annos['rotation_y'] gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) - # convert gt_bboxes_3d to velodyne coordinates # BC-breaking: gt_bboxes_3d is already in lidar coordinates - # TODO: Discuss correcet_yaw if necessary - # gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to( - # Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True) # convert gt_bboxes_3d to cam coordinates - # gt_bboxes_3d = gt_bboxes_3d.convert_to( - # Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index 243630d006..f5a35f5af0 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -163,14 +163,6 @@ def parse_ann_info(self, info: dict) -> dict: centers_2d = np.zeros((0, 2), dtype=np.float32) depths = np.zeros((0), dtype=np.float32) - # in waymo, lidar2cam = R0_rect @ Tr_velo_to_cam - # convert gt_bboxes_3d to velodyne coordinates with `lidar2cam` - # lidar2cam = np.array(info['images'][self.default_cam_key] - # ['lidar2cam']) - # gt_bboxes_3d = CameraInstance3DBoxes( - # ann_info['gt_bboxes_3d']).convert_to(self.box_mode_3d, - # np.linalg.inv(lidar2cam)) - # ann_info['gt_bboxes_3d'] = gt_bboxes_3d gt_bboxes_3d = LiDARInstance3DBoxes(ann_info['gt_bboxes_3d']) anns_results = dict( diff --git a/tools/create_data.sh b/tools/create_data.sh index 9e1fe9c0ce..0a1946585d 100755 --- a/tools/create_data.sh +++ b/tools/create_data.sh @@ -10,7 +10,7 @@ WORKERS=$4 GPUS=${GPUS:-1} GPUS_PER_NODE=${GPUS_PER_NODE:-1} SRUN_ARGS=${SRUN_ARGS:-""} -JOB_NAME=create_data +PY_ARGS=${@:5} srun -p ${PARTITION} \ --job-name=${JOB_NAME} \ @@ -23,4 +23,5 @@ srun -p ${PARTITION} \ --root-path ./data/${DATASET} \ --out-dir ./data/${DATASET} \ --workers ${WORKERS} \ - 
--extra-tag ${DATASET} + --extra-tag ${DATASET} \ + ${PY_ARGS} diff --git a/tools/dataset_converters/kitti_converter.py b/tools/dataset_converters/kitti_converter.py index 57453c5ddc..e904918f60 100644 --- a/tools/dataset_converters/kitti_converter.py +++ b/tools/dataset_converters/kitti_converter.py @@ -5,6 +5,7 @@ import mmcv import mmengine import numpy as np +from mmengine import logging, print_log from nuscenes.utils.geometry_utils import view_points from mmdet3d.structures import points_cam2img @@ -109,9 +110,8 @@ def calculate_single(self, info): return info def calculate(self, infos): - # ret_infos = mmengine.track_parallel_progress(self.calculate_single, - # infos, self.num_worker) - ret_infos = [self.calculate_single(info) for info in infos] + ret_infos = mmengine.track_parallel_progress(self.calculate_single, + infos, self.num_worker) for i, ret_info in enumerate(ret_infos): infos[i] = ret_info @@ -250,10 +250,16 @@ def create_waymo_info_file(data_path, max_sweeps (int, optional): Max sweeps before the detection frame to be used. Default: 5. """ + print_log( + 'Deprecation Warning: related functions has been migrated to ' + '`Waymo2KITTI.create_waymo_info_file`. It will be removed in ' + 'the future!', + logger='current', + level=logging.WARNING) imageset_folder = Path(data_path) / 'ImageSets' train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt')) val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt')) - # test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) + test_img_ids = _read_imageset_file(str(imageset_folder / 'test.txt')) print('Generate info. this may take several minutes.') if save_path is None: @@ -269,40 +275,40 @@ def create_waymo_info_file(data_path, relative_path=relative_path, max_sweeps=max_sweeps, num_worker=workers) - # waymo_infos_gatherer_test = WaymoInfoGatherer( - # data_path, - # training=False, - # label_info=False, - # velodyne=True, - # calib=True, - # pose=True, - # relative_path=relative_path, - # max_sweeps=max_sweeps, - # num_worker=workers) - # num_points_in_gt_calculater = _NumPointsInGTCalculater( - # data_path, - # relative_path, - # num_features=6, - # remove_outside=False, - # num_worker=workers) + waymo_infos_gatherer_test = WaymoInfoGatherer( + data_path, + training=False, + label_info=False, + velodyne=True, + calib=True, + pose=True, + relative_path=relative_path, + max_sweeps=max_sweeps, + num_worker=workers) + num_points_in_gt_calculater = _NumPointsInGTCalculater( + data_path, + relative_path, + num_features=6, + remove_outside=False, + num_worker=workers) waymo_infos_train = waymo_infos_gatherer_trainval.gather(train_img_ids) - # num_points_in_gt_calculater.calculate(waymo_infos_train) + num_points_in_gt_calculater.calculate(waymo_infos_train) filename = save_path / f'{pkl_prefix}_infos_train.pkl' print(f'Waymo info train file is saved to {filename}') mmengine.dump(waymo_infos_train, filename) waymo_infos_val = waymo_infos_gatherer_trainval.gather(val_img_ids) - # num_points_in_gt_calculater.calculate(waymo_infos_val) + num_points_in_gt_calculater.calculate(waymo_infos_val) filename = save_path / f'{pkl_prefix}_infos_val.pkl' print(f'Waymo info val file is saved to {filename}') mmengine.dump(waymo_infos_val, filename) filename = save_path / f'{pkl_prefix}_infos_trainval.pkl' print(f'Waymo info trainval file is saved to {filename}') mmengine.dump(waymo_infos_train + waymo_infos_val, filename) - # waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) - # filename = save_path / 
f'{pkl_prefix}_infos_test.pkl' - # print(f'Waymo info test file is saved to {filename}') - # mmengine.dump(waymo_infos_test, filename) + waymo_infos_test = waymo_infos_gatherer_test.gather(test_img_ids) + filename = save_path / f'{pkl_prefix}_infos_test.pkl' + print(f'Waymo info test file is saved to {filename}') + mmengine.dump(waymo_infos_test, filename) def _create_reduced_point_cloud(data_path, diff --git a/tools/dataset_converters/update_infos_to_v2.py b/tools/dataset_converters/update_infos_to_v2.py index a779d6b6c7..a2ddbd0688 100644 --- a/tools/dataset_converters/update_infos_to_v2.py +++ b/tools/dataset_converters/update_infos_to_v2.py @@ -923,8 +923,8 @@ def update_waymo_infos(pkl_path, out_dir): base_img_path = Path(ori_info_dict['image']['image_path']).name for cam_idx, cam_key in enumerate(camera_types): - # temp_data_info['images'][cam_key]['timestamp'] = ori_info_dict[ - # 'timestamp'] + temp_data_info['images'][cam_key]['timestamp'] = ori_info_dict[ + 'timestamp'] temp_data_info['images'][cam_key]['img_path'] = base_img_path h, w = ori_info_dict['image']['image_shape'] @@ -934,16 +934,16 @@ def update_waymo_infos(pkl_path, out_dir): temp_data_info['images'][camera_types[0]]['width'] = w temp_data_info['lidar_points']['num_pts_feats'] = ori_info_dict[ 'point_cloud']['num_features'] - # temp_data_info['lidar_points']['timestamp'] = ori_info_dict[ - # 'timestamp'] + temp_data_info['lidar_points']['timestamp'] = ori_info_dict[ + 'timestamp'] velo_path = ori_info_dict['point_cloud'].get('velodyne_path') if velo_path is not None: temp_data_info['lidar_points']['lidar_path'] = Path(velo_path).name # TODO discuss the usage of Tr_velo_to_cam in lidar - # Trv2c = ori_info_dict['calib']['Tr_velo_to_cam'].astype(np.float32) + Trv2c = ori_info_dict['calib']['Tr_velo_to_cam'].astype(np.float32) - # temp_data_info['lidar_points']['Tr_velo_to_cam'] = Trv2c.tolist() + temp_data_info['lidar_points']['Tr_velo_to_cam'] = Trv2c.tolist() # for potential usage # temp_data_info['images']['R0_rect'] = ori_info_dict['calib'][ @@ -951,8 +951,7 @@ def update_waymo_infos(pkl_path, out_dir): # for the sweeps part: temp_data_info['timestamp'] = ori_info_dict['timestamp'] - temp_data_info['ego2global'] = ori_info_dict['pose'].astype( - np.float32).tolist() + temp_data_info['ego2global'] = ori_info_dict['pose'] for ori_sweep in ori_info_dict['sweeps']: # lidar sweeps @@ -961,22 +960,16 @@ def update_waymo_infos(pkl_path, out_dir): lidar_sweep['timestamp'] = ori_sweep['timestamp'] lidar_sweep['lidar_points']['lidar_path'] = Path( ori_sweep['velodyne_path']).name - # delete the num_pts_feats and lidar2ego - del lidar_sweep['lidar_points']['num_pts_feats'] - del lidar_sweep['lidar_points']['lidar2ego'] - # TODO: refactor image_sweeps info when we really need it. 
# image sweeps - # image_sweep = dict() - # image_sweep['sample_idx']= ori_sweep['image']['image_idx'] - # image_sweep = get_single_image_sweep(camera_types) - # image_sweep['ego2global'] = ori_sweep['pose'] - # image_sweep['timestamp'] = ori_sweep['timestamp'] - # img_path = Path(ori_sweep['image_path']).name - # for cam_idx, cam_key in enumerate(camera_types): - # image_sweep['images'][cam_key]['img_path'] = img_path + image_sweep = get_single_image_sweep(camera_types) + image_sweep['ego2global'] = ori_sweep['pose'] + image_sweep['timestamp'] = ori_sweep['timestamp'] + img_path = Path(ori_sweep['image_path']).name + for cam_idx, cam_key in enumerate(camera_types): + image_sweep['images'][cam_key]['img_path'] = img_path temp_data_info['lidar_sweeps'].append(lidar_sweep) - # temp_data_info['image_sweeps'].append(image_sweep) + temp_data_info['image_sweeps'].append(image_sweep) anns = ori_info_dict.get('annos', None) ignore_class_name = set() @@ -1072,7 +1065,6 @@ def update_waymo_infos(pkl_path, out_dir): temp_data_info, _ = clear_data_info_unused_keys(temp_data_info) converted_list.append(temp_data_info) - del data_list pkl_name = Path(pkl_path).name out_path = osp.join(out_dir, pkl_name) print(f'Writing to output file: {out_path}.') @@ -1111,8 +1103,8 @@ def generate_waymo_camera_instances(ori_info_dict, cam_keys): for cam_idx, cam_key in enumerate(cam_keys): annos = copy.deepcopy(ori_info_dict['cam_sync_annos']) - - annos = convert_annos(ori_info_dict, cam_idx) + if cam_idx != 0: + annos = convert_annos(ori_info_dict, cam_idx) ann_infos = get_kitti_style_2d_boxes( ori_info_dict, cam_idx, occluded=[0], annos=annos, dataset='waymo') diff --git a/tools/dataset_converters/waymo_converter.py b/tools/dataset_converters/waymo_converter.py index 10cc15fc40..704ce391e9 100644 --- a/tools/dataset_converters/waymo_converter.py +++ b/tools/dataset_converters/waymo_converter.py @@ -683,9 +683,9 @@ def create_ImageSets_img_ids(root_dir, splits): if not exists(save_dir): os.mkdir(save_dir) - idx_all = [[] for _ in range(4)] + idx_all = [[] for _ in splits] for i, split in enumerate(splits): - path = join(root_dir, splits[i], 'image_0') + path = join(root_dir, split, 'image_0') if not exists(path): RawNames = [] else: @@ -700,6 +700,8 @@ def create_ImageSets_img_ids(root_dir, splits): open(save_dir + 'train.txt', 'w').writelines(idx_all[0]) open(save_dir + 'val.txt', 'w').writelines(idx_all[1]) open(save_dir + 'trainval.txt', 'w').writelines(idx_all[0] + idx_all[1]) - open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) - open(save_dir + 'test_cam_only.txt', 'w').writelines(idx_all[3]) + if idx_all[2]: + open(save_dir + 'test.txt', 'w').writelines(idx_all[2]) + if idx_all[3]: + open(save_dir + 'test_cam_only.txt', 'w').writelines(idx_all[3]) print('created txt files indicating what to collect in ', splits) From 83426854d89edcc08db42735a1068015145429df Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Tue, 12 Sep 2023 11:39:27 +0800 Subject: [PATCH 17/25] fix train iter --- convert_ckpt.py | 34 ----- mmdet3d/datasets/det3d_dataset.py | 4 +- mmdet3d/datasets/waymo_dataset.py | 58 ++++++++- .../models/dense_heads/centerpoint_head.py | 2 +- mmdet3d/structures/bbox_3d/base_box3d.py | 13 +- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 30 ++--- projects/DSVT/dsvt/__init__.py | 4 +- projects/DSVT/dsvt/dsvt_head.py | 14 ++- projects/DSVT/dsvt/dynamic_pillar_vfe.py | 3 + projects/DSVT/dsvt/transforms_3d.py | 116 ++++++++++++++++++ tools/train.py | 10 ++ 11 files changed, 219 
insertions(+), 69 deletions(-)
 delete mode 100644 convert_ckpt.py
 create mode 100644 projects/DSVT/dsvt/transforms_3d.py

diff --git a/convert_ckpt.py b/convert_ckpt.py
deleted file mode 100644
index f5c3c9317e..0000000000
--- a/convert_ckpt.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# from mmengine.config import Config
-# cfg = Config.fromfile(path)
-# print(cfg)
-import torch
-
-mm3d_model = torch.load('checkpoints/dsvt_convert.pth')
-dsvt_model = dict()
-dsvt_model['model_state'] = dict()
-for k, v in mm3d_model.items():
-    if 'voxel_encoder' in k:
-        k = k.replace('voxel_encoder', 'vfe')
-    if 'middle_encoder' in k:
-        k = k.replace('middle_encoder', 'backbone_3d')
-    if 'backbone.' in k:
-        k = k.replace('backbone', 'backbone_2d')
-    if 'neck' in k:
-        k = k.replace('neck', 'backbone_2d')
-    if 'bbox_head.shared_conv' in k:
-        k = k.replace('bbox_head.shared_conv.conv', 'dense_head.shared_conv.0')
-        k = k.replace('bbox_head.shared_conv.bn', 'dense_head.shared_conv.1')
-    if 'bbox_head.task_heads' in k:
-        k = k.replace('bbox_head.task_heads', 'dense_head.heads_list')
-    if 'reg' in k:
-        k = k.replace('reg', 'center')
-    if 'height' in k:
-        k = k.replace('height', 'center_z')
-    if 'heatmap' in k:
-        k = k.replace('heatmap', 'hm')
-    if '0.conv' in k:
-        k = k.replace('0.conv', '0.0')
-    if '0.bn' in k:
-        k = k.replace('0.bn', '0.1')
-    dsvt_model['model_state'][k] = v
-torch.save(dsvt_model, 'dsvt_ckpt.pth')
diff --git a/mmdet3d/datasets/det3d_dataset.py b/mmdet3d/datasets/det3d_dataset.py
index 8e1570e794..c701a893fd 100644
--- a/mmdet3d/datasets/det3d_dataset.py
+++ b/mmdet3d/datasets/det3d_dataset.py
@@ -143,7 +143,9 @@ def __init__(self,
 
         # show statistics of this dataset
         print_log('-' * 30, 'current')
-        print_log(f'The length of the dataset: {len(self)}', 'current')
+        print_log(
+            f'The length of {"test" if self.test_mode else "training"} dataset: {len(self)}',  # noqa: E501
+            'current')
         content_show = [['category', 'number']]
         for label, num in enumerate(self.num_ins_per_cat):
             cat_name = self.metainfo['classes'][label]
diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py
index f5a35f5af0..58093a355d 100644
--- a/mmdet3d/datasets/waymo_dataset.py
+++ b/mmdet3d/datasets/waymo_dataset.py
@@ -2,7 +2,10 @@
 import os.path as osp
 from typing import Callable, List, Union
 
+import mmengine
 import numpy as np
+from mmengine import print_log
+from mmengine.fileio import load
 
 from mmdet3d.registry import DATASETS
 from mmdet3d.structures import LiDARInstance3DBoxes
@@ -176,9 +179,58 @@ def parse_ann_info(self, info: dict) -> dict:
         return anns_results
 
     def load_data_list(self) -> List[dict]:
-        """Add the load interval."""
-        data_list = super().load_data_list()
-        data_list = data_list[::self.load_interval]
+        """Add the load interval.
+
+        Returns:
+            list[dict]: A list of annotations.
+        """  # noqa: E501
+        # `self.ann_file` denotes the absolute annotation file path if
+        # `self.root=None` or relative path if `self.root=/path/to/data/`.
+        annotations = load(self.ann_file)
+        if not isinstance(annotations, dict):
+            raise TypeError(f'The annotations loaded from annotation file '
+                            f'should be a dict, but got {type(annotations)}!')
+        if 'data_list' not in annotations or 'metainfo' not in annotations:
+            raise ValueError('Annotation must have data_list and metainfo '
+                             'keys')
+        metainfo = annotations['metainfo']
+        raw_data_list = annotations['data_list']
+        raw_data_list = raw_data_list[::self.load_interval]
+        if self.load_interval > 1:
+            print_log(
+                f'Sample size will be reduced to 1/{self.load_interval} of'
+                ' the original data sample',
+                logger='current')
+
+        # Meta information loaded from the annotation file will not override
+        # the existing meta information from `BaseDataset.METAINFO` and the
+        # `metainfo` argument of the constructor.
+        for k, v in metainfo.items():
+            self._metainfo.setdefault(k, v)
+
+        # load and parse data_infos.
+        data_list = []
+        for raw_data_info in mmengine.track_iter_progress(raw_data_list):
+            # parse raw data information to target format
+            data_info = self.parse_data_info(raw_data_info)
+            if isinstance(data_info, dict):
+                # For image tasks, `data_info` should contain the information
+                # of a single image, such as dict(img_path='xxx', width=360, ...)
+                data_list.append(data_info)
+            elif isinstance(data_info, list):
+                # For video tasks, `data_info` could contain image
+                # information of multiple frames, such as
+                # [dict(video_path='xxx', timestamps=...),
+                #  dict(video_path='xxx', timestamps=...)]
+                for item in data_info:
+                    if not isinstance(item, dict):
+                        raise TypeError('data_info must be list of dict, but '
+                                        f'got {type(item)}')
+                data_list.extend(data_info)
+            else:
+                raise TypeError('data_info should be a dict or list of dict, '
+                                f'but got {type(data_info)}')
+        return data_list
 
     def parse_data_info(self, info: dict) -> Union[dict, List[dict]]:
diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py
index 12ba84234e..301db0eee0 100644
--- a/mmdet3d/models/dense_heads/centerpoint_head.py
+++ b/mmdet3d/models/dense_heads/centerpoint_head.py
@@ -101,7 +101,7 @@ def forward(self, x):
         Returns:
             dict[str: torch.Tensor]: contains the following keys:
-                -reg (torch.Tensor): 2D regression value with the 
+                -reg (torch.Tensor): 2D regression value with the
                     shape of [B, 2, H, W].
                 -height (torch.Tensor): Height value with the
                     shape of [B, 1, H, W].
diff --git a/mmdet3d/structures/bbox_3d/base_box3d.py b/mmdet3d/structures/bbox_3d/base_box3d.py
index 50b092c06e..7fb703c731 100644
--- a/mmdet3d/structures/bbox_3d/base_box3d.py
+++ b/mmdet3d/structures/bbox_3d/base_box3d.py
@@ -275,12 +275,13 @@ def in_range_3d(
             Tensor: A binary vector indicating whether each point is inside
                 the reference range.
""" - in_range_flags = ((self.tensor[:, 0] > box_range[0]) - & (self.tensor[:, 1] > box_range[1]) - & (self.tensor[:, 2] > box_range[2]) - & (self.tensor[:, 0] < box_range[3]) - & (self.tensor[:, 1] < box_range[4]) - & (self.tensor[:, 2] < box_range[5])) + gravity_center = self.gravity_center + in_range_flags = ((gravity_center[:, 0] > box_range[0]) + & (gravity_center[:, 1] > box_range[1]) + & (gravity_center[:, 2] > box_range[2]) + & (gravity_center[:, 0] < box_range[3]) + & (gravity_center[:, 1] < box_range[4]) + & (gravity_center[:, 2] < box_range[5])) return in_range_flags @abstractmethod diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index abcb15bf26..8429a2dc6c 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -2,6 +2,7 @@ custom_imports = dict( imports=['projects.DSVT.dsvt'], allow_failed_imports=False) +# load_from = 'checkpoints/dsvt_init_mm3d.pth' voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] @@ -89,7 +90,7 @@ loss_cls=dict( type='mmdet.GaussianFocalLoss', reduction='mean', loss_weight=1.0), loss_bbox=dict(type='mmdet.L1Loss', reduction='mean', loss_weight=2.0), - loss_iou=dict(type='mmdet.L1Loss', reduction='none', loss_weight=1.0), + loss_iou=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=1.0), loss_reg_iou=dict( type='mmdet3d.DIoU3DLoss', reduction='mean', loss_weight=2.0), norm_bbox=True), @@ -163,9 +164,9 @@ rot_range=[-0.78539816, 0.78539816], scale_ratio_range=[0.95, 1.05], translation_std=[0.5, 0.5, 0.5]), - dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), - dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), - dict(type='ObjectNameFilter', classes=class_names), + dict(type='DSVTPointsRangeFilter', point_cloud_range=point_cloud_range), + dict(type='DSVTObjectRangeFilter', point_cloud_range=point_cloud_range), + # dict(type='ObjectNameFilter', classes=class_names), dict(type='PointShuffle'), dict( type='Pack3DDetInputs', @@ -181,8 +182,11 @@ norm_intensity=True, norm_elongation=True, backend_args=backend_args), - dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), - dict(type='Pack3DDetInputs', keys=['points']) + dict(type='DSVTPointsRangeFilter', point_cloud_range=point_cloud_range), + dict( + type='Pack3DDetInputs', + keys=['points'], + meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp']) ] dataset_type = 'WaymoDataset' @@ -237,17 +241,9 @@ waymo_bin_file='./data/waymo/waymo_format/gt.bin', backend_args=backend_args, convert_kitti_format=False) -# val_evaluator = dict( -# type='WaymoMetric', -# ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', -# waymo_bin_file='./data/waymo/waymo_format/gt.bin', -# data_root='./data/waymo/waymo_format', -# backend_args=backend_args, -# convert_kitti_format=False, -# idx2metainfo='./data/waymo/waymo_format/idx2metainfo.pkl') test_evaluator = val_evaluator -vis_backends = [dict(type='LocalVisBackend')] +vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') lr = 1e-5 @@ -295,7 +291,7 @@ ] # runtime settings -train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=12) 
+train_cfg = dict(by_epoch=True, max_epochs=12, val_interval=1) # runtime settings val_cfg = dict() @@ -309,5 +305,5 @@ default_hooks = dict( logger=dict(type='LoggerHook', interval=50), - checkpoint=dict(type='CheckpointHook', interval=5)) + checkpoint=dict(type='CheckpointHook', interval=1)) custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)] diff --git a/projects/DSVT/dsvt/__init__.py b/projects/DSVT/dsvt/__init__.py index 4fff505fb3..ed602d6a8a 100644 --- a/projects/DSVT/dsvt/__init__.py +++ b/projects/DSVT/dsvt/__init__.py @@ -4,9 +4,11 @@ from .dynamic_pillar_vfe import DynamicPillarVFE3D from .map2bev import PointPillarsScatter3D from .res_second import ResSECOND +from .transforms_3d import DSVTObjectRangeFilter, DSVTPointsRangeFilter from .utils import DSVTBBoxCoder __all__ = [ 'DSVTCenterHead', 'DSVT', 'DSVTMiddleEncoder', 'DynamicPillarVFE3D', - 'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder' + 'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder', + 'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter' ] diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 2d7d8d03db..e6ff075305 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -161,6 +161,7 @@ def calc_iou_loss(self, iou_preds, batch_box_preds, mask, ind, gt_boxes): iou_target = iou_target * 2 - 1 # [0, 1] ==> [-1, 1] loss = self.loss_iou(selected_iou_preds.view(-1), iou_target) + loss = loss / torch.clamp(mask.sum(), min=1e-4) return loss def calc_iou_reg_loss(self, batch_box_preds, mask, ind, gt_boxes): @@ -222,10 +223,10 @@ def get_targets( # Transpose inds inds = list(map(list, zip(*inds))) inds = [torch.stack(inds_) for inds_ in inds] - # Transpose inds + # Transpose masks masks = list(map(list, zip(*masks))) masks = [torch.stack(masks_) for masks_ in masks] - # Transpose inds + # Transpose task_gt_bboxes task_gt_bboxes = list(map(list, zip(*task_gt_bboxes))) return heatmaps, anno_boxes, inds, masks, task_gt_bboxes @@ -358,8 +359,8 @@ def get_targets_single(self, anno_box[new_idx] = torch.cat([ center - torch.tensor([x, y], device=device), z.unsqueeze(0), box_dim, - torch.sin(rot).unsqueeze(0), - torch.cos(rot).unsqueeze(0) + torch.cos(rot).unsqueeze(0), + torch.sin(rot).unsqueeze(0) ]) heatmaps.append(heatmap) @@ -432,7 +433,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], loss_dict[f'task{task_id}.loss_iou'] = self.calc_iou_loss( iou_preds=preds_dict[0]['iou'], batch_box_preds=batch_box_preds_for_iou.clone().detach(), - mask=mask.all(dim=-1), + mask=masks[task_id], ind=ind, gt_boxes=task_gt_bboxes[task_id]) @@ -440,7 +441,7 @@ def loss_by_feat(self, preds_dicts: Tuple[List[dict]], loss_dict[f'task{task_id}.loss_reg_iou'] = \ self.calc_iou_reg_loss( batch_box_preds=batch_box_preds_for_iou, - mask=mask.all(dim=-1), + mask=masks[task_id], ind=ind, gt_boxes=task_gt_bboxes[task_id]) @@ -517,6 +518,7 @@ def predict_by_feat(self, preds_dicts: Tuple[List[dict]], else: batch_dim = preds_dict[0]['dim'] + # It's different from CenterHead batch_rotc = preds_dict[0]['rot'][:, 0].unsqueeze(1) batch_rots = preds_dict[0]['rot'][:, 1].unsqueeze(1) batch_iou = (preds_dict[0]['iou'] + diff --git a/projects/DSVT/dsvt/dynamic_pillar_vfe.py b/projects/DSVT/dsvt/dynamic_pillar_vfe.py index 97c75aaf00..6d21a69713 100644 --- a/projects/DSVT/dsvt/dynamic_pillar_vfe.py +++ b/projects/DSVT/dsvt/dynamic_pillar_vfe.py @@ -1,4 +1,5 @@ # modified from https://github.com/Haiyang-W/DSVT +import numpy as np import torch import torch.nn as nn import 
torch_scatter
@@ -76,6 +77,8 @@ def __init__(self, with_distance, use_absolute_xyz, use_norm, num_filters,
         self.voxel_x = voxel_size[0]
         self.voxel_y = voxel_size[1]
         self.voxel_z = voxel_size[2]
+        # TODO: remove it after the accuracy is aligned
+        point_cloud_range = np.array(point_cloud_range).astype(np.float32)
         self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
         self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
         self.z_offset = self.voxel_z / 2 + point_cloud_range[2]
diff --git a/projects/DSVT/dsvt/transforms_3d.py b/projects/DSVT/dsvt/transforms_3d.py
new file mode 100644
index 0000000000..03d2c38dd7
--- /dev/null
+++ b/projects/DSVT/dsvt/transforms_3d.py
@@ -0,0 +1,116 @@
+from typing import List
+
+import numpy as np
+from mmcv.transforms import BaseTransform
+
+from mmdet3d.registry import TRANSFORMS
+
+
+@TRANSFORMS.register_module()
+class DSVTObjectRangeFilter(BaseTransform):
+    """Filter objects by the range. It differs from `ObjectRangeFilter` by
+    using `in_range_3d` instead of `in_range_bev`.
+
+    Required Keys:
+
+    - gt_bboxes_3d
+
+    Modified Keys:
+
+    - gt_bboxes_3d
+
+    Args:
+        point_cloud_range (list[float]): Point cloud range.
+    """
+
+    def __init__(self, point_cloud_range: List[float]) -> None:
+        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
+
+    def transform(self, input_dict: dict) -> dict:
+        """Transform function to filter objects by the range.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after filtering, 'gt_bboxes_3d', 'gt_labels_3d'
+            keys are updated in the result dict.
+        """
+        gt_bboxes_3d = input_dict['gt_bboxes_3d']
+        gt_labels_3d = input_dict['gt_labels_3d']
+        mask = gt_bboxes_3d.in_range_3d(self.pcd_range)
+        gt_bboxes_3d = gt_bboxes_3d[mask]
+        # mask is a torch tensor but gt_labels_3d is still numpy array
+        # using mask to index gt_labels_3d will cause bug when
+        # len(gt_labels_3d) == 1, where mask=1 will be interpreted
+        # as gt_labels_3d[1] and cause out of index error
+        gt_labels_3d = gt_labels_3d[mask.numpy().astype(bool)]
+
+        # limit rad to [-pi, pi]
+        gt_bboxes_3d.limit_yaw(offset=0.5, period=2 * np.pi)
+        input_dict['gt_bboxes_3d'] = gt_bboxes_3d
+        input_dict['gt_labels_3d'] = gt_labels_3d
+
+        return input_dict
+
+    def __repr__(self) -> str:
+        """str: Return a string that describes the module."""
+        repr_str = self.__class__.__name__
+        repr_str += f'(point_cloud_range={self.pcd_range.tolist()})'
+        return repr_str
+
+
+@TRANSFORMS.register_module()
+class DSVTPointsRangeFilter(BaseTransform):
+    """Filter points by the range. It differs from `PointsRangeFilter` by using
+    `in_range_bev` instead of `in_range_3d`.
+
+    Required Keys:
+
+    - points
+    - pts_instance_mask (optional)
+
+    Modified Keys:
+
+    - points
+    - pts_instance_mask (optional)
+
+    Args:
+        point_cloud_range (list[float]): Point cloud range.
+    """
+
+    def __init__(self, point_cloud_range: List[float]) -> None:
+        self.pcd_range = np.array(point_cloud_range, dtype=np.float32)
+
+    def transform(self, input_dict: dict) -> dict:
+        """Transform function to filter points by the range.
+
+        Args:
+            input_dict (dict): Result dict from loading pipeline.
+
+        Returns:
+            dict: Results after filtering, 'points', 'pts_instance_mask'
+            and 'pts_semantic_mask' keys are updated in the result dict.
+ """ + points = input_dict['points'] + points_mask = points.in_range_bev(self.pcd_range[[0, 1, 3, 4]]) + clean_points = points[points_mask] + input_dict['points'] = clean_points + points_mask = points_mask.numpy() + + pts_instance_mask = input_dict.get('pts_instance_mask', None) + pts_semantic_mask = input_dict.get('pts_semantic_mask', None) + + if pts_instance_mask is not None: + input_dict['pts_instance_mask'] = pts_instance_mask[points_mask] + + if pts_semantic_mask is not None: + input_dict['pts_semantic_mask'] = pts_semantic_mask[points_mask] + + return input_dict + + def __repr__(self) -> str: + """str: Return a string that describes the module.""" + repr_str = self.__class__.__name__ + repr_str += f'(point_cloud_range={self.pcd_range.tolist()})' + return repr_str diff --git a/tools/train.py b/tools/train.py index b2ced54b05..6b9c3b0842 100644 --- a/tools/train.py +++ b/tools/train.py @@ -21,6 +21,12 @@ def parse_args(): action='store_true', default=False, help='enable automatic-mixed-precision training') + parser.add_argument( + '--sync_bn', + choices=['none', 'torch', 'mmcv'], + default='none', + help='convert all BatchNorm layers in the model to SyncBatchNorm ' + '(SyncBN) or mmcv.ops.sync_bn.SyncBatchNorm (MMSyncBN) layers.') parser.add_argument( '--auto-scale-lr', action='store_true', @@ -98,6 +104,10 @@ def main(): cfg.optim_wrapper.type = 'AmpOptimWrapper' cfg.optim_wrapper.loss_scale = 'dynamic' + # convert BatchNorm layers + if args.sync_bn != 'none': + cfg.sync_bn = args.sync_bn + # enable automatically scaling LR if args.auto_scale_lr: if 'auto_scale_lr' in cfg and \ From 2c9b33392718d3bca4c7c3ead4b54c4509d3b191 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Fri, 15 Sep 2023 14:46:21 +0800 Subject: [PATCH 18/25] fix dis aug and model init --- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 28 ++++---- projects/DSVT/dsvt/__init__.py | 3 +- projects/DSVT/dsvt/disable_aug_hook.py | 69 +++++++++++++++++++ projects/DSVT/dsvt/dsvt_head.py | 5 ++ 4 files changed, 91 insertions(+), 14 deletions(-) create mode 100644 projects/DSVT/dsvt/disable_aug_hook.py diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 8429a2dc6c..8220057a83 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -2,7 +2,7 @@ custom_imports = dict( imports=['projects.DSVT.dsvt'], allow_failed_imports=False) -# load_from = 'checkpoints/dsvt_init_mm3d.pth' +# load_from = 'checkpoints/dsvt_init.pth' voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] @@ -143,15 +143,8 @@ load_dim=6, use_dim=5, norm_intensity=True, + norm_elongation=True, backend_args=backend_args), - # Add this if using `MultiFrameDeformableDecoderRPN` - # dict( - # type='LoadPointsFromMultiSweeps', - # sweeps_num=9, - # load_dim=6, - # use_dim=[0, 1, 2, 3, 4], - # pad_empty_sweeps=True, - # remove_close=True), dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='ObjectSample', db_sampler=db_sampler), dict( @@ -194,6 +187,7 @@ batch_size=1, num_workers=4, persistent_workers=True, + # sampler=dict(type='DefaultSampler', shuffle=False), sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type=dataset_type, @@ -211,8 +205,8 @@ 
load_interval=5,
         backend_args=backend_args))
 val_dataloader = dict(
-    batch_size=1,
-    num_workers=1,
+    batch_size=4,
+    num_workers=4,
     persistent_workers=True,
     drop_last=False,
     sampler=dict(type='DefaultSampler', shuffle=False),
@@ -244,6 +238,7 @@
 test_evaluator = val_evaluator
 
 vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]
+# vis_backends = [dict(type='LocalVisBackend')]
 visualizer = dict(
     type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
 lr = 1e-5
@@ -251,7 +246,7 @@
 # max_norm=10 is better for SECOND
 optim_wrapper = dict(
     type='OptimWrapper',
-    optimizer=dict(type='Adam', lr=lr, weight_decay=0.05, betas=(0.9, 0.999)),
+    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.05, betas=(0.9, 0.99)),
     clip_grad=dict(max_norm=10, norm_type=2))
 # learning rate
 param_scheduler = [
@@ -306,4 +301,11 @@
 default_hooks = dict(
     logger=dict(type='LoggerHook', interval=50),
     checkpoint=dict(type='CheckpointHook', interval=1))
-custom_hooks = [dict(type='DisableObjectSampleHook', disable_after_epoch=11)]
+custom_hooks = [
+    dict(
+        type='DisableAugHook',
+        disable_after_epoch=11,
+        disable_aug_list=[
+            'GlobalRotScaleTrans', 'RandomFlip3D', 'ObjectSample'
+        ])
+]
diff --git a/projects/DSVT/dsvt/__init__.py b/projects/DSVT/dsvt/__init__.py
index ed602d6a8a..58395863a4 100644
--- a/projects/DSVT/dsvt/__init__.py
+++ b/projects/DSVT/dsvt/__init__.py
@@ -1,3 +1,4 @@
+from .disable_aug_hook import DisableAugHook
 from .dsvt import DSVT
 from .dsvt_head import DSVTCenterHead
 from .dsvt_transformer import DSVTMiddleEncoder
@@ -10,5 +11,5 @@
 __all__ = [
     'DSVTCenterHead', 'DSVT', 'DSVTMiddleEncoder', 'DynamicPillarVFE3D',
     'PointPillarsScatter3D', 'ResSECOND', 'DSVTBBoxCoder',
-    'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter'
+    'DSVTObjectRangeFilter', 'DSVTPointsRangeFilter', 'DisableAugHook'
 ]
diff --git a/projects/DSVT/dsvt/disable_aug_hook.py b/projects/DSVT/dsvt/disable_aug_hook.py
new file mode 100644
index 0000000000..5a4dff5fb2
--- /dev/null
+++ b/projects/DSVT/dsvt/disable_aug_hook.py
@@ -0,0 +1,69 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from typing import List
+
+from mmengine.dataset import BaseDataset
+from mmengine.hooks import Hook
+from mmengine.model import is_model_wrapper
+from mmengine.runner import Runner
+
+from mmdet3d.registry import HOOKS
+
+
+@HOOKS.register_module()
+class DisableAugHook(Hook):
+    """The hook of disabling augmentations during training.
+
+    Args:
+        disable_after_epoch (int): The epoch after which the listed
+            data augmentations are disabled in the training.
+            Defaults to 15.
+        disable_aug_list (list): The list of data augmentations to
+            disable in the training. Defaults to [].
+    """
+
+    def __init__(self,
+                 disable_after_epoch: int = 15,
+                 disable_aug_list: List = []):
+        self.disable_after_epoch = disable_after_epoch
+        self.disable_aug_list = disable_aug_list
+        self._restart_dataloader = False
+
+    def before_train_epoch(self, runner: Runner):
+        """Disable the listed augmentations once the target epoch is reached.
+
+        Args:
+            runner (Runner): The runner.
+ """ + epoch = runner.epoch + train_loader = runner.train_dataloader + model = runner.model + # TODO: refactor after mmengine using model wrapper + if is_model_wrapper(model): + model = model.module + if epoch == self.disable_after_epoch: + + dataset = runner.train_dataloader.dataset + # handle dataset wrapper + if not isinstance(dataset, BaseDataset): + dataset = dataset.dataset + new_transforms = [] + for transform in dataset.pipeline.transforms: # noqa: E501 + if transform.__class__.__name__ not in self.disable_aug_list: + new_transforms.append(transform) + else: + runner.logger.info( + f'Disable {transform.__class__.__name__}') + dataset.pipeline.transforms = new_transforms + # The dataset pipeline cannot be updated when persistent_workers + # is True, so we need to force the dataloader's multi-process + # restart. This is a very hacky approach. + if hasattr(train_loader, 'persistent_workers' + ) and train_loader.persistent_workers is True: + train_loader._DataLoader__initialized = False + train_loader._iterator = None + self._restart_dataloader = True + else: + # Once the restart is complete, we need to restore + # the initialization flag. + if self._restart_dataloader: + train_loader._DataLoader__initialized = True diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index e6ff075305..74bae83f2e 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -32,6 +32,11 @@ def __init__(self, self.loss_iou_reg = MODELS.build( loss_reg_iou) if loss_reg_iou is not None else None + def init_weights(self): + super().init_weights() + for task_head in self.task_heads: + task_head.init_weights() + def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. From 98247780baa74761c1b47dabe4cf79c57fe384c4 Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 13:28:08 +0800 Subject: [PATCH 19/25] train align --- mmdet3d/datasets/waymo_dataset.py | 2 +- mmdet3d/evaluation/metrics/waymo_metric.py | 4 ++-- .../models/dense_heads/centerpoint_head.py | 2 +- mmdet3d/models/necks/second_fpn.py | 18 +++++++++------ ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 6 +++-- projects/DSVT/dsvt/dsvt_head.py | 23 ++++++++++++++++--- projects/DSVT/dsvt/res_second.py | 13 +---------- projects/DSVT/dsvt/utils.py | 4 ++-- 8 files changed, 42 insertions(+), 30 deletions(-) diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index 58093a355d..f12462eaa8 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -210,7 +210,7 @@ def load_data_list(self) -> List[dict]: # load and parse data_infos. data_list = [] - for raw_data_info in mmengine.track_iter_progress(raw_data_list): + for raw_data_info in raw_data_list: # parse raw data information to target format data_info = self.parse_data_info(raw_data_info) if isinstance(data_info, dict): diff --git a/mmdet3d/evaluation/metrics/waymo_metric.py b/mmdet3d/evaluation/metrics/waymo_metric.py index fb8ac495e6..dc2f209b9a 100644 --- a/mmdet3d/evaluation/metrics/waymo_metric.py +++ b/mmdet3d/evaluation/metrics/waymo_metric.py @@ -36,7 +36,7 @@ class WaymoMetric(KittiMetric): prefix (str, optional): The prefix that will be added in the metric names to disambiguate homonymous metrics of different evaluators. If prefix is not provided in the argument, self.default_prefix will - be used instead. Defaults to None. + be used instead. Defaults to Waymo metric. 
format_only (bool): Format the output results without perform evaluation. It is useful when you want to format the result to a specific format and submit it to the test server. @@ -83,7 +83,7 @@ def __init__(self, metric: Union[str, List[str]] = 'mAP', pcd_limit_range: List[float] = [-85, -85, -5, 85, 85, 5], convert_kitti_format: bool = True, - prefix: Optional[str] = None, + prefix: Optional[str] = 'Waymo metric', format_only: bool = False, pklfile_prefix: Optional[str] = None, submission_prefix: Optional[str] = None, diff --git a/mmdet3d/models/dense_heads/centerpoint_head.py b/mmdet3d/models/dense_heads/centerpoint_head.py index 301db0eee0..c3fc187964 100644 --- a/mmdet3d/models/dense_heads/centerpoint_head.py +++ b/mmdet3d/models/dense_heads/centerpoint_head.py @@ -217,7 +217,7 @@ def forward(self, x): Returns: dict[str: torch.Tensor]: contains the following keys: - -reg (torch.Tensor): 2D regression value with the + -reg (torch.Tensor): 2D regression value with the shape of [B, 2, H, W]. -height (torch.Tensor): Height value with the shape of [B, 1, H, W]. diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index 90e57ec05c..e52a26ec4c 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -21,6 +21,10 @@ class SECONDFPN(BaseModule): upsample_cfg (dict): Config dict of upsample layers. conv_cfg (dict): Config dict of conv layers. use_conv_for_no_stride (bool): Whether to use conv when stride is 1. + init_cfg (dict or :obj:`ConfigDict` or list[dict or :obj:`ConfigDict`], + optional): Initialization config dict. Defaults to + [dict(type='Kaiming', layer='ConvTranspose2d'), + dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)]. """ def __init__(self, @@ -31,7 +35,13 @@ def __init__(self, upsample_cfg=dict(type='deconv', bias=False), conv_cfg=dict(type='Conv2d', bias=False), use_conv_for_no_stride=False, - init_cfg=None): + init_cfg=[ + dict(type='Kaiming', layer='ConvTranspose2d'), + dict( + type='Constant', + layer='NaiveSyncBatchNorm2d', + val=1.0) + ]): # if for GroupNorm, # cfg is dict(type='GN', num_groups=num_groups, eps=1e-3, affine=True) super(SECONDFPN, self).__init__(init_cfg=init_cfg) @@ -64,12 +74,6 @@ def __init__(self, deblocks.append(deblock) self.deblocks = nn.ModuleList(deblocks) - if init_cfg is None: - self.init_cfg = [ - dict(type='Kaiming', layer='ConvTranspose2d'), - dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0) - ] - def forward(self, x): """Forward function. 
diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 8220057a83..9d0be465e8 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -133,6 +133,8 @@ coord_type='LIDAR', load_dim=6, use_dim=[0, 1, 2, 3, 4], + norm_intensity=True, + norm_elongation=True, backend_args=backend_args), backend_args=backend_args) @@ -192,7 +194,7 @@ dataset=dict( type=dataset_type, data_root=data_root, - ann_file='waymo_infos_train.pkl', + ann_file='waymo_wo_cam_ins_infos_train.pkl', data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), pipeline=train_pipeline, modality=input_modality, @@ -214,7 +216,7 @@ type=dataset_type, data_root=data_root, data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), - ann_file='waymo_infos_val.pkl', + ann_file='waymo_wo_cam_ins_infos_val.pkl', pipeline=test_pipeline, modality=input_modality, test_mode=True, diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 74bae83f2e..808d3c3310 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -12,6 +12,10 @@ gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr +from mmengine.model import kaiming_init +import math +from torch.nn.init import constant_ +import torch.nn as nn @MODELS.register_module() @@ -33,9 +37,22 @@ def __init__(self, loss_reg_iou) if loss_reg_iou is not None else None def init_weights(self): - super().init_weights() - for task_head in self.task_heads: - task_head.init_weights() + kaiming_init( + self.shared_conv.conv, + a=math.sqrt(5), + mode='fan_in', + nonlinearity='leaky_relu', + distribution='uniform') + for head in self.task_heads[0].heads: + if head == 'heatmap': + constant_(self.task_heads[0].__getattr__(head)[-1].bias, + self.task_heads[0].init_bias) + else: + for m in self.task_heads[0].__getattr__(head).modules(): + if isinstance(m, nn.Conv2d): + kaiming_init( + m, mode='fan_in', nonlinearity='leaky_relu') + def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. diff --git a/projects/DSVT/dsvt/res_second.py b/projects/DSVT/dsvt/res_second.py index e1ddc1be6c..0755f895f9 100644 --- a/projects/DSVT/dsvt/res_second.py +++ b/projects/DSVT/dsvt/res_second.py @@ -78,8 +78,6 @@ class ResSECOND(BaseModule): out_channels (list[int]): Output channels for multi-scale feature maps. blocks_nums (list[int]): Number of blocks in each stage. layer_strides (list[int]): Strides of each stage. - norm_cfg (dict): Config dict of normalization layers. - conv_cfg (dict): Config dict of convolutional layers. 
""" def __init__(self, @@ -87,8 +85,7 @@ def __init__(self, out_channels: Sequence[int] = [128, 128, 256], blocks_nums: Sequence[int] = [1, 2, 2], layer_strides: Sequence[int] = [2, 2, 2], - init_cfg: OptMultiConfig = None, - pretrained: Optional[str] = None) -> None: + init_cfg: OptMultiConfig = None) -> None: super(ResSECOND, self).__init__(init_cfg=init_cfg) assert len(layer_strides) == len(blocks_nums) assert len(out_channels) == len(blocks_nums) @@ -108,14 +105,6 @@ def __init__(self, BasicResBlock(out_channels[i], out_channels[i])) blocks.append(nn.Sequential(*cur_layers)) self.blocks = nn.Sequential(*blocks) - assert not (init_cfg and pretrained), \ - 'init_cfg and pretrained cannot be setting at the same time' - if isinstance(pretrained, str): - warnings.warn('DeprecationWarning: pretrained is a deprecated, ' - 'please use "init_cfg" instead') - self.init_cfg = dict(type='Pretrained', checkpoint=pretrained) - else: - self.init_cfg = dict(type='Kaiming', layer='Conv2d') def forward(self, x: Tensor) -> Tuple[Tensor, ...]: """Forward function. diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 4d697d2dbb..7845b27f3f 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -9,7 +9,7 @@ from mmdet3d.models.task_modules import CenterPointBBoxCoder from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds - +from mmengine import print_log get_inner_win_inds_cuda = ingroup_inds @@ -267,7 +267,7 @@ def decode(self, thresh_mask = final_scores > self.score_threshold if self.post_center_range is not None: - self.post_center_range = torch.tensor( + self.post_center_range = torch.as_tensor( self.post_center_range, device=heat.device) mask = (final_box_preds[..., :3] >= self.post_center_range[:3]).all(2) From 681423f9e48186a674c4bee7ef640824ec83774d Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 13:32:29 +0800 Subject: [PATCH 20/25] fix lint --- mmdet3d/datasets/waymo_dataset.py | 1 - mmdet3d/models/necks/second_fpn.py | 2 +- projects/DSVT/dsvt/dsvt_head.py | 9 ++++----- projects/DSVT/dsvt/res_second.py | 4 +--- projects/DSVT/dsvt/utils.py | 2 +- 5 files changed, 7 insertions(+), 11 deletions(-) diff --git a/mmdet3d/datasets/waymo_dataset.py b/mmdet3d/datasets/waymo_dataset.py index f12462eaa8..0d131164e9 100644 --- a/mmdet3d/datasets/waymo_dataset.py +++ b/mmdet3d/datasets/waymo_dataset.py @@ -2,7 +2,6 @@ import os.path as osp from typing import Callable, List, Union -import mmengine import numpy as np from mmengine import print_log from mmengine.fileio import load diff --git a/mmdet3d/models/necks/second_fpn.py b/mmdet3d/models/necks/second_fpn.py index e52a26ec4c..d4dc590c15 100644 --- a/mmdet3d/models/necks/second_fpn.py +++ b/mmdet3d/models/necks/second_fpn.py @@ -22,7 +22,7 @@ class SECONDFPN(BaseModule): conv_cfg (dict): Config dict of conv layers. use_conv_for_no_stride (bool): Whether to use conv when stride is 1. init_cfg (dict or :obj:`ConfigDict` or list[dict or :obj:`ConfigDict`], - optional): Initialization config dict. Defaults to + optional): Initialization config dict. Defaults to [dict(type='Kaiming', layer='ConvTranspose2d'), dict(type='Constant', layer='NaiveSyncBatchNorm2d', val=1.0)]. 
""" diff --git a/projects/DSVT/dsvt/dsvt_head.py b/projects/DSVT/dsvt/dsvt_head.py index 808d3c3310..650a585dfb 100644 --- a/projects/DSVT/dsvt/dsvt_head.py +++ b/projects/DSVT/dsvt/dsvt_head.py @@ -1,10 +1,14 @@ +import math from typing import Dict, List, Tuple import torch +import torch.nn as nn from mmcv.ops import boxes_iou3d from mmdet.models.utils import multi_apply +from mmengine.model import kaiming_init from mmengine.structures import InstanceData from torch import Tensor +from torch.nn.init import constant_ from mmdet3d.models import CenterHead from mmdet3d.models.layers import circle_nms, nms_bev @@ -12,10 +16,6 @@ gaussian_radius) from mmdet3d.registry import MODELS from mmdet3d.structures import Det3DDataSample, xywhr2xyxyr -from mmengine.model import kaiming_init -import math -from torch.nn.init import constant_ -import torch.nn as nn @MODELS.register_module() @@ -53,7 +53,6 @@ def init_weights(self): kaiming_init( m, mode='fan_in', nonlinearity='leaky_relu') - def forward_single(self, x: Tensor) -> dict: """Forward function for CenterPoint. diff --git a/projects/DSVT/dsvt/res_second.py b/projects/DSVT/dsvt/res_second.py index 0755f895f9..072c586181 100644 --- a/projects/DSVT/dsvt/res_second.py +++ b/projects/DSVT/dsvt/res_second.py @@ -1,7 +1,5 @@ # modified from https://github.com/Haiyang-W/DSVT - -import warnings -from typing import Optional, Sequence, Tuple +from typing import Sequence, Tuple from mmengine.model import BaseModule from torch import Tensor diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py index 7845b27f3f..978f9154da 100644 --- a/projects/DSVT/dsvt/utils.py +++ b/projects/DSVT/dsvt/utils.py @@ -9,7 +9,7 @@ from mmdet3d.models.task_modules import CenterPointBBoxCoder from mmdet3d.registry import MODELS, TASK_UTILS from .ops.ingroup_inds.ingroup_inds_op import ingroup_inds -from mmengine import print_log + get_inner_win_inds_cuda = ingroup_inds From fd0825ab094e06e7c13769af8587430c6edb5354 Mon Sep 17 00:00:00 2001 From: sunjihao1999 <578431509@qq.com> Date: Mon, 18 Sep 2023 14:48:10 +0800 Subject: [PATCH 21/25] fix basepoints in_range_3d --- mmdet3d/structures/points/base_points.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mmdet3d/structures/points/base_points.py b/mmdet3d/structures/points/base_points.py index 188d20d270..4cb54ce895 100644 --- a/mmdet3d/structures/points/base_points.py +++ b/mmdet3d/structures/points/base_points.py @@ -247,10 +247,10 @@ def in_range_3d( """ in_range_flags = ((self.tensor[:, 0] > point_range[0]) & (self.tensor[:, 1] > point_range[1]) - # & (self.tensor[:, 2] > point_range[2]) + & (self.tensor[:, 2] > point_range[2]) & (self.tensor[:, 0] < point_range[3]) - & (self.tensor[:, 1] < point_range[4])) - # & (self.tensor[:, 2] < point_range[5])) + & (self.tensor[:, 1] < point_range[4]) + & (self.tensor[:, 2] < point_range[5])) return in_range_flags @property From 49a564ae619acd4f708e240f4bd68850c8624d33 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Wed, 27 Dec 2023 20:21:59 +0800 Subject: [PATCH 22/25] reset unused --- mmdet3d/datasets/convert_utils.py | 16 ++++++------- projects/DSVT/README.md | 14 +++++++---- ...ecfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 24 ++++++------------- tools/dataset_converters/kitti_converter.py | 7 ------ tools/dataset_converters/kitti_data_utils.py | 7 +----- 5 files changed, 25 insertions(+), 43 deletions(-) diff --git a/mmdet3d/datasets/convert_utils.py b/mmdet3d/datasets/convert_utils.py index 66dba29117..cb4d97e137 100644 
--- a/mmdet3d/datasets/convert_utils.py +++ b/mmdet3d/datasets/convert_utils.py @@ -10,7 +10,7 @@ from shapely.geometry import MultiPoint, box from shapely.geometry.polygon import Polygon -from mmdet3d.structures import Box3DMode, LiDARInstance3DBoxes, points_cam2img +from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img from mmdet3d.structures.ops import box_np_ops kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck', @@ -318,11 +318,8 @@ def get_kitti_style_2d_boxes(info: dict, def convert_annos(info: dict, cam_idx: int) -> dict: """Convert front-cam anns to i-th camera (KITTI-style info).""" rect = info['calib']['R0_rect'].astype(np.float32) - if cam_idx == 0: - lidar2cami = info['calib']['Tr_velo_to_cam'].astype(np.float32) - else: - lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype( - np.float32) + lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32) + lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32) annos = info['annos'] converted_annos = copy.deepcopy(annos) loc = annos['location'] @@ -330,11 +327,12 @@ def convert_annos(info: dict, cam_idx: int) -> dict: rots = annos['rotation_y'] gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32) - # BC-breaking: gt_bboxes_3d is already in lidar coordinates + # convert gt_bboxes_3d to velodyne coordinates + gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to( + Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True) # convert gt_bboxes_3d to cam coordinates - gt_bboxes_3d = LiDARInstance3DBoxes(gt_bboxes_3d).convert_to( + gt_bboxes_3d = gt_bboxes_3d.convert_to( Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy() - converted_annos['location'] = gt_bboxes_3d[:, :3] converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6] converted_annos['rotation_y'] = gt_bboxes_3d[:, 6] diff --git a/projects/DSVT/README.md b/projects/DSVT/README.md index a4b45b570d..c1d8cf2744 100644 --- a/projects/DSVT/README.md +++ b/projects/DSVT/README.md @@ -57,18 +57,24 @@ python tools/test.py projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1- ### Training commands -The support of training DSVT is on the way. 
+In MMDetection3D's root directory, run the following command to train the model:
+
+```bash
+tools/dist_train.sh projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py 8 --sync_bn torch
+```
 
 ## Results and models
 
 ### Waymo
 
-| Middle Encoder | Backbone | Load Interval | Voxel type (voxel size) | Multi-Class NMS | Multi-frames | Mem (GB) | Inf time (fps) | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download |
-| :------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------: | :-----------: | :---------------------: | :-------------: | :----------: | :------: | :------------: | :----: | :-----: | :----: | :---------: | :------: |
-| [DSVT](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | [ResSECOND](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | 5 | voxel (0.32) | ✓ | × | | | 75.2 | 72.2 | 68.9 | 66.1 | |
+| Middle Encoder | Backbone | Load Interval | Voxel type (voxel size) | Multi-Class NMS | Multi-frames | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download |
+| :------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------: | :-----------: | :---------------------: | :-------------: | :----------: | :----: | :-----: | :----: | :---------: | :-------: |
+| [DSVT](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | [ResSECOND](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | 5 | voxel (0.32) | ✓ | × | 75.5 | 72.4 | 69.2 | 66.3 | [log](<>) |
 
 **Note** that `ResSECOND` denotes that the base block in SECOND has residual layers.
 
+**Note** Regrettably, we are unable to provide the pre-trained model weights due to [Waymo Dataset License Agreement](https://waymo.com/open/terms/). However, we can provide the training logs.
+ ## Citation ```latex diff --git a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py index 9d0be465e8..bf56259183 100644 --- a/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py +++ b/projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py @@ -6,7 +6,6 @@ voxel_size = [0.32, 0.32, 6] grid_size = [468, 468, 1] point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4.0] -# data_root = 'data/waymo_mini/kitti_format/' data_root = 'data/waymo/kitti_format/' class_names = ['Car', 'Pedestrian', 'Cyclist'] metainfo = dict(classes=class_names) @@ -194,7 +193,7 @@ dataset=dict( type=dataset_type, data_root=data_root, - ann_file='waymo_wo_cam_ins_infos_train.pkl', + ann_file='waymo_infos_train.pkl', data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), pipeline=train_pipeline, modality=input_modality, @@ -216,7 +215,7 @@ type=dataset_type, data_root=data_root, data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'), - ann_file='waymo_wo_cam_ins_infos_val.pkl', + ann_file='waymo_infos_val.pkl', pipeline=test_pipeline, modality=input_modality, test_mode=True, @@ -225,32 +224,23 @@ backend_args=backend_args)) test_dataloader = val_dataloader -# val_evaluator = dict( -# type='WaymoMetric', -# ann_file='./data/waymo_mini/kitti_format/waymo_infos_val.pkl', -# waymo_bin_file='./data/waymo_mini/waymo_format/gt_mini.bin', -# backend_args=backend_args, -# convert_kitti_format=False) val_evaluator = dict( type='WaymoMetric', - ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl', waymo_bin_file='./data/waymo/waymo_format/gt.bin', - backend_args=backend_args, - convert_kitti_format=False) + result_prefix='./dsvt_pred') test_evaluator = val_evaluator -vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')] -# vis_backends = [dict(type='LocalVisBackend')] +# vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')] +vis_backends = [dict(type='LocalVisBackend')] visualizer = dict( type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer') + +# schedules lr = 1e-5 -# This schedule is mainly used by models on nuScenes dataset -# max_norm=10 is better for SECOND optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=lr, weight_decay=0.05, betas=(0.9, 0.99)), clip_grad=dict(max_norm=10, norm_type=2)) -# learning rate param_scheduler = [ dict( type='CosineAnnealingLR', diff --git a/tools/dataset_converters/kitti_converter.py b/tools/dataset_converters/kitti_converter.py index e904918f60..367cfd7ba9 100644 --- a/tools/dataset_converters/kitti_converter.py +++ b/tools/dataset_converters/kitti_converter.py @@ -5,7 +5,6 @@ import mmcv import mmengine import numpy as np -from mmengine import logging, print_log from nuscenes.utils.geometry_utils import view_points from mmdet3d.structures import points_cam2img @@ -250,12 +249,6 @@ def create_waymo_info_file(data_path, max_sweeps (int, optional): Max sweeps before the detection frame to be used. Default: 5. """ - print_log( - 'Deprecation Warning: related functions has been migrated to ' - '`Waymo2KITTI.create_waymo_info_file`. 
It will be removed in ' - 'the future!', - logger='current', - level=logging.WARNING) imageset_folder = Path(data_path) / 'ImageSets' train_img_ids = _read_imageset_file(str(imageset_folder / 'train.txt')) val_img_ids = _read_imageset_file(str(imageset_folder / 'val.txt')) diff --git a/tools/dataset_converters/kitti_data_utils.py b/tools/dataset_converters/kitti_data_utils.py index b0e90bd8d3..64c3bc415b 100644 --- a/tools/dataset_converters/kitti_data_utils.py +++ b/tools/dataset_converters/kitti_data_utils.py @@ -148,14 +148,10 @@ def get_label_anno(label_path): for x in content]).reshape(-1, 3) annotations['rotation_y'] = np.array([float(x[14]) for x in content]).reshape(-1) - if len(content) != 0 and len(content[0]) >= 16: # have score + if len(content) != 0 and len(content[0]) == 16: # have score annotations['score'] = np.array([float(x[15]) for x in content]) else: annotations['score'] = np.zeros((annotations['bbox'].shape[0], )) - # have num_lidar_points_in_box, given in waymo - if len(content) != 0 and len(content[0]) == 17: - annotations['num_points_in_gt'] = np.array( - [int(x[16]) for x in content]) index = list(range(num_objects)) + [-1] * (num_gt - num_objects) annotations['index'] = np.array(index, dtype=np.int32) annotations['group_ids'] = np.arange(num_gt, dtype=np.int32) @@ -556,7 +552,6 @@ def gather(self, image_ids): image_infos = mmengine.track_parallel_progress(self.gather_single, image_ids, self.num_worker) - # image_infos = mmengine.track_progress(self.gather_single, image_ids) return list(image_infos) From 6d56789d7a22a0e63eebdb9b3c7add3da0b56096 Mon Sep 17 00:00:00 2001 From: sunjiahao1999 <578431509@qq.com> Date: Thu, 28 Dec 2023 16:22:02 +0800 Subject: [PATCH 23/25] resolve review --- projects/DSVT/README.md | 6 +++--- ...second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 8 +++----- projects/DSVT/dsvt/__init__.py | 4 ++-- projects/DSVT/dsvt/dsvt_head.py | 7 ++++--- projects/DSVT/dsvt/dynamic_pillar_vfe.py | 1 - projects/DSVT/dsvt/res_second.py | 2 ++ projects/DSVT/dsvt/transforms_3d.py | 4 ++-- projects/DSVT/dsvt/utils.py | 11 ++++++----- 8 files changed, 22 insertions(+), 21 deletions(-) diff --git a/projects/DSVT/README.md b/projects/DSVT/README.md index c1d8cf2744..bb1b008236 100644 --- a/projects/DSVT/README.md +++ b/projects/DSVT/README.md @@ -67,9 +67,9 @@ tools/dist_train.sh projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-c ### Waymo -| Middle Encoder | Backbone | Load Interval | Voxel type (voxel size) | Multi-Class NMS | Multi-frames | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download | -| :------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------: | :-----------: | :---------------------: | :-------------: | :----------: | :----: | :-----: | :----: | :---------: | :-------: | -| [DSVT](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | [ResSECOND](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | 5 | voxel (0.32) | ✓ | × | 75.5 | 72.4 | 69.2 | 66.3 | [log](<>) | +| Middle Encoder | Backbone | Load Interval | Voxel type (voxel size) | Multi-Class NMS | Multi-frames | mAP@L1 | mAPH@L1 | mAP@L2 | **mAPH@L2** | Download | +| :------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------: | :-----------: | :---------------------: | :-------------: | 
:----------: | :----: | :-----: | :----: | :---------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [DSVT](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | [ResSECOND](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | 5 | voxel (0.32) | ✓ | × | 75.5 | 72.4 | 69.2 | 66.3 | \[log\](<>) |
diff --git a/projects/DSVT/dsvt/transforms_3d.py b/projects/DSVT/dsvt/transforms_3d.py
 
 @TRANSFORMS.register_module()
-class DSVTPointsRangeFilter(BaseTransform):
+class PointsRangeFilter3D(BaseTransform):
     """Filter points by the range. It differs from `PointsRangeFilter` by using
     `in_range_bev` instead of `in_range_3d`.
diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py
index 978f9154da..d8b5d7dfa1 100644
--- a/projects/DSVT/dsvt/utils.py
+++ b/projects/DSVT/dsvt/utils.py
@@ -313,13 +313,13 @@ def center_to_corner2d(center, dim):
 @weighted_loss
 def diou3d_loss(pred_boxes, gt_boxes, eps: float = 1e-7):
     """
-    https://github.com/agent-sgs/PillarNet/blob/master/det3d/core/utils/center_utils.py # noqa
+    modified from https://github.com/agent-sgs/PillarNet/blob/master/det3d/core/utils/center_utils.py # noqa
     Args:
         pred_boxes (N, 7):
         gt_boxes (N, 7):
 
     Returns:
-        _type_: _description_
+        Tensor: Distance-IoU Loss.
 
     """
     assert pred_boxes.shape[0] == gt_boxes.shape[0]
@@ -371,14 +371,15 @@ def diou3d_loss(pred_boxes, gt_boxes, eps: float = 1e-7):
 @MODELS.register_module()
 class DIoU3DLoss(nn.Module):
     r"""3D bboxes Implementation of `Distance-IoU Loss: Faster and Better
-    Learning for Bounding Box Regression https://arxiv.org/abs/1911.08287`_.
+    Learning for Bounding Box Regression <https://arxiv.org/abs/1911.08287>`.
 
     Code is modified from https://github.com/Zzh-tju/DIoU.
 
     Args:
-        eps (float): Epsilon to avoid log(0).
+        eps (float): Epsilon to avoid log(0). Defaults to 1e-6.
         reduction (str): Options are "none", "mean" and "sum".
+            Defaults to "mean".
-        loss_weight (float): Weight of loss.
+        loss_weight (float): Weight of loss. Defaults to 1.0.
     """
 
     def __init__(self,

From fc93172d4dd3dd0ee216d18d8293b03c8f8433b3 Mon Sep 17 00:00:00 2001
From: sjh <578431509@qq.com>
Date: Thu, 28 Dec 2023 20:59:58 +0800
Subject: [PATCH 24/25] resolve review 2

---
 projects/DSVT/README.md                                     | 6 ++++--
 ...2_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py | 2 --
 projects/DSVT/dsvt/utils.py                                 | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/projects/DSVT/README.md b/projects/DSVT/README.md
index bb1b008236..d042c7c374 100644
--- a/projects/DSVT/README.md
+++ b/projects/DSVT/README.md
@@ -71,9 +71,11 @@ tools/dist_train.sh projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-c
 | :------------------------------------------------------------------------------------: | :-----------------------------------------------------------------------------------------: | :-----------: | :---------------------: | :-------------: | :----------: | :----: | :-----: | :----: | :---------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------: |
 | [DSVT](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | [ResSECOND](./configs/dsvt_voxel032_res-second_secfpn_8xb1-cyclic-12e_waymoD5-3d-3class.py) | 5 | voxel (0.32) | ✓ | × | 75.5 | 72.4 | 69.2 | 66.3 | \[log\](<>) |
diff --git a/projects/DSVT/dsvt/utils.py b/projects/DSVT/dsvt/utils.py
-    Learning for Bounding Box Regression <https://arxiv.org/abs/1911.08287>`.
+    Learning for Bounding Box Regression <https://arxiv.org/abs/1911.08287>`_.
 
     Code is modified from https://github.com/Zzh-tju/DIoU.
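
Since patch 24 only touches the `DIoU3DLoss` docstring, it is worth making the quantity it documents concrete. Below is a self-contained 2D, axis-aligned sketch of the Distance-IoU loss from the cited paper (Zheng et al.); it is illustrative only and is not the project's rotated, 7-DoF `diou3d_loss`:

```python
import torch


def diou_loss_2d(pred: torch.Tensor, gt: torch.Tensor,
                 eps: float = 1e-7) -> torch.Tensor:
    """pred, gt: (N, 4) axis-aligned boxes as (x1, y1, x2, y2)."""
    # plain IoU
    lt = torch.max(pred[:, :2], gt[:, :2])
    rb = torch.min(pred[:, 2:], gt[:, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[:, 0] * wh[:, 1]
    area_pred = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    iou = inter / (area_pred + area_gt - inter + eps)

    # squared center distance, normalised by the squared diagonal of the
    # smallest box enclosing both inputs
    ctr_pred = (pred[:, :2] + pred[:, 2:]) / 2
    ctr_gt = (gt[:, :2] + gt[:, 2:]) / 2
    rho2 = ((ctr_pred - ctr_gt)**2).sum(dim=1)
    enclose_lt = torch.min(pred[:, :2], gt[:, :2])
    enclose_rb = torch.max(pred[:, 2:], gt[:, 2:])
    c2 = ((enclose_rb - enclose_lt)**2).sum(dim=1) + eps

    # DIoU = IoU - rho^2 / c^2, so the loss is 1 - DIoU
    return 1 - iou + rho2 / c2


pred = torch.tensor([[0., 0., 2., 2.]])
gt = torch.tensor([[1., 1., 3., 3.]])
print(diou_loss_2d(pred, gt))  # ~0.97: 1 - IoU plus the center penalty
```

For the pair above, IoU is 1/7, the squared center offset is 2, and the squared enclosing-box diagonal is 18, so the loss lands slightly above 1 - IoU; that extra term is exactly the center-distance penalty that a plain IoU loss lacks, and it is what `loss_reg_iou` adds on top of the L1 box regression in the config.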
From 7bbaeb7b3e0da79f14ffbea0a92d2b9e3c234860 Mon Sep 17 00:00:00 2001
From: sjh <578431509@qq.com>
Date: Thu, 28 Dec 2023 21:09:16 +0800
Subject: [PATCH 25/25] fix doc

---
 projects/DSVT/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/projects/DSVT/README.md b/projects/DSVT/README.md
index d042c7c374..d60e49abe5 100644
--- a/projects/DSVT/README.md
+++ b/projects/DSVT/README.md
@@ -73,9 +73,9 @@ tools/dist_train.sh projects/DSVT/configs/dsvt_voxel032_res-second_secfpn_8xb1-c
 
 **Note**:
 
-\- that `ResSECOND` denotes that the base block in SECOND has residual layers.
+- `ResSECOND` denotes that the base block in SECOND has residual layers.
 
-\- Regrettably, we are unable to provide the pre-trained model weights due to [Waymo Dataset License Agreement](https://waymo.com/open/terms/), so we only provide the training logs as shown above.
+- Regrettably, we are unable to provide the pre-trained model weights due to [Waymo Dataset License Agreement](https://waymo.com/open/terms/), so we only provide the training logs as shown above.
 
 ## Citation
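
One easy-to-miss detail in the training patches above is the rotation encoding: the patched `get_targets_single` packs `cos(rot)` before `sin(rot)` into the regression target (the reverse of `CenterHead`'s sin-then-cos order), and `predict_by_feat` correspondingly reads channel 0 as the cosine. A tiny round-trip check, purely illustrative and mirroring the convention visible in the diffs, shows why the two sides must agree:

```python
import torch

yaw = torch.tensor(0.3)
# encode as in the patched `get_targets_single`: cos first, then sin
rot_target = torch.stack([torch.cos(yaw), torch.sin(yaw)])
# decode as in `predict_by_feat`: channel 0 -> rotc, channel 1 -> rots
rotc, rots = rot_target[0], rot_target[1]
assert torch.allclose(torch.atan2(rots, rotc), yaw)
# with CenterHead's sin-first packing, the same decoder would recover
# atan2(cos(yaw), sin(yaw)) = pi/2 - yaw instead of yaw
```

Training targets and the box decoder therefore had to be changed together; swapping only one of the two would silently rotate every predicted box by a yaw-dependent error rather than raise any exception.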