From 52a2503f341b93ef0a87887d38b2986b3549381b Mon Sep 17 00:00:00 2001 From: VVsssssk Date: Tue, 7 Jun 2022 13:40:11 +0800 Subject: [PATCH 1/3] fix parta2 bug --- ...v_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py | 3 +++ mmdet3d/models/detectors/two_stage.py | 3 ++- mmdet3d/models/middle_encoders/sparse_unet.py | 9 +++++---- .../models/roi_heads/bbox_heads/parta2_bbox_head.py | 12 ++++++++++-- 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py b/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py index 4bb3b2c94..116623189 100644 --- a/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py +++ b/configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-3class.py @@ -90,6 +90,7 @@ pipeline=train_pipeline, modality=input_modality, classes=class_names, + box_type_3d='LiDAR', test_mode=False)), val=dict( type=dataset_type, @@ -100,6 +101,7 @@ pipeline=test_pipeline, modality=input_modality, classes=class_names, + box_type_3d='LiDAR', test_mode=True), test=dict( type=dataset_type, @@ -110,6 +112,7 @@ pipeline=test_pipeline, modality=input_modality, classes=class_names, + box_type_3d='LiDAR', test_mode=True)) # Part-A2 uses a different learning rate from what SECOND uses. diff --git a/mmdet3d/models/detectors/two_stage.py b/mmdet3d/models/detectors/two_stage.py index 06a036b0c..707f706d5 100644 --- a/mmdet3d/models/detectors/two_stage.py +++ b/mmdet3d/models/detectors/two_stage.py @@ -30,7 +30,8 @@ def __init__(self, 'please use "init_cfg" instead') backbone.pretrained = pretrained self.backbone = build_backbone(backbone) - + self.train_cfg = train_cfg + self.test_cfg = test_cfg if neck is not None: self.neck = build_neck(neck) diff --git a/mmdet3d/models/middle_encoders/sparse_unet.py b/mmdet3d/models/middle_encoders/sparse_unet.py index c8af2ed0b..005e34ebe 100644 --- a/mmdet3d/models/middle_encoders/sparse_unet.py +++ b/mmdet3d/models/middle_encoders/sparse_unet.py @@ -11,6 +11,7 @@ from mmcv.runner import BaseModule, auto_fp16 from mmdet3d.ops import SparseBasicBlock, make_sparse_convmodule +from mmdet3d.ops.sparse_block import replace_feature from ..builder import MIDDLE_ENCODERS @@ -168,10 +169,11 @@ def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer, :obj:`SparseConvTensor`: Upsampled feature. """ x = lateral_layer(x_lateral) - x.features = torch.cat((x_bottom.features, x.features), dim=1) + x = replace_feature(x, torch.cat((x_bottom.features, x.features), + dim=1)) x_merge = merge_layer(x) x = self.reduce_channel(x, x_merge.features.shape[1]) - x.features = x_merge.features + x.features + x = replace_feature(x, x_merge.features + x.features) x = upsample_layer(x) return x @@ -191,8 +193,7 @@ def reduce_channel(x, out_channels): n, in_channels = features.shape assert (in_channels % out_channels == 0) and (in_channels >= out_channels) - - x.features = features.view(n, out_channels, -1).sum(dim=2) + x = replace_feature(x, features.view(n, out_channels, -1).sum(dim=2)) return x def make_encoder_layers(self, make_block, norm_cfg, in_channels): diff --git a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py index c569c4e35..6f5ea722b 100644 --- a/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py +++ b/mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py @@ -2,7 +2,15 @@ import numpy as np import torch from mmcv.cnn import ConvModule, normal_init -from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential + +from mmdet3d.ops.spconv import IS_SPCONV2_AVAILABLE + +if IS_SPCONV2_AVAILABLE: + from spconv.pytorch import (SparseConvTensor, SparseMaxPool3d, + SparseSequential) +else: + from mmcv.ops import SparseConvTensor, SparseMaxPool3d, SparseSequential + from mmcv.runner import BaseModule from torch import nn as nn @@ -252,7 +260,7 @@ def forward(self, seg_feats, part_feats): sparse_idx[:, 2], sparse_idx[:, 3]] seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1], sparse_idx[:, 2], sparse_idx[:, 3]] - coords = sparse_idx.int() + coords = sparse_idx.int().contiguous() part_features = SparseConvTensor(part_features, coords, sparse_shape, rcnn_batch_size) seg_features = SparseConvTensor(seg_features, coords, sparse_shape, From 7e0f1ceabf28ac0bfbc8371489bc4f122005b865 Mon Sep 17 00:00:00 2001 From: VVsssssk Date: Mon, 26 Sep 2022 19:21:20 +0800 Subject: [PATCH 2/3] fix --- .../point_rcnn_2x8_kitti-3d-3classes.py | 49 ++++++++++++++++--- mmdet3d/models/dense_heads/point_rpn_head.py | 8 ++- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py b/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py index 1344aca5c..3725e4b7c 100644 --- a/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py +++ b/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py @@ -9,7 +9,14 @@ class_names = ['Car', 'Pedestrian', 'Cyclist'] point_cloud_range = [0, -40, -3, 70.4, 40, 1] input_modality = dict(use_lidar=True, use_camera=False) - +file_client_args = dict( + backend='petrel', + path_mapping=dict({ + './data/kitti/': + 's3://openmmlab/datasets/detection3d/kitti/', + 'data/kitti/': + 's3://openmmlab/datasets/detection3d/kitti/' + })) db_sampler = dict( data_root=data_root, info_path=data_root + 'kitti_dbinfos_train.pkl', @@ -18,11 +25,25 @@ filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)), sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15), + points_loader=dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=4, + file_client_args=file_client_args), classes=class_names) train_pipeline = [ - dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), - dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), + dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=4, + use_dim=4, + file_client_args=file_client_args), + dict( + type='LoadAnnotations3D', + with_bbox_3d=True, + with_label_3d=True, + file_client_args=file_client_args), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectSample', db_sampler=db_sampler), @@ -44,7 +65,12 @@ dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] test_pipeline = [ - dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), + dict( + type='LoadPointsFromFile', + coord_type='LIDAR', + load_dim=4, + use_dim=4, + file_client_args=file_client_args), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), @@ -74,9 +100,18 @@ train=dict( type='RepeatDataset', times=2, - dataset=dict(pipeline=train_pipeline, classes=class_names)), - val=dict(pipeline=test_pipeline, classes=class_names), - test=dict(pipeline=test_pipeline, classes=class_names)) + dataset=dict( + pipeline=train_pipeline, + classes=class_names, + file_client_args=file_client_args)), + val=dict( + pipeline=test_pipeline, + classes=class_names, + file_client_args=file_client_args), + test=dict( + pipeline=test_pipeline, + classes=class_names, + file_client_args=file_client_args)) # optimizer lr = 0.001 # max learning rate diff --git a/mmdet3d/models/dense_heads/point_rpn_head.py b/mmdet3d/models/dense_heads/point_rpn_head.py index 546cf1665..8704acb92 100644 --- a/mmdet3d/models/dense_heads/point_rpn_head.py +++ b/mmdet3d/models/dense_heads/point_rpn_head.py @@ -271,9 +271,13 @@ def get_bboxes(self, for b in range(batch_size): bbox3d = self.bbox_coder.decode(bbox_preds[b], points[b, ..., :3], object_class[b]) + mask = ~bbox3d.sum(dim=1).isinf() bbox_selected, score_selected, labels, cls_preds_selected = \ - self.class_agnostic_nms(obj_scores[b], sem_scores[b], bbox3d, - points[b, ..., :3], input_metas[b]) + self.class_agnostic_nms(obj_scores[b][mask], + sem_scores[b][mask, :], + bbox3d[mask, :], + points[b, ..., :3][mask, :], + input_metas[b]) bbox = input_metas[b]['box_type_3d']( bbox_selected.clone(), box_dim=bbox_selected.shape[-1], From 9e2ccbd7d6558f49ed6c090e0c1cb3815a0685dd Mon Sep 17 00:00:00 2001 From: VVsssssk Date: Wed, 28 Sep 2022 13:08:10 +0800 Subject: [PATCH 3/3] fix --- .../point_rcnn_2x8_kitti-3d-3classes.py | 49 +++---------------- 1 file changed, 7 insertions(+), 42 deletions(-) diff --git a/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py b/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py index 3725e4b7c..1344aca5c 100644 --- a/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py +++ b/configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py @@ -9,14 +9,7 @@ class_names = ['Car', 'Pedestrian', 'Cyclist'] point_cloud_range = [0, -40, -3, 70.4, 40, 1] input_modality = dict(use_lidar=True, use_camera=False) -file_client_args = dict( - backend='petrel', - path_mapping=dict({ - './data/kitti/': - 's3://openmmlab/datasets/detection3d/kitti/', - 'data/kitti/': - 's3://openmmlab/datasets/detection3d/kitti/' - })) + db_sampler = dict( data_root=data_root, info_path=data_root + 'kitti_dbinfos_train.pkl', @@ -25,25 +18,11 @@ filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)), sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15), - points_loader=dict( - type='LoadPointsFromFile', - coord_type='LIDAR', - load_dim=4, - file_client_args=file_client_args), classes=class_names) train_pipeline = [ - dict( - type='LoadPointsFromFile', - coord_type='LIDAR', - load_dim=4, - use_dim=4, - file_client_args=file_client_args), - dict( - type='LoadAnnotations3D', - with_bbox_3d=True, - with_label_3d=True, - file_client_args=file_client_args), + dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), + dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True), dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range), dict(type='ObjectSample', db_sampler=db_sampler), @@ -65,12 +44,7 @@ dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']) ] test_pipeline = [ - dict( - type='LoadPointsFromFile', - coord_type='LIDAR', - load_dim=4, - use_dim=4, - file_client_args=file_client_args), + dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4), dict( type='MultiScaleFlipAug3D', img_scale=(1333, 800), @@ -100,18 +74,9 @@ train=dict( type='RepeatDataset', times=2, - dataset=dict( - pipeline=train_pipeline, - classes=class_names, - file_client_args=file_client_args)), - val=dict( - pipeline=test_pipeline, - classes=class_names, - file_client_args=file_client_args), - test=dict( - pipeline=test_pipeline, - classes=class_names, - file_client_args=file_client_args)) + dataset=dict(pipeline=train_pipeline, classes=class_names)), + val=dict(pipeline=test_pipeline, classes=class_names), + test=dict(pipeline=test_pipeline, classes=class_names)) # optimizer lr = 0.001 # max learning rate