From 057ad6ec50fcf43532f1ed5bdc1655cd5c6eccd2 Mon Sep 17 00:00:00 2001
From: Ziyi Wu
Date: Mon, 9 Aug 2021 15:11:26 +0800
Subject: [PATCH] [Enhance] Benchmark PAConvCUDA on S3DIS (#847)

* add paconv_cuda readme

* add unit test

* fix link

* fix link
---
 configs/paconv/README.md                      |  2 +
 ...sg_8x8_cosine_200e_s3dis_seg-3d-13class.py | 69 ++++++++++++++++++
 tests/test_models/test_segmentors.py          | 72 +++++++++++++++++++
 3 files changed, 143 insertions(+)
 create mode 100644 configs/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py

diff --git a/configs/paconv/README.md b/configs/paconv/README.md
index ec22fa835..38b31337d 100644
--- a/configs/paconv/README.md
+++ b/configs/paconv/README.md
@@ -25,11 +25,13 @@ We implement PAConv and provide the result and checkpoints on S3DIS dataset.
 
 | Method | Split | Lr schd | Mem (GB) | Inf time (fps) | mIoU (Val set) | Download |
 | :-------------------------------------------------------------------------: | :----: | :---------: | :------: | :------------: | :------------: | :----------------------: |
 | [PAConv (SSG)](./paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class.py) | Area_5 | cosine 150e | 5.8 | | 66.65 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class_20210729_200615-2147b2d1.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class/paconv_ssg_8x8_cosine_150e_s3dis_seg-3d-13class_20210729_200615.log.json) |
+| [PAConv\* (SSG)](./paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py) | Area_5 | cosine 200e | 3.8 | | 65.33 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class_20210802_171802-e5ea9bb9.pth) | [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class_20210802_171802.log.json) |
 
 **Notes:**
 
 - We use XYZ+Color+Normalized_XYZ as input in all experiments on the S3DIS dataset.
 - `Area_5` Split means training the model on Area_1, 2, 3, 4, 6 and testing on Area_5.
+- PAConv\* stands for the CUDA implementation of PAConv operations. See appendix section D of the [paper](https://arxiv.org/pdf/2103.14635.pdf) for more details. In our experiments, the training of PAConv\* was found to be very unstable. We achieved a slightly lower mIoU than the result reported in the paper, but it is consistent with the result obtained by running their [official code](https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg). Moreover, although the GPU memory consumption of PAConv\* is significantly lower than that of PAConv, its training and inference speeds are actually slower (by ~10%).
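+
+As a quick reference, the new config can be trained with MMDetection3D's standard distributed launcher. This is a minimal sketch rather than part of the benchmark itself; the GPU count of 8 simply mirrors the `8x8` (GPUs x samples per GPU) in the config name:
+
+```shell
+# train PAConv* (CUDA) on S3DIS with 8 GPUs, 8 samples per GPU
+./tools/dist_train.sh configs/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py 8
+```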
 
 ## Indeterminism
diff --git a/configs/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py b/configs/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py
new file mode 100644
index 000000000..b2a1440e8
--- /dev/null
+++ b/configs/paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py
@@ -0,0 +1,69 @@
+_base_ = [
+    '../_base_/datasets/s3dis_seg-3d-13class.py',
+    '../_base_/models/paconv_cuda_ssg.py',
+    '../_base_/schedules/seg_cosine_150e.py', '../_base_/default_runtime.py'
+]
+
+# data settings
+class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
+               'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
+num_points = 4096
+train_pipeline = [
+    dict(
+        type='LoadPointsFromFile',
+        coord_type='DEPTH',
+        shift_height=False,
+        use_color=True,
+        load_dim=6,
+        use_dim=[0, 1, 2, 3, 4, 5]),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=False,
+        with_label_3d=False,
+        with_mask_3d=False,
+        with_seg_3d=True),
+    dict(
+        type='PointSegClassMapping',
+        valid_cat_ids=tuple(range(len(class_names))),
+        max_cat_id=13),
+    dict(
+        type='IndoorPatchPointSample',
+        num_points=num_points,
+        block_size=1.0,
+        use_normalized_coord=True,
+        num_try=10000,
+        enlarge_size=None,
+        min_unique_num=num_points // 4,
+        eps=0.0),
+    dict(type='NormalizePointsColor', color_mean=None),
+    dict(
+        type='GlobalRotScaleTrans',
+        rot_range=[0.0, 6.283185307179586],  # [0, 2 * pi]
+        scale_ratio_range=[0.8, 1.2],
+        translation_std=[0, 0, 0]),
+    dict(
+        type='RandomJitterPoints',
+        jitter_std=[0.01, 0.01, 0.01],
+        clip_range=[-0.05, 0.05]),
+    dict(type='RandomDropPointsColor', drop_ratio=0.2),
+    dict(type='DefaultFormatBundle3D', class_names=class_names),
+    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
+]
+
+data = dict(samples_per_gpu=8, train=dict(pipeline=train_pipeline))
+evaluation = dict(interval=1)
+
+# model settings
+model = dict(
+    decode_head=dict(
+        num_classes=13, ignore_index=13,
+        loss_decode=dict(class_weight=None)),  # S3DIS doesn't use class_weight
+    test_cfg=dict(
+        num_points=4096,
+        block_size=1.0,
+        sample_rate=0.5,
+        use_normalized_coord=True,
+        batch_size=12))
+
+# runtime settings
+runner = dict(max_epochs=200)
diff --git a/tests/test_models/test_segmentors.py b/tests/test_models/test_segmentors.py
index c17a183bf..3b46a86a4 100644
--- a/tests/test_models/test_segmentors.py
+++ b/tests/test_models/test_segmentors.py
@@ -231,3 +231,75 @@ def test_paconv_ssg():
         results = self.forward(return_loss=False, **data_dict)
         assert results[0]['semantic_mask'].shape == torch.Size([200])
         assert results[1]['semantic_mask'].shape == torch.Size([100])
+
+
+def test_paconv_cuda_ssg():
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+
+    set_random_seed(0, True)
+    paconv_cuda_ssg_cfg = _get_segmentor_cfg(
+        'paconv/paconv_cuda_ssg_8x8_cosine_200e_s3dis_seg-3d-13class.py')
+    # for GPU memory consideration
+    paconv_cuda_ssg_cfg.backbone.num_points = (256, 64, 16, 4)
+    paconv_cuda_ssg_cfg.test_cfg.num_points = 32
+    self = build_segmentor(paconv_cuda_ssg_cfg).cuda()
+    points = [torch.rand(1024, 9).float().cuda() for _ in range(2)]
+    img_metas = [dict(), dict()]
+    gt_masks = [torch.randint(0, 13, (1024, )).long().cuda() for _ in range(2)]
+
+    # test forward_train
+    losses = self.forward_train(points, img_metas, gt_masks)
+    assert losses['decode.loss_sem_seg'].item() >= 0
+    assert losses['regularize.loss_regularize'].item() >= 0
+
+    # test forward function
+    set_random_seed(0, True)
+    data_dict = dict(
+        points=points, img_metas=img_metas, pts_semantic_mask=gt_masks)
+    forward_losses = self.forward(return_loss=True, **data_dict)
+    assert np.allclose(losses['decode.loss_sem_seg'].item(),
+                       forward_losses['decode.loss_sem_seg'].item())
+    assert np.allclose(losses['regularize.loss_regularize'].item(),
+                       forward_losses['regularize.loss_regularize'].item())
+
+    # test loss with ignore_index
+    ignore_masks = [torch.ones_like(gt_masks[0]) * 13 for _ in range(2)]
+    losses = self.forward_train(points, img_metas, ignore_masks)
+    assert losses['decode.loss_sem_seg'].item() == 0
+
+    # test simple_test
+    self.eval()
+    with torch.no_grad():
+        scene_points = [
+            torch.randn(200, 6).float().cuda() * 3.0,
+            torch.randn(100, 6).float().cuda() * 2.5
+        ]
+        results = self.simple_test(scene_points, img_metas)
+        assert results[0]['semantic_mask'].shape == torch.Size([200])
+        assert results[1]['semantic_mask'].shape == torch.Size([100])
+
+    # test forward function calling simple_test
+    with torch.no_grad():
+        data_dict = dict(points=[scene_points], img_metas=[img_metas])
+        results = self.forward(return_loss=False, **data_dict)
+        assert results[0]['semantic_mask'].shape == torch.Size([200])
+        assert results[1]['semantic_mask'].shape == torch.Size([100])
+
+    # test aug_test
+    with torch.no_grad():
+        scene_points = [
+            torch.randn(2, 200, 6).float().cuda() * 3.0,
+            torch.randn(2, 100, 6).float().cuda() * 2.5
+        ]
+        img_metas = [[dict(), dict()], [dict(), dict()]]
+        results = self.aug_test(scene_points, img_metas)
+        assert results[0]['semantic_mask'].shape == torch.Size([200])
+        assert results[1]['semantic_mask'].shape == torch.Size([100])
+
+    # test forward function calling aug_test
+    with torch.no_grad():
+        data_dict = dict(points=scene_points, img_metas=img_metas)
+        results = self.forward(return_loss=False, **data_dict)
+        assert results[0]['semantic_mask'].shape == torch.Size([200])
+        assert results[1]['semantic_mask'].shape == torch.Size([100])
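+
+
+# A minimal sketch of how to run this new test in isolation (assumes a
+# CUDA-capable environment; without one, the test skips itself):
+#
+#   pytest tests/test_models/test_segmentors.py::test_paconv_cuda_ssg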