From de629347882d27c7a21410a8e0330ea7d160572a Mon Sep 17 00:00:00 2001
From: JingweiZhang12
Date: Thu, 20 Oct 2022 10:26:31 +0800
Subject: [PATCH 1/4] refactor coco_video_metric

---
 .../datasets/imagenet_vid_fgfa_style.py      |   2 +-
 .../evaluation/metrics/coco_video_metric.py  | 124 ++++++++++--------
 2 files changed, 69 insertions(+), 57 deletions(-)

diff --git a/configs/_base_/datasets/imagenet_vid_fgfa_style.py b/configs/_base_/datasets/imagenet_vid_fgfa_style.py
index d090f4bde..72b5cdf82 100644
--- a/configs/_base_/datasets/imagenet_vid_fgfa_style.py
+++ b/configs/_base_/datasets/imagenet_vid_fgfa_style.py
@@ -87,6 +87,6 @@
 # evaluator
 val_evaluator = dict(
     type='CocoVideoMetric',
-    ann_file=data_root + 'annotations/imagenet_vid_val.json',
+    # ann_file=data_root + 'annotations/imagenet_vid_val.json',
     metric='bbox')
 test_evaluator = val_evaluator
diff --git a/mmtrack/evaluation/metrics/coco_video_metric.py b/mmtrack/evaluation/metrics/coco_video_metric.py
index 158917cbd..9eaa12228 100644
--- a/mmtrack/evaluation/metrics/coco_video_metric.py
+++ b/mmtrack/evaluation/metrics/coco_video_metric.py
@@ -3,17 +3,17 @@
 from typing import Optional, Sequence
 
 from mmdet.datasets.api_wrappers import COCO
-from mmdet.evaluation import CocoMetric
 from mmdet.structures.mask import encode_mask_results
 from mmengine.dist import broadcast_object_list, is_main_process
 from mmengine.fileio import FileClient
+from mmeval import CocoMetric as _CocoMetric
 
 from mmtrack.registry import METRICS
 from .base_video_metrics import collect_tracking_results
 
 
 @METRICS.register_module()
-class CocoVideoMetric(CocoMetric):
+class CocoVideoMetric(_CocoMetric):
     """COCO evaluation metric.
 
     Evaluate AR, AP, and mAP for detection tasks including proposal/box
@@ -21,16 +21,19 @@ class CocoVideoMetric(CocoMetric):
     https://cocodataset.org/#detection-eval for more details.
     """
 
-    def __init__(self, ann_file: Optional[str] = None, **kwargs) -> None:
-        super().__init__(**kwargs)
+    def __init__(self,
+                 ann_file: Optional[str] = None,
+                 dist_collect_mode='cat',
+                 **kwargs) -> None:
+        super().__init__(dist_collect_mode=dist_collect_mode, **kwargs)
         # if ann_file is not specified,
         # initialize coco api with the converted dataset
-        if ann_file:
-            file_client = FileClient.infer_client(uri=ann_file)
-            with file_client.get_local_path(ann_file) as local_path:
-                self._coco_api = COCO(local_path)
-        else:
-            self._coco_api = None
+        # if ann_file:
+        #     file_client = FileClient.infer_client(uri=ann_file)
+        #     with file_client.get_local_path(ann_file) as local_path:
+        #         self._coco_api = COCO(local_path)
+        # else:
+        #     self._coco_api = None
 
     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
         """Process one batch of data samples and predictions. The processed
@@ -45,6 +48,7 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
             data_samples (Sequence[dict]): A batch of data samples that
                 contain annotations and predictions.
""" + predictions, groundtruths = [], [] for data_sample in data_samples: result = dict() pred = data_sample['pred_det_instances'] @@ -60,52 +64,60 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None: if 'mask_scores' in pred: result['mask_scores'] = pred['mask_scores'].cpu().numpy() - # parse gt - gt = dict() - gt['width'] = data_sample['ori_shape'][1] - gt['height'] = data_sample['ori_shape'][0] - gt['img_id'] = data_sample['img_id'] if self._coco_api is None: - assert 'instances' in data_sample, \ - 'ground truth is required for evaluation when ' \ - '`ann_file` is not provided' - gt['anns'] = data_sample['instances'] - # add converted result to the results list - self.results.append((gt, result)) - - def evaluate(self, size: int) -> dict: - """Evaluate the model performance of the whole dataset after processing - all batches. + ann = self.add_gt(data_sample) + else: + ann = dict() + groundtruths.append(ann) - Args: - size (int): Length of the entire validation dataset. + self.add(predictions, groundtruths) - Returns: - dict: Evaluation metrics dict on the val dataset. The keys are the - names of the metrics, and the values are corresponding results. - """ - if len(self.results) == 0: - warnings.warn( - f'{self.__class__.__name__} got empty `self.results`. Please ' - 'ensure that the processed results are properly added into ' - '`self.results` in `process` method.') - - results = collect_tracking_results(self.results, self.collect_device) - - if is_main_process(): - _metrics = self.compute_metrics(results) # type: ignore - # Add prefix to metric names - if self.prefix: - _metrics = { - '/'.join((self.prefix, k)): v - for k, v in _metrics.items() - } - metrics = [_metrics] - else: - metrics = [None] # type: ignore - - broadcast_object_list(metrics) - - # reset the results list - self.results.clear() - return metrics[0] + # parse gt + # gt = dict() + # gt['width'] = data_sample['ori_shape'][1] + # gt['height'] = data_sample['ori_shape'][0] + # gt['img_id'] = data_sample['img_id'] + # if self._coco_api is None: + # assert 'instances' in data_sample, \ + # 'ground truth is required for evaluation when ' \ + # '`ann_file` is not provided' + # gt['anns'] = data_sample['instances'] + # # add converted result to the results list + # self.results.append((gt, result)) + + # def evaluate(self, size: int) -> dict: + # """Evaluate the model performance of the whole dataset after processing + # all batches. + + # Args: + # size (int): Length of the entire validation dataset. + + # Returns: + # dict: Evaluation metrics dict on the val dataset. The keys are the + # names of the metrics, and the values are corresponding results. + # """ + # if len(self.results) == 0: + # warnings.warn( + # f'{self.__class__.__name__} got empty `self.results`. 
+    #             'ensure that the processed results are properly added into '
+    #             '`self.results` in `process` method.')
+
+    #     results = collect_tracking_results(self.results, self.collect_device)
+
+    #     if is_main_process():
+    #         _metrics = self.compute_metrics(results)  # type: ignore
+    #         # Add prefix to metric names
+    #         if self.prefix:
+    #             _metrics = {
+    #                 '/'.join((self.prefix, k)): v
+    #                 for k, v in _metrics.items()
+    #             }
+    #         metrics = [_metrics]
+    #     else:
+    #         metrics = [None]  # type: ignore
+
+    #     broadcast_object_list(metrics)
+
+    #     # reset the results list
+    #     self.results.clear()
+    #     return metrics[0]

From 125fac19ec60d5df6b770625c716f03b5a4ec7f3 Mon Sep 17 00:00:00 2001
From: JingweiZhang12
Date: Thu, 20 Oct 2022 19:09:45 +0800
Subject: [PATCH 2/4] fix some bugs

---
 .../datasets/imagenet_vid_fgfa_style.py      |   2 +-
 .../evaluation/metrics/coco_video_metric.py  | 104 ++++--------------
 2 files changed, 24 insertions(+), 82 deletions(-)

diff --git a/configs/_base_/datasets/imagenet_vid_fgfa_style.py b/configs/_base_/datasets/imagenet_vid_fgfa_style.py
index 72b5cdf82..d090f4bde 100644
--- a/configs/_base_/datasets/imagenet_vid_fgfa_style.py
+++ b/configs/_base_/datasets/imagenet_vid_fgfa_style.py
@@ -87,6 +87,6 @@
 # evaluator
 val_evaluator = dict(
     type='CocoVideoMetric',
-    # ann_file=data_root + 'annotations/imagenet_vid_val.json',
+    ann_file=data_root + 'annotations/imagenet_vid_val.json',
     metric='bbox')
 test_evaluator = val_evaluator
diff --git a/mmtrack/evaluation/metrics/coco_video_metric.py b/mmtrack/evaluation/metrics/coco_video_metric.py
index 9eaa12228..580dfc9bb 100644
--- a/mmtrack/evaluation/metrics/coco_video_metric.py
+++ b/mmtrack/evaluation/metrics/coco_video_metric.py
@@ -1,19 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-import warnings
-from typing import Optional, Sequence
+from typing import Sequence
 
-from mmdet.datasets.api_wrappers import COCO
+from mmdet.evaluation import CocoMetric
 from mmdet.structures.mask import encode_mask_results
-from mmengine.dist import broadcast_object_list, is_main_process
-from mmengine.fileio import FileClient
-from mmeval import CocoMetric as _CocoMetric
 
 from mmtrack.registry import METRICS
-from .base_video_metrics import collect_tracking_results
 
 
 @METRICS.register_module()
-class CocoVideoMetric(_CocoMetric):
+class CocoVideoMetric(CocoMetric):
     """COCO evaluation metric.
 
     Evaluate AR, AP, and mAP for detection tasks including proposal/box
@@ -21,19 +16,14 @@ class CocoVideoMetric(CocoMetric):
     https://cocodataset.org/#detection-eval for more details.
     """
 
     def __init__(self,
-                 ann_file: Optional[str] = None,
                  dist_collect_mode='cat',
+                 dist_backend='torch_cuda',
                  **kwargs) -> None:
-        super().__init__(dist_collect_mode=dist_collect_mode, **kwargs)
-        # if ann_file is not specified,
-        # initialize coco api with the converted dataset
-        # if ann_file:
-        #     file_client = FileClient.infer_client(uri=ann_file)
-        #     with file_client.get_local_path(ann_file) as local_path:
-        #         self._coco_api = COCO(local_path)
-        # else:
-        #     self._coco_api = None
+        super().__init__(
+            dist_collect_mode=dist_collect_mode,
+            dist_backend=dist_backend,
+            **kwargs)
 
     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
         """Process one batch of data samples and predictions. The processed
@@ -50,74 +40,26 @@ def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
         """
         predictions, groundtruths = [], []
         for data_sample in data_samples:
-            result = dict()
-            pred = data_sample['pred_det_instances']
-            result['img_id'] = data_sample['img_id']
-            result['bboxes'] = pred['bboxes'].cpu().numpy()
-            result['scores'] = pred['scores'].cpu().numpy()
-            result['labels'] = pred['labels'].cpu().numpy()
-            # encode mask to RLE
-            if 'masks' in pred:
-                result['masks'] = encode_mask_results(
-                    pred['masks'].detach().cpu().numpy())
+            pred = dict()
+            pred_instances = data_sample['pred_det_instances']
+            pred['img_id'] = data_sample['img_id']
+            pred['bboxes'] = pred_instances['bboxes'].cpu().numpy()
+            pred['scores'] = pred_instances['scores'].cpu().numpy()
+            pred['labels'] = pred_instances['labels'].cpu().numpy()
+            if 'masks' in pred_instances:
+                pred['masks'] = encode_mask_results(
+                    pred_instances['masks'].detach().cpu().numpy())
             # some detectors use different scores for bbox and mask
-            if 'mask_scores' in pred:
-                result['mask_scores'] = pred['mask_scores'].cpu().numpy()
+            if 'mask_scores' in pred_instances:
+                pred['mask_scores'] = \
+                    pred_instances['mask_scores'].cpu().numpy()
+            predictions.append(pred)
 
+            # parse gt
             if self._coco_api is None:
                 ann = self.add_gt(data_sample)
             else:
                 ann = dict()
             groundtruths.append(ann)
 
-        self.add(predictions, groundtruths)
-
-        # parse gt
-        # gt = dict()
-        # gt['width'] = data_sample['ori_shape'][1]
-        # gt['height'] = data_sample['ori_shape'][0]
-        # gt['img_id'] = data_sample['img_id']
-        # if self._coco_api is None:
-        #     assert 'instances' in data_sample, \
-        #         'ground truth is required for evaluation when ' \
-        #         '`ann_file` is not provided'
-        #     gt['anns'] = data_sample['instances']
-        #     # add converted result to the results list
-        #     self.results.append((gt, result))
-
-    # def evaluate(self, size: int) -> dict:
-    #     """Evaluate the model performance of the whole dataset after processing
-    #     all batches.
-
-    #     Args:
-    #         size (int): Length of the entire validation dataset.
-
-    #     Returns:
-    #         dict: Evaluation metrics dict on the val dataset. The keys are the
-    #         names of the metrics, and the values are corresponding results.
-    #     """
-    #     if len(self.results) == 0:
-    #         warnings.warn(
-    #             f'{self.__class__.__name__} got empty `self.results`. Please '
-    #             'ensure that the processed results are properly added into '
-    #             '`self.results` in `process` method.')
-
-    #     results = collect_tracking_results(self.results, self.collect_device)
-
-    #     if is_main_process():
-    #         _metrics = self.compute_metrics(results)  # type: ignore
-    #         # Add prefix to metric names
-    #         if self.prefix:
-    #             _metrics = {
-    #                 '/'.join((self.prefix, k)): v
-    #                 for k, v in _metrics.items()
-    #             }
-    #         metrics = [_metrics]
-    #     else:
-    #         metrics = [None]  # type: ignore
-
-    #     broadcast_object_list(metrics)
-
-    #     # reset the results list
-    #     self.results.clear()
-    #     return metrics[0]
+        self.add(predictions, groundtruths)

From 842d1c95f6ddfb05e144e411839dc44b75fea1e2 Mon Sep 17 00:00:00 2001
From: JingweiZhang12
Date: Thu, 20 Oct 2022 20:11:05 +0800
Subject: [PATCH 3/4] add docstring

---
 mmtrack/evaluation/metrics/coco_video_metric.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/mmtrack/evaluation/metrics/coco_video_metric.py b/mmtrack/evaluation/metrics/coco_video_metric.py
index 580dfc9bb..6367c575c 100644
--- a/mmtrack/evaluation/metrics/coco_video_metric.py
+++ b/mmtrack/evaluation/metrics/coco_video_metric.py
@@ -14,6 +14,15 @@ class CocoVideoMetric(CocoMetric):
     Evaluate AR, AP, and mAP for detection tasks including proposal/box
     detection and instance segmentation. Please refer to
     https://cocodataset.org/#detection-eval for more details.
+
+    dist_collect_mode (str, optional): The method of concatenating the
+        collected synchronization results. This depends on how the
+        distributed data is split. Currently only 'unzip' and 'cat' are
+        supported. For samplers in MMTracking, 'cat' should
+        be used. Defaults to 'cat'.
+    dist_backend (str, optional): The name of the distributed communication
+        backend, you can get all the backend names through
+        ``mmeval.core.list_all_backends()``. Defaults to 'torch_cuda'.
     """

From 63d6cf98e60e17ee98c1410426fde8e091b359fb Mon Sep 17 00:00:00 2001
From: JingweiZhang12
Date: Thu, 20 Oct 2022 20:17:44 +0800
Subject: [PATCH 4/4] add docstring

---
 mmtrack/evaluation/metrics/coco_video_metric.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/mmtrack/evaluation/metrics/coco_video_metric.py b/mmtrack/evaluation/metrics/coco_video_metric.py
index 6367c575c..aa858a2e6 100644
--- a/mmtrack/evaluation/metrics/coco_video_metric.py
+++ b/mmtrack/evaluation/metrics/coco_video_metric.py
@@ -20,23 +20,14 @@ class CocoVideoMetric(CocoMetric):
         distributed data is split. Currently only 'unzip' and 'cat' are
         supported. For samplers in MMTracking, 'cat' should
         be used. Defaults to 'cat'.
-    dist_backend (str, optional): The name of the distributed communication
-        backend, you can get all the backend names through
-        ``mmeval.core.list_all_backends()``. Defaults to 'torch_cuda'.
     """
 
-    def __init__(self,
-                 dist_collect_mode='cat',
-                 dist_backend='torch_cuda',
-                 **kwargs) -> None:
-        super().__init__(
-            dist_collect_mode=dist_collect_mode,
-            dist_backend=dist_backend,
-            **kwargs)
+    def __init__(self, dist_collect_mode='cat', **kwargs) -> None:
+        super().__init__(dist_collect_mode=dist_collect_mode, **kwargs)
 
     def process(self, data_batch: dict, data_samples: Sequence[dict]) -> None:
         """Process one batch of data samples and predictions. The processed
-        results should be stored in ``self.results``, which will be used to
+        results should be stored in ``self._results``, which will be used to
         compute the metrics when all batches have been processed.
         Note that we only modify ``pred['pred_instances']`` in ``CocoMetric``
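
For context when reviewing: after this series, CocoVideoMetric is a thin wrapper over mmdet's mmeval-based CocoMetric that only overrides process() (accumulating per-sample results via add()) and pins dist_collect_mode to 'cat'. A minimal evaluator config matching the final state of the series is sketched below. This is an illustration, not part of the patches: the data_root value is the one conventionally used by imagenet_vid_fgfa_style.py and is an assumption here, so adjust it to the local dataset layout; dist_collect_mode is spelled out only for clarity, since it already defaults to 'cat'.

    # Sketch of the evaluator config after PATCH 2/4 restores `ann_file`.
    # Assumption: data_root = 'data/ILSVRC/' as in imagenet_vid_fgfa_style.py.
    data_root = 'data/ILSVRC/'

    val_evaluator = dict(
        type='CocoVideoMetric',
        # COCO-style annotation file; if omitted, ground truth is taken from
        # each data sample via add_gt() (the `self._coco_api is None` branch).
        ann_file=data_root + 'annotations/imagenet_vid_val.json',
        # 'cat' concatenates results collected across ranks, matching how
        # MMTracking's video samplers split the data (see PATCH 3/4 docstring).
        dist_collect_mode='cat',
        metric='bbox')
    test_evaluator = val_evaluator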