From 1fdf6f188b6552adfccafd6db81f84a2d9e5ded6 Mon Sep 17 00:00:00 2001 From: Ilya Trushkin Date: Wed, 24 Jul 2024 09:22:29 +0900 Subject: [PATCH 1/4] Change common semantic segmentation dataset detection rule Signed-off-by: Ilya Trushkin --- .../plugins/data_formats/common_semantic_segmentation.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py index 4e9f55f625..6dfd599c33 100644 --- a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py +++ b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py @@ -163,11 +163,10 @@ def build_cmdline_parser(cls, **kwargs): @classmethod def detect(cls, context: FormatDetectionContext) -> FormatDetectionConfidence: - path = context.require_file(f"**/{DATASET_META_FILE}") - path = osp.dirname(path) + context.require_file(DATASET_META_FILE) - context.require_file(osp.join(path, CommonSemanticSegmentationPath.IMAGES_DIR, "**", "*")) - context.require_file(osp.join(path, CommonSemanticSegmentationPath.MASKS_DIR, "**", "*")) + context.require_file(osp.join(CommonSemanticSegmentationPath.IMAGES_DIR, "**", "*")) + context.require_file(osp.join(CommonSemanticSegmentationPath.MASKS_DIR, "**", "*")) return FormatDetectionConfidence.MEDIUM From eaa7da67300c07c5fd2fb5fc12a72587063f2911 Mon Sep 17 00:00:00 2001 From: Ilya Trushkin Date: Wed, 24 Jul 2024 12:29:28 +0900 Subject: [PATCH 2/4] Add test Signed-off-by: Ilya Trushkin --- tests/unit/data_formats/conftest.py | 11 ++--- ...est_common_semantic_segmentation_format.py | 43 ++++++++++++++++++- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/tests/unit/data_formats/conftest.py b/tests/unit/data_formats/conftest.py index c4c8c32f95..f303102b2d 100644 --- a/tests/unit/data_formats/conftest.py +++ b/tests/unit/data_formats/conftest.py @@ -9,6 +9,7 @@ import pytest from datumaro import Dataset +from tests.utils.test_utils import TestDir @pytest.fixture @@ -35,12 +36,12 @@ def fxt_export_kwargs(): @pytest.fixture def fxt_dataset_dir_with_subset_dirs(test_dir: str, request: pytest.FixtureRequest): fxt_dataset_dir = request.param + with TestDir(f"{test_dir}_with_subsets") as new_test_dir: + for subset in ["train", "val", "test"]: + dst = os.path.join(new_test_dir, subset) + shutil.copytree(fxt_dataset_dir, dst) - for subset in ["train", "val", "test"]: - dst = os.path.join(test_dir, subset) - shutil.copytree(fxt_dataset_dir, dst) - - yield test_dir + yield new_test_dir @pytest.fixture diff --git a/tests/unit/data_formats/test_common_semantic_segmentation_format.py b/tests/unit/data_formats/test_common_semantic_segmentation_format.py index b7297e4885..da5b8e1c5c 100644 --- a/tests/unit/data_formats/test_common_semantic_segmentation_format.py +++ b/tests/unit/data_formats/test_common_semantic_segmentation_format.py @@ -3,7 +3,9 @@ # SPDX-License-Identifier: MIT from collections import OrderedDict -from typing import Any, Dict, Optional +from typing import Any, Dict +import shutil +import os import numpy as np import pytest @@ -186,3 +188,42 @@ def test_can_import( fxt_import_kwargs, request, ) + + + @pytest.mark.parametrize( + [ + "fxt_dataset_dir_with_subset_dirs", + "fxt_expected_dataset_with_subsets", + "fxt_import_kwargs", + ], + [ + (DUMMY_DATASET_DIR, "fxt_dataset", {}), + ( + DUMMY_NON_STANDARD_DATASET_DIR, + "fxt_non_standard_dataset", + {"image_prefix": "image_", "mask_prefix": "gt_"}, + ), + ], + indirect=["fxt_dataset_dir_with_subset_dirs", "fxt_expected_dataset_with_subsets"], + ids=IDS, + ) + def test_can_import_nested( + self, + fxt_dataset_dir_with_subset_dirs: str, + fxt_expected_dataset_with_subsets: Dataset, + fxt_import_kwargs: Dict[str, Any], + request: pytest.FixtureRequest, + ): + subdir_name = "subdir" + subdir = os.path.join(fxt_dataset_dir_with_subset_dirs, subdir_name) + os.makedirs(subdir) + for _file in os.listdir(fxt_dataset_dir_with_subset_dirs): + if _file != subdir_name: + file_path = os.path.join(fxt_dataset_dir_with_subset_dirs, _file) + shutil.move(file_path, subdir) + return super().test_can_import( + fxt_dataset_dir_with_subset_dirs, + fxt_expected_dataset_with_subsets, + fxt_import_kwargs, + request, + ) From 48207ea4be5bcf90735f744f6efb84654c794486 Mon Sep 17 00:00:00 2001 From: Ilya Trushkin Date: Thu, 15 Aug 2024 18:31:25 +0300 Subject: [PATCH 3/4] Update test Signed-off-by: Ilya Trushkin --- .../common_semantic_segmentation.py | 9 ++- ...est_common_semantic_segmentation_format.py | 72 +++++++++---------- 2 files changed, 38 insertions(+), 43 deletions(-) diff --git a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py index 6dfd599c33..7845ffc406 100644 --- a/src/datumaro/plugins/data_formats/common_semantic_segmentation.py +++ b/src/datumaro/plugins/data_formats/common_semantic_segmentation.py @@ -3,7 +3,6 @@ # SPDX-License-Identifier: MIT import errno -import glob import os.path as osp from typing import List, Optional @@ -69,11 +68,11 @@ def __init__( self._image_prefix = image_prefix self._mask_prefix = mask_prefix - meta_file = glob.glob(osp.join(path, "**", DATASET_META_FILE), recursive=True) - if is_meta_file(meta_file[0]): - self._root_dir = osp.dirname(meta_file[0]) + meta_file = osp.join(path, DATASET_META_FILE) + if is_meta_file(meta_file): + self._root_dir = osp.dirname(meta_file) - label_map = parse_meta_file(meta_file[0]) + label_map = parse_meta_file(meta_file) self._categories = make_categories(label_map) else: raise FileNotFoundError(errno.ENOENT, "Dataset meta info file was not found", path) diff --git a/tests/unit/data_formats/test_common_semantic_segmentation_format.py b/tests/unit/data_formats/test_common_semantic_segmentation_format.py index da5b8e1c5c..1d03e9ba01 100644 --- a/tests/unit/data_formats/test_common_semantic_segmentation_format.py +++ b/tests/unit/data_formats/test_common_semantic_segmentation_format.py @@ -2,10 +2,10 @@ # # SPDX-License-Identifier: MIT +import os +import shutil from collections import OrderedDict from typing import Any, Dict -import shutil -import os import numpy as np import pytest @@ -13,6 +13,7 @@ from datumaro.components.annotation import Mask from datumaro.components.dataset import Dataset from datumaro.components.dataset_base import DatasetItem +from datumaro.components.errors import DatasetImportError from datumaro.components.media import Image from datumaro.plugins.data_formats.common_semantic_segmentation import ( CommonSemanticSegmentationImporter, @@ -145,25 +146,8 @@ def test_can_import( fxt_dataset_dir, fxt_expected_dataset, fxt_import_kwargs, request ) - -class CommonSemanticSegmentationWithSubsetDirsImporterTest(TestDataFormatBase): - IMPORTER = CommonSemanticSegmentationWithSubsetDirsImporter - @pytest.mark.parametrize( - "fxt_dataset_dir_with_subset_dirs", - [DUMMY_DATASET_DIR, DUMMY_NON_STANDARD_DATASET_DIR], - indirect=["fxt_dataset_dir_with_subset_dirs"], - ids=IDS, - ) - def test_can_detect(self, fxt_dataset_dir_with_subset_dirs: str): - return super().test_can_detect(fxt_dataset_dir_with_subset_dirs) - - @pytest.mark.parametrize( - [ - "fxt_dataset_dir_with_subset_dirs", - "fxt_expected_dataset_with_subsets", - "fxt_import_kwargs", - ], + ["fxt_dataset_dir", "fxt_expected_dataset", "fxt_import_kwargs"], [ (DUMMY_DATASET_DIR, "fxt_dataset", {}), ( @@ -172,23 +156,42 @@ def test_can_detect(self, fxt_dataset_dir_with_subset_dirs: str): {"image_prefix": "image_", "mask_prefix": "gt_"}, ), ], - indirect=["fxt_dataset_dir_with_subset_dirs", "fxt_expected_dataset_with_subsets"], + indirect=["fxt_expected_dataset"], ids=IDS, ) - def test_can_import( + def test_cannot_import_nested( self, - fxt_dataset_dir_with_subset_dirs: str, - fxt_expected_dataset_with_subsets: Dataset, + fxt_dataset_dir: str, + fxt_expected_dataset: Dataset, fxt_import_kwargs: Dict[str, Any], request: pytest.FixtureRequest, + test_dir: str, ): - return super().test_can_import( - fxt_dataset_dir_with_subset_dirs, - fxt_expected_dataset_with_subsets, - fxt_import_kwargs, - request, - ) + shutil.copytree(fxt_dataset_dir, test_dir, dirs_exist_ok=True) + subdir_name = "subdir" + subdir = os.path.join(test_dir, subdir_name) + os.makedirs(subdir) + for _file in os.listdir(test_dir): + if _file != subdir_name: + file_path = os.path.join(test_dir, _file) + shutil.move(file_path, subdir) + with pytest.raises(DatasetImportError) as exc_info: + super().test_can_import(test_dir, fxt_expected_dataset, fxt_import_kwargs, request) + assert exc_info.value.__cause__ is not None + assert isinstance(exc_info.value.__cause__, FileNotFoundError) + + +class CommonSemanticSegmentationWithSubsetDirsImporterTest(TestDataFormatBase): + IMPORTER = CommonSemanticSegmentationWithSubsetDirsImporter + @pytest.mark.parametrize( + "fxt_dataset_dir_with_subset_dirs", + [DUMMY_DATASET_DIR, DUMMY_NON_STANDARD_DATASET_DIR], + indirect=["fxt_dataset_dir_with_subset_dirs"], + ids=IDS, + ) + def test_can_detect(self, fxt_dataset_dir_with_subset_dirs: str): + return super().test_can_detect(fxt_dataset_dir_with_subset_dirs) @pytest.mark.parametrize( [ @@ -207,20 +210,13 @@ def test_can_import( indirect=["fxt_dataset_dir_with_subset_dirs", "fxt_expected_dataset_with_subsets"], ids=IDS, ) - def test_can_import_nested( + def test_can_import( self, fxt_dataset_dir_with_subset_dirs: str, fxt_expected_dataset_with_subsets: Dataset, fxt_import_kwargs: Dict[str, Any], request: pytest.FixtureRequest, ): - subdir_name = "subdir" - subdir = os.path.join(fxt_dataset_dir_with_subset_dirs, subdir_name) - os.makedirs(subdir) - for _file in os.listdir(fxt_dataset_dir_with_subset_dirs): - if _file != subdir_name: - file_path = os.path.join(fxt_dataset_dir_with_subset_dirs, _file) - shutil.move(file_path, subdir) return super().test_can_import( fxt_dataset_dir_with_subset_dirs, fxt_expected_dataset_with_subsets, From 10cd400de227c1b23095334d91a177745ed94de6 Mon Sep 17 00:00:00 2001 From: Ilya Trushkin Date: Thu, 15 Aug 2024 18:37:06 +0300 Subject: [PATCH 4/4] Linter fix Signed-off-by: Ilya Trushkin --- tests/unit/data_formats/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/data_formats/conftest.py b/tests/unit/data_formats/conftest.py index f303102b2d..12351ee037 100644 --- a/tests/unit/data_formats/conftest.py +++ b/tests/unit/data_formats/conftest.py @@ -9,6 +9,7 @@ import pytest from datumaro import Dataset + from tests.utils.test_utils import TestDir