From 9c517309f3b8abbe69544d53917ef6b5885695f1 Mon Sep 17 00:00:00 2001 From: "Douglas Cerna (Soy Douglas)" Date: Wed, 31 Jan 2024 23:35:52 +0100 Subject: [PATCH] Fix fileSec group sorting with non default groups --- metsrw/mets.py | 32 +++++++++++++++---------------- tests/test_mets.py | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/metsrw/mets.py b/metsrw/mets.py index dfc6782..890234b 100755 --- a/metsrw/mets.py +++ b/metsrw/mets.py @@ -12,8 +12,6 @@ from . import metadata from . import utils -# This package - LOGGER = logging.getLogger(__name__) @@ -24,6 +22,17 @@ ) TRANSFORM_PREFIX = "TRANSFORM" TRANSFORM_PREFIX_LEN = len(TRANSFORM_PREFIX) +DEFAULT_FILESEC_GROUPS_ORDER = [ + "original", + "submissionDocumentation", + "preservation", + "service", + "access", + "license", + "text/ocr", + "metadata", + "derivative", +] class METSDocument: @@ -312,28 +321,17 @@ def _filesec(self, files=None): return filesec def _sort_filegrps(self, filegrps): - uses_order = [ - "original", - "submissionDocumentation", - "preservation", - "service", - "access", - "license", - "text/ocr", - "metadata", - "derivative", - ] result = [] - count = len(filegrps) + default_groups_count = len(DEFAULT_FILESEC_GROUPS_ORDER) for i, use in enumerate(filegrps.keys()): filegrp = filegrps[use] try: - filegrp_position = uses_order.index(use) + filegrp_position = DEFAULT_FILESEC_GROUPS_ORDER.index(use) except ValueError: - filegrp_position = count + i + filegrp_position = default_groups_count + i result.append((filegrp_position, filegrp)) - return [v for i, v in sorted(result)] + return [v for _, v in sorted(result, key=lambda i: i[0])] def serialize(self, fully_qualified=True, normative_structmap=True): """ diff --git a/tests/test_mets.py b/tests/test_mets.py index 09f6175..17ef471 100644 --- a/tests/test_mets.py +++ b/tests/test_mets.py @@ -1169,3 +1169,50 @@ def test_dspace_filegrp_sorting_in_filesec(self): def test_get_subsections_counts(mets_path, expected_counts): mw = metsrw.METSDocument().fromfile(mets_path) assert mw.get_subsections_counts() == expected_counts + + +@pytest.mark.parametrize( + "file_group_uses,expected_uses_order", + [ + ( + [ + "unknown2", + "original", + "unknown1", + "text/ocr", + ], + [ + "original", + "text/ocr", + "unknown2", + "unknown1", + ], + ), + ( + [ + "unknown2", + "unknown1", + "original", + "unknown3", + ], + [ + "original", + "unknown2", + "unknown1", + "unknown3", + ], + ), + ], +) +def test_filegrp_sorting_returns_non_default_groups( + file_group_uses, expected_uses_order +): + file_groups = { + use: etree.Element(metsrw.utils.lxmlns("mets") + "fileGrp", USE=use) + for use in file_group_uses + } + + mw = metsrw.METSDocument() + result = mw._sort_filegrps(file_groups) + + assert [g.attrib["USE"] for g in result] == expected_uses_order