Skip to content

Commit

Permalink
Fix fileSec group sorting with non default groups
Browse files Browse the repository at this point in the history
  • Loading branch information
replaceafill committed Jan 31, 2024
1 parent 5e055b0 commit 9c51730
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 17 deletions.
32 changes: 15 additions & 17 deletions metsrw/mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
from . import metadata
from . import utils

# This package


LOGGER = logging.getLogger(__name__)

Expand All @@ -24,6 +22,17 @@
)
TRANSFORM_PREFIX = "TRANSFORM"
TRANSFORM_PREFIX_LEN = len(TRANSFORM_PREFIX)
# fileGrp USE values in the order METSDocument._sort_filegrps places them.
# A USE value that is not listed here is positioned after all of these.
DEFAULT_FILESEC_GROUPS_ORDER = [
"original",
"submissionDocumentation",
"preservation",
"service",
"access",
"license",
"text/ocr",
"metadata",
"derivative",
]


class METSDocument:
Expand Down Expand Up @@ -312,28 +321,17 @@ def _filesec(self, files=None):
return filesec

def _sort_filegrps(self, filegrps):
    """Return the values of ``filegrps`` ordered by their USE.

    Groups whose USE is listed in ``DEFAULT_FILESEC_GROUPS_ORDER`` come
    first, in the order given by that list. Every other group follows,
    keeping the relative order in which it appears in ``filegrps``.

    :param filegrps: Mapping of USE value to its fileGrp.
    :return: List with the values of ``filegrps`` in sorted order.
    """
    default_groups_count = len(DEFAULT_FILESEC_GROUPS_ORDER)
    result = []
    for i, (use, filegrp) in enumerate(filegrps.items()):
        try:
            filegrp_position = DEFAULT_FILESEC_GROUPS_ORDER.index(use)
        except ValueError:
            # Offset by the number of default groups (not by the number of
            # groups received) so a non default group can never share a
            # position with, or sort before, a default group.
            filegrp_position = default_groups_count + i
        result.append((filegrp_position, filegrp))

    # Sort on the position alone so the fileGrp values are never compared
    # with each other.
    return [v for _, v in sorted(result, key=lambda item: item[0])]

def serialize(self, fully_qualified=True, normative_structmap=True):
"""
Expand Down
47 changes: 47 additions & 0 deletions tests/test_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1169,3 +1169,50 @@ def test_dspace_filegrp_sorting_in_filesec(self):
def test_get_subsections_counts(mets_path, expected_counts):
mw = metsrw.METSDocument().fromfile(mets_path)
assert mw.get_subsections_counts() == expected_counts


@pytest.mark.parametrize(
    "file_group_uses,expected_uses_order",
    [
        (
            ["unknown2", "original", "unknown1", "text/ocr"],
            ["original", "text/ocr", "unknown2", "unknown1"],
        ),
        (
            ["unknown2", "unknown1", "original", "unknown3"],
            ["original", "unknown2", "unknown1", "unknown3"],
        ),
    ],
)
def test_filegrp_sorting_returns_non_default_groups(
    file_group_uses, expected_uses_order
):
    """Check the USE order produced by ``_sort_filegrps`` for mixed groups.

    Each case passes default and non default USE values and compares the
    USE attributes of the sorted result against the expected order.
    """
    groups_by_use = {}
    for use in file_group_uses:
        tag = metsrw.utils.lxmlns("mets") + "fileGrp"
        groups_by_use[use] = etree.Element(tag, USE=use)

    document = metsrw.METSDocument()
    sorted_groups = document._sort_filegrps(groups_by_use)

    actual_uses_order = [group.attrib["USE"] for group in sorted_groups]
    assert actual_uses_order == expected_uses_order

0 comments on commit 9c51730

Please sign in to comment.