From a18e7bafda68ed9202f498827b03b1d7b6c76dff Mon Sep 17 00:00:00 2001 From: Karl Besser Date: Fri, 5 Jul 2024 17:57:48 -0400 Subject: [PATCH] Fix IndexError when abbrv is longer than original In some cases, there is a mismatch between abbreviation and original, where a dot is added to an unabbreviated word, e.g., "Control". If this occurs, the dot is removed and the abbreviation is reduced to the length of the original word. --- pyiso4/ltwa.py | 3 +++ tests/tests.tsv | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/pyiso4/ltwa.py b/pyiso4/ltwa.py index bd6311b..8ff4f5d 100644 --- a/pyiso4/ltwa.py +++ b/pyiso4/ltwa.py @@ -177,6 +177,9 @@ def match_capitalization_and_diacritic(abbrv: str, original: str) -> str: """Matches the capitalization and diacritics of the `original` word, as long as they are similar """ + if len(abbrv) > len(original): + abbrv = abbrv[:len(original)] + normalized_abbrv = list(normalize(abbrv, Level.SOFT)) for i, c in enumerate(normalized_abbrv): unided = unidecode(original[i]) diff --git a/tests/tests.tsv b/tests/tests.tsv index 5ffc0ab..bcea7ee 100644 --- a/tests/tests.tsv +++ b/tests/tests.tsv @@ -40,4 +40,6 @@ Zeitschrift des Deutschen Palästina-Vereins Z. Dtsch. Paläst.-Ver. International Journal of e-Collaboration Int. J. e-Collab. Proceedings of A. Razmadze Mathematical Institute Proc. A. Razmadze Math. Inst. Norsk Militært Tidsskrift Nor. Mil. Tidsskr. -Proceedings of the 2024 Conference on Science Proc. 2024 Conf. Sci. \ No newline at end of file +Proceedings of the 2024 Conference on Science Proc. 2024 Conf. Sci. +IEEE Power and Energy Magazine IEEE Power Energy Mag. +IEEE Transactions on Automatic Control IEEE Trans. Autom. Control