From ea299dcb38ad78c9b3de961e88da214ccadd31be Mon Sep 17 00:00:00 2001
From: Andrew Sansom
Date: Wed, 3 Jul 2024 16:39:36 -0500
Subject: [PATCH] fix: Ignore .pth files that are not utf-8 encoded

Issue-300: https://github.com/mkdocstrings/griffe/issues/300
PR-301: https://github.com/mkdocstrings/griffe/pull/301
---
 src/griffe/finder.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/griffe/finder.py b/src/griffe/finder.py
index d3de5d5e..1eb66dc2 100644
--- a/src/griffe/finder.py
+++ b/src/griffe/finder.py
@@ -439,8 +439,15 @@ def _handle_pth_file(path: Path) -> list[_SP]:
     # No item is added to sys.path more than once.
     # Blank lines and lines beginning with # are skipped.
     # Lines starting with import (followed by space or tab) are executed.
-    directories = []
-    for line in path.read_text(encoding="utf8").strip().replace(";", "\n").splitlines(keepends=False):
+    directories: list[_SP] = []
+    try:
+        # It turns out PyTorch recommends its users to use `.pth` as the extension
+        # when saving models on the disk. These model files are not encoded in UTF8.
+        # If UTF8 decoding fails, we skip the .pth file.
+        text = path.read_text(encoding="utf8")
+    except UnicodeDecodeError:
+        return directories
+    for line in text.strip().replace(";", "\n").splitlines(keepends=False):
         line = line.strip()  # noqa: PLW2901
         if _re_import_line.match(line):
             editable_module = path.parent / f"{line[len('import'):].lstrip()}.py"