From 250d757bef0e5e4967a9093bb38e40be60ed5f0b Mon Sep 17 00:00:00 2001 From: Lucain Date: Fri, 25 Nov 2022 17:57:49 +0100 Subject: [PATCH] Set correct permissions on blob file (#1220) * Set correct permissions on blob file * Hack to get current umask thread-safe --- src/huggingface_hub/file_download.py | 31 ++++++++++++++++++++++++++-- tests/test_file_download.py | 20 ++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/huggingface_hub/file_download.py b/src/huggingface_hub/file_download.py index e3f415e807..02b3ba3ec8 100644 --- a/src/huggingface_hub/file_download.py +++ b/src/huggingface_hub/file_download.py @@ -5,7 +5,9 @@ import os import re import shutil +import stat import tempfile +import uuid import warnings from contextlib import contextmanager from dataclasses import dataclass @@ -747,7 +749,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]: ) logger.info("storing %s in cache at %s", url, cache_path) - os.replace(temp_file.name, cache_path) + _chmod_and_replace(temp_file.name, cache_path) if force_filename is None: logger.info("creating metadata file for %s", cache_path) @@ -1246,7 +1248,7 @@ def _resumable_file_manager() -> Generator[io.BufferedWriter, None, None]: ) logger.info("storing %s in cache at %s", url, blob_path) - os.replace(temp_file.name, blob_path) + _chmod_and_replace(temp_file.name, blob_path) logger.info("creating pointer to %s from %s", blob_path, pointer_path) _create_relative_symlink(blob_path, pointer_path, new_blob=True) @@ -1398,3 +1400,28 @@ def _int_or_none(value: Optional[str]) -> Optional[int]: return int(value) # type: ignore except (TypeError, ValueError): return None + + +def _chmod_and_replace(src: str, dst: str) -> None: + """Set correct permission before moving a blob from tmp directory to cache dir. + + Do not take into account the `umask` from the process as there is no convenient way + to get it that is thread-safe. + + See: + - About umask: https://docs.python.org/3/library/os.html#os.umask + - Thread-safety: https://stackoverflow.com/a/70343066 + - About solution: https://github.com/huggingface/huggingface_hub/pull/1220#issuecomment-1326211591 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1141 + - Fix issue: https://github.com/huggingface/huggingface_hub/issues/1215 + """ + # Get umask by creating a temporary file in the cached repo folder. + tmp_file = Path(dst).parent.parent / f"tmp_{uuid.uuid4()}" + try: + tmp_file.touch() + cache_dir_mode = Path(tmp_file).stat().st_mode + os.chmod(src, stat.S_IMODE(cache_dir_mode)) + finally: + tmp_file.unlink() + + os.replace(src, dst) diff --git a/tests/test_file_download.py b/tests/test_file_download.py index aff772234b..40cad9121c 100644 --- a/tests/test_file_download.py +++ b/tests/test_file_download.py @@ -13,6 +13,7 @@ # limitations under the License. import os import re +import stat import unittest from pathlib import Path from tempfile import TemporaryDirectory @@ -223,6 +224,25 @@ def test_dataset_lfs_object(self): (url, '"95aa6a52d5d6a735563366753ca50492a658031da74f301ac5238b03966972c9"'), ) + def test_hf_hub_download_custom_cache_permission(self): + """Checks `hf_hub_download` respect the cache dir permission. + + Regression test for #1141 #1215. + https://github.com/huggingface/huggingface_hub/issues/1141 + https://github.com/huggingface/huggingface_hub/issues/1215 + """ + with TemporaryDirectory() as tmpdir: + # Equivalent to umask u=rwx,g=r,o= + previous_umask = os.umask(0o037) + try: + filepath = hf_hub_download( + DUMMY_RENAMED_OLD_MODEL_ID, "config.json", cache_dir=tmpdir + ) + # Permissions are honored (640: u=rw,g=r,o=) + self.assertEqual(stat.S_IMODE(os.stat(filepath).st_mode), 0o640) + finally: + os.umask(previous_umask) + def test_download_from_a_renamed_repo_with_hf_hub_download(self): """Checks `hf_hub_download` works also on a renamed repo.