From 61853c74ed2790a99474b6e4e2a8f1efc762dd3b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C5=99ikryl?= <2625825+petrprikryl@users.noreply.github.com>
Date: Wed, 30 Oct 2024 00:02:32 +0100
Subject: [PATCH] Fix compression speed gains (#616)

---------

Co-authored-by: petr.prikryl
Co-authored-by: Adam Johnson
---
 docs/changelog.rst         |  8 ++++++++
 src/whitenoise/compress.py | 13 ++++++-------
 src/whitenoise/storage.py  | 10 +++++++---
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 56468335..02937a51 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -2,6 +2,14 @@
 Changelog
 =========
 
+Unreleased
+----------
+
+* Fix compression speed gains for the thread pool when running Django’s ``collectstatic``.
+  The thread pool had no effect due to use of a generator for the results, a refactoring introduced when reviewing the initial PR.
+
+  Thanks to Petr Přikryl for the investigation and fix in `PR #616 <https://github.com/evansd/whitenoise/pull/616>`__.
+
 6.8.1 (2024-10-28)
 ------------------
 
diff --git a/src/whitenoise/compress.py b/src/whitenoise/compress.py
index ff316d35..9550f6b4 100644
--- a/src/whitenoise/compress.py
+++ b/src/whitenoise/compress.py
@@ -79,7 +79,8 @@ def should_compress(self, filename):
     def log(self, message):
         pass
 
-    def _lazy_compress(self, path):
+    def compress(self, path):
+        filenames = []
         with open(path, "rb") as f:
             stat_result = os.fstat(f.fileno())
             data = f.read()
@@ -87,17 +88,15 @@ def _lazy_compress(self, path):
         if self.use_brotli:
             compressed = self.compress_brotli(data)
             if self.is_compressed_effectively("Brotli", path, size, compressed):
-                yield self.write_data(path, compressed, ".br", stat_result)
+                filenames.append(self.write_data(path, compressed, ".br", stat_result))
             else:
                 # If Brotli compression wasn't effective gzip won't be either
-                return
+                return filenames
         if self.use_gzip:
             compressed = self.compress_gzip(data)
             if self.is_compressed_effectively("Gzip", path, size, compressed):
-                yield self.write_data(path, compressed, ".gz", stat_result)
-
-    def compress(self, path):
-        return list(self._lazy_compress(path))
+                filenames.append(self.write_data(path, compressed, ".gz", stat_result))
+        return filenames
 
     @staticmethod
     def compress_gzip(data):
diff --git a/src/whitenoise/storage.py b/src/whitenoise/storage.py
index 6fd58d13..820c909d 100644
--- a/src/whitenoise/storage.py
+++ b/src/whitenoise/storage.py
@@ -35,11 +35,13 @@ def post_process(
         self.compressor = self.create_compressor(extensions=extensions, quiet=True)
 
         def _compress_path(path: str) -> Generator[tuple[str, str, bool]]:
+            compressed: list[tuple[str, str, bool]] = []
             full_path = self.path(path)
             prefix_len = len(full_path) - len(path)
             for compressed_path in self.compressor.compress(full_path):
                 compressed_name = compressed_path[prefix_len:]
-                yield (path, compressed_name, True)
+                compressed.append((path, compressed_name, True))
+            return compressed
 
         with ThreadPoolExecutor() as executor:
             futures = (
@@ -142,12 +144,14 @@ def compress_files(self, paths):
         extensions = getattr(settings, "WHITENOISE_SKIP_COMPRESS_EXTENSIONS", None)
         self.compressor = self.create_compressor(extensions=extensions, quiet=True)
 
-        def _compress_path(path: str) -> Generator[tuple[str, str]]:
+        def _compress_path(path: str) -> list[tuple[str, str]]:
+            compressed: list[tuple[str, str]] = []
             full_path = self.path(path)
             prefix_len = len(full_path) - len(path)
             for compressed_path in self.compressor.compress(full_path):
                 compressed_name = compressed_path[prefix_len:]
-                yield (path, compressed_name)
+                compressed.append((path, compressed_name))
+            return compressed
 
         with ThreadPoolExecutor() as executor:
             futures = (