diff --git a/project/tests/test_collector.py b/project/tests/test_collector.py index 9fea56dd..cd00a67b 100644 --- a/project/tests/test_collector.py +++ b/project/tests/test_collector.py @@ -47,22 +47,38 @@ def test_finalise(self): self.assertTrue(content) self.assertGreater(len(content), 0) - def test_profile_file_name(self): - request = RequestMinFactory() - DataCollector().configure(request) - expected_file_name_prefix = request.path.replace('/', '_').lstrip('_') - print(expected_file_name_prefix) + def test_profile_file_name_with_disabled_extended_file_name(self): + SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = False + request_path = 'normal/uri/' + resulting_prefix = self._get_prof_file_name(request_path) + self.assertEqual(resulting_prefix, '') - with self.subTest("With disabled extended file name"): - SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = False - DataCollector().finalise() - file = DataCollector().request.prof_file - result_file_name = file.name.rsplit('/')[-1] - self.assertFalse(result_file_name.startswith(f"{expected_file_name_prefix}_")) + def test_profile_file_name_with_enabled_extended_file_name(self): - with self.subTest("With enabled extended file name"): - SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True - DataCollector().finalise() - file = DataCollector().request.prof_file - result_file_name = file.name.rsplit('/')[-1] - self.assertTrue(result_file_name.startswith(f"{expected_file_name_prefix}_")) + SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True + request_path = 'normal/uri/' + resulting_prefix = self._get_prof_file_name(request_path) + self.assertEqual(resulting_prefix, 'normal_uri_') + + def test_profile_file_name_with_path_traversal_and_special_char(self): + SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True + request_path = 'spÉciàl/.././大/uri/@É/' + resulting_prefix = self._get_prof_file_name(request_path) + self.assertEqual(resulting_prefix, 'special_uri_e_') + + def 
test_profile_file_name_with_long_path(self): + SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME = True + request_path = 'long/path/' + 'a' * 100 + resulting_prefix = self._get_prof_file_name(request_path) + # the slugified path is capped at 50 chars, plus the `_` separator before the request id + self.assertEqual(len(resulting_prefix), 51) + + @classmethod + def _get_prof_file_name(cls, request_path: str) -> str: + request = RequestMinFactory() + request.path = request_path + DataCollector().configure(request) + DataCollector().finalise() + file_path = DataCollector().request.prof_file.name + filename = file_path.rsplit('/')[-1] + return filename.replace(f"{request.id}.prof", "") diff --git a/silk/collector.py b/silk/collector.py index 741d7cb6..79b8c48a 100644 --- a/silk/collector.py +++ b/silk/collector.py @@ -2,6 +2,8 @@ import logging import marshal import pstats +import re +import unicodedata from io import StringIO from threading import local @@ -191,10 +193,28 @@ def finalise(self): def register_silk_query(self, *args): self.register_objects(TYP_SILK_QUERIES, *args) - def _get_proposed_file_name(self): + def _get_proposed_file_name(self) -> str: """Retrieve the profile file name to be proposed to the storage""" if SilkyConfig().SILKY_PYTHON_PROFILER_EXTENDED_FILE_NAME: - request_path = self.request.path.replace('/', '_').lstrip('_') - return f"{request_path}_{str(self.request.id)}.prof" + slugified_path = slugify_path(self.request.path) + return f"{slugified_path}_{str(self.request.id)}.prof" return f"{str(self.request.id)}.prof" + + +def slugify_path(request_path: str) -> str: + """ + Fold the path to ASCII (dropping characters with no ASCII form), lowercase it and keep the first 50 characters. + Replace each run of characters outside [a-z0-9_] with a single underscore, then strip leading and trailing + underscores. 
+ + Inspired by Django's slugify. + """ + request_path = str(request_path) + request_path = ( + unicodedata.normalize("NFKD", request_path) + .encode("ascii", "ignore") + .decode("ascii") + ) + request_path = request_path.lower()[:50] + return re.sub(r'\W+', '_', request_path).strip('_')