From dfa32183775fa1661a7aeffbb3c8b12350c59654 Mon Sep 17 00:00:00 2001 From: Armen Zambrano G <44410+armenzg@users.noreply.github.com> Date: Wed, 26 Feb 2025 14:51:05 -0500 Subject: [PATCH 1/2] ref(derived_code_mappings): Simplify FrameFilename Changes included: * Remove some unused properties * Unify code that transforms the file path --- .../endpoints/project_repo_path_parsing.py | 4 +- .../auto_source_code_config/code_mapping.py | 89 ++++++++++--------- .../test_code_mapping.py | 68 +++++++------- 3 files changed, 81 insertions(+), 80 deletions(-) diff --git a/src/sentry/api/endpoints/project_repo_path_parsing.py b/src/sentry/api/endpoints/project_repo_path_parsing.py index d6b7f2056e78f1..caedb4a6326fe9 100644 --- a/src/sentry/api/endpoints/project_repo_path_parsing.py +++ b/src/sentry/api/endpoints/project_repo_path_parsing.py @@ -12,7 +12,7 @@ from sentry.integrations.base import IntegrationFeatures from sentry.integrations.manager import default_manager as integrations from sentry.integrations.services.integration import RpcIntegration, integration_service -from sentry.issues.auto_source_code_config.code_mapping import FrameFilename, find_roots +from sentry.issues.auto_source_code_config.code_mapping import FrameInfo, find_roots from sentry.models.repository import Repository @@ -119,7 +119,7 @@ def post(self, request: Request, project) -> Response: branch = installation.extract_branch_from_source_url(repo, source_url) source_path = installation.extract_source_path_from_source_url(repo, source_url) - stack_root, source_root = find_roots(FrameFilename({"filename": stack_path}), source_path) + stack_root, source_root = find_roots(FrameInfo({"filename": stack_path}), source_path) return self.respond( { diff --git a/src/sentry/issues/auto_source_code_config/code_mapping.py b/src/sentry/issues/auto_source_code_config/code_mapping.py index af52f9cc639095..ebfc61c6385f19 100644 --- a/src/sentry/issues/auto_source_code_config/code_mapping.py +++ b/src/sentry/issues/auto_source_code_config/code_mapping.py @@ -43,7 +43,7 @@ class UnexpectedPathException(Exception): pass -class UnsupportedFrameFilename(Exception): +class UnsupportedFrameInfo(Exception): pass @@ -60,23 +60,16 @@ def derive_code_mappings( return [] trees = installation.get_trees_for_org() trees_helper = CodeMappingTreesHelper(trees) - frame_filename = FrameFilename(frame) + frame_filename = FrameInfo(frame) return trees_helper.list_file_matches(frame_filename) # XXX: Look at sentry.interfaces.stacktrace and maybe use that -class FrameFilename: +class FrameInfo: def __init__(self, frame: Mapping[str, Any]) -> None: # XXX: In the next PR, we will use more than just the filename frame_file_path = frame["filename"] - self.raw_path = frame_file_path - is_windows_path = False - if "\\" in frame_file_path: - is_windows_path = True - frame_file_path = frame_file_path.replace("\\", "/") - - if frame_file_path[0] == "/" or frame_file_path[0] == "\\": - frame_file_path = frame_file_path[1:] + frame_file_path = self.transformations(frame_file_path) # Using regexes would be better but this is easier to understand if ( @@ -85,43 +78,51 @@ def __init__(self, frame: Mapping[str, Any]) -> None: or frame_file_path.find(" ") > -1 or frame_file_path.find("/") == -1 ): - raise UnsupportedFrameFilename("This path is not supported.") + raise UnsupportedFrameInfo("This path is not supported.") - self.full_path = frame_file_path - self.extension = get_extension(frame_file_path) - if not self.extension: + if not get_extension(frame_file_path): raise NeedsExtension("It needs an extension.") - # Remove drive letter if it exists - if is_windows_path and frame_file_path[1] == ":": - frame_file_path = frame_file_path[2:] - # windows drive letters can be like C:\ or C: - # so we need to remove the slash if it exists - if frame_file_path[0] == "/": - frame_file_path = frame_file_path[1:] - start_at_index = get_straight_path_prefix_end_index(frame_file_path) - self.straight_path_prefix = frame_file_path[:start_at_index] # We normalize the path to be as close to what the path would # look like in the source code repository, hence why we remove # the straight path prefix and drive letter self.normalized_path = frame_file_path[start_at_index:] + start_at_index = get_straight_path_prefix_end_index(frame_file_path) if start_at_index == 0: - self.root = frame_file_path.split("/")[0] + self.stack_root = frame_file_path.split("/")[0] else: slash_index = frame_file_path.find("/", start_at_index) - self.root = frame_file_path[0:slash_index] + self.stack_root = frame_file_path[0:slash_index] + + def transformations(self, frame_file_path: str) -> str: + self.raw_path = frame_file_path + self.is_windows_path = False + if "\\" in frame_file_path: + self.is_windows_path = True + frame_file_path = frame_file_path.replace("\\", "/") + + if frame_file_path[0] == "/" or frame_file_path[0] == "\\": + frame_file_path = frame_file_path[1:] + + # Remove drive letter if it exists + if self.is_windows_path and frame_file_path[1] == ":": + frame_file_path = frame_file_path[2:] + # windows drive letters can be like C:\ or C: + # so we need to remove the slash if it exists + if frame_file_path[0] == "/": + frame_file_path = frame_file_path[1:] - self.file_name = frame_file_path.split("/")[-1] + return frame_file_path def __repr__(self) -> str: - return f"FrameFilename: {self.full_path}" + return f"FrameInfo: {self.raw_path}" def __eq__(self, other: object) -> bool: - if not isinstance(other, FrameFilename): + if not isinstance(other, FrameInfo): return False - return self.full_path == other.full_path + return self.raw_path == other.raw_path # call generate_code_mappings() after you initialize CodeMappingTreesHelper @@ -135,7 +136,7 @@ def generate_code_mappings(self, frames: Sequence[Mapping[str, Any]]) -> list[Co # We need to make sure that calling this method with a new list of stack traces # should always start with a clean slate self.code_mappings = {} - buckets: dict[str, list[FrameFilename]] = self._stacktrace_buckets(frames) + buckets: dict[str, list[FrameInfo]] = self._stacktrace_buckets(frames) # We reprocess stackframes until we are told that no code mappings were produced # This is order to reprocess past stackframes in light of newly discovered code mappings @@ -148,7 +149,7 @@ def generate_code_mappings(self, frames: Sequence[Mapping[str, Any]]) -> list[Co return list(self.code_mappings.values()) - def list_file_matches(self, frame_filename: FrameFilename) -> list[dict[str, str]]: + def list_file_matches(self, frame_filename: FrameInfo) -> list[dict[str, str]]: """List all the files in a repo that match the frame_filename""" file_matches = [] for repo_full_name in self.trees.keys(): @@ -190,22 +191,22 @@ def list_file_matches(self, frame_filename: FrameFilename) -> list[dict[str, str def _stacktrace_buckets( self, frames: Sequence[Mapping[str, Any]] - ) -> dict[str, list[FrameFilename]]: + ) -> dict[str, list[FrameInfo]]: """Groups stacktraces into buckets based on the root of the stacktrace path""" - buckets: defaultdict[str, list[FrameFilename]] = defaultdict(list) + buckets: defaultdict[str, list[FrameInfo]] = defaultdict(list) for frame in frames: try: - frame_filename = FrameFilename(frame) + frame_filename = FrameInfo(frame) # Any files without a top directory will be grouped together - buckets[frame_filename.root].append(frame_filename) - except UnsupportedFrameFilename: + buckets[frame_filename.stack_root].append(frame_filename) + except UnsupportedFrameInfo: logger.warning("Frame's filepath not supported: %s", frame.get("filename")) except Exception: logger.exception("Unable to split stacktrace path into buckets") return buckets - def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameFilename]]) -> bool: + def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameInfo]]) -> bool: """This processes all stackframes and returns if a new code mapping has been generated""" reprocess = False for stackframe_root, stackframes in buckets.items(): @@ -219,7 +220,7 @@ def _process_stackframes(self, buckets: Mapping[str, Sequence[FrameFilename]]) - self.code_mappings[stackframe_root] = code_mapping return reprocess - def _find_code_mapping(self, frame_filename: FrameFilename) -> CodeMapping | None: + def _find_code_mapping(self, frame_filename: FrameInfo) -> CodeMapping | None: """Look for the file path through all the trees and a generate code mapping for it if a match is found""" code_mappings: list[CodeMapping] = [] # XXX: This will need optimization by changing the data structure of the trees @@ -238,11 +239,11 @@ def _find_code_mapping(self, frame_filename: FrameFilename) -> CodeMapping | Non logger.exception("Unexpected error. Processing continues.") if len(code_mappings) == 0: - logger.warning("No files matched for %s", frame_filename.full_path) + logger.warning("No files matched for %s", frame_filename.raw_path) return None # This means that the file has been found in more than one repo elif len(code_mappings) > 1: - logger.warning("More than one repo matched %s", frame_filename.full_path) + logger.warning("More than one repo matched %s", frame_filename.raw_path) return None return code_mappings[0] @@ -250,7 +251,7 @@ def _find_code_mapping(self, frame_filename: FrameFilename) -> CodeMapping | Non def _generate_code_mapping_from_tree( self, repo_tree: RepoTree, - frame_filename: FrameFilename, + frame_filename: FrameInfo, ) -> list[CodeMapping]: """ Finds a match in the repo tree and generates a code mapping for it. At most one code mapping is generated, if any. @@ -291,7 +292,7 @@ def _generate_code_mapping_from_tree( ) ] - def _is_potential_match(self, src_file: str, frame_filename: FrameFilename) -> bool: + def _is_potential_match(self, src_file: str, frame_filename: FrameInfo) -> bool: """ Tries to see if the stacktrace without the root matches the file from the source code. Use existing code mappings to exclude some source files @@ -476,7 +477,7 @@ def get_straight_path_prefix_end_index(file_path: str) -> int: return index -def find_roots(frame_filename: FrameFilename, source_path: str) -> tuple[str, str]: +def find_roots(frame_filename: FrameInfo, source_path: str) -> tuple[str, str]: """ Returns a tuple containing the stack_root, and the source_root. If there is no overlap, raise an exception since this should not happen diff --git a/tests/sentry/issues/auto_source_code_config/test_code_mapping.py b/tests/sentry/issues/auto_source_code_config/test_code_mapping.py index ba5801a04e1b5e..9bfd8f640039e1 100644 --- a/tests/sentry/issues/auto_source_code_config/test_code_mapping.py +++ b/tests/sentry/issues/auto_source_code_config/test_code_mapping.py @@ -14,10 +14,10 @@ from sentry.issues.auto_source_code_config.code_mapping import ( CodeMapping, CodeMappingTreesHelper, - FrameFilename, + FrameInfo, NeedsExtension, UnexpectedPathException, - UnsupportedFrameFilename, + UnsupportedFrameInfo, convert_stacktrace_frame_path_to_source_path, find_roots, get_sorted_code_mapping_configs, @@ -102,51 +102,51 @@ def test_buckets_logic() -> None: helper = CodeMappingTreesHelper({}) buckets = helper._stacktrace_buckets(frames) assert buckets == { - "./app": [FrameFilename({"filename": "./app/utils/handleXhrErrorResponse.tsx"})], - "app:": [FrameFilename({"filename": "app://foo.js"})], - "cronscripts": [FrameFilename({"filename": "/cronscripts/monitoringsync.php"})], - "getsentry": [FrameFilename({"filename": "getsentry/billing/tax/manager.py"})], + "./app": [FrameInfo({"filename": "./app/utils/handleXhrErrorResponse.tsx"})], + "app:": [FrameInfo({"filename": "app://foo.js"})], + "cronscripts": [FrameInfo({"filename": "/cronscripts/monitoringsync.php"})], + "getsentry": [FrameInfo({"filename": "getsentry/billing/tax/manager.py"})], } -class TestFrameFilename: +class TestFrameInfo: def test_frame_filename_repr(self) -> None: path = "getsentry/billing/tax/manager.py" - assert FrameFilename({"filename": path}).__repr__() == f"FrameFilename: {path}" + assert FrameInfo({"filename": path}).__repr__() == f"FrameInfo: {path}" def test_raises_unsupported(self) -> None: for filepath in UNSUPPORTED_FRAME_FILENAMES: - with pytest.raises(UnsupportedFrameFilename): - FrameFilename({"filename": filepath}) + with pytest.raises(UnsupportedFrameInfo): + FrameInfo({"filename": filepath}) def test_raises_no_extension(self) -> None: for filepath in NO_EXTENSION_FRAME_FILENAMES: with pytest.raises(NeedsExtension): - FrameFilename({"filename": filepath}) + FrameInfo({"filename": filepath}) @pytest.mark.parametrize( "frame_filename, prefix", [ pytest.param( - FrameFilename({"filename": "app:///utils/something.py"}), - "app:///", + FrameInfo({"filename": "app:///utils/something.py"}), + "app:///utils", ), pytest.param( - FrameFilename({"filename": "./app/utils/something.py"}), - "./", + FrameInfo({"filename": "./app/utils/something.py"}), + "./app", ), pytest.param( - FrameFilename({"filename": "../../../../../../packages/something.py"}), - "../../../../../../", + FrameInfo({"filename": "../../../../../../packages/something.py"}), + "../../../../../../packages", ), pytest.param( - FrameFilename({"filename": "app:///../services/something.py"}), - "app:///../", + FrameInfo({"filename": "app:///../services/something.py"}), + "app:///../services", ), ], ) - def test_straight_path_prefix(self, frame_filename: FrameFilename, prefix: str) -> None: - assert frame_filename.straight_path_prefix == prefix + def test_straight_path_prefix(self, frame_filename: FrameInfo, prefix: str) -> None: + assert frame_filename.stack_root == prefix class TestDerivedCodeMappings(TestCase): @@ -179,7 +179,7 @@ def test_package_also_matches(self) -> None: # We create a new tree helper in order to improve the understability of this test cmh = CodeMappingTreesHelper({self.foo_repo.name: repo_tree}) cm = cmh._generate_code_mapping_from_tree( - repo_tree=repo_tree, frame_filename=FrameFilename({"filename": "raven/base.py"}) + repo_tree=repo_tree, frame_filename=FrameInfo({"filename": "raven/base.py"}) ) # We should not derive a code mapping since the package name does not match assert cm == [] @@ -261,7 +261,7 @@ def test_more_than_one_repo_match(self, logger: Any) -> None: logger.warning.assert_called_with("More than one repo matched %s", "sentry/web/urls.py") def test_list_file_matches_single(self) -> None: - frame_filename = FrameFilename({"filename": "sentry_plugins/slack/client.py"}) + frame_filename = FrameInfo({"filename": "sentry_plugins/slack/client.py"}) matches = self.code_mapping_helper.list_file_matches(frame_filename) expected_matches = [ { @@ -275,7 +275,7 @@ def test_list_file_matches_single(self) -> None: assert matches == expected_matches def test_list_file_matches_multiple(self) -> None: - frame_filename = FrameFilename({"filename": "sentry/web/urls.py"}) + frame_filename = FrameInfo({"filename": "sentry/web/urls.py"}) matches = self.code_mapping_helper.list_file_matches(frame_filename) expected_matches = [ { @@ -297,49 +297,49 @@ def test_list_file_matches_multiple(self) -> None: def test_find_roots_starts_with_period_slash(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "./app/foo.tsx"}), "static/app/foo.tsx" + FrameInfo({"filename": "./app/foo.tsx"}), "static/app/foo.tsx" ) assert stacktrace_root == "./" assert source_path == "static/" def test_find_roots_starts_with_period_slash_no_containing_directory(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "./app/foo.tsx"}), "app/foo.tsx" + FrameInfo({"filename": "./app/foo.tsx"}), "app/foo.tsx" ) assert stacktrace_root == "./" assert source_path == "" def test_find_roots_not_matching(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "sentry/foo.py"}), "src/sentry/foo.py" + FrameInfo({"filename": "sentry/foo.py"}), "src/sentry/foo.py" ) assert stacktrace_root == "sentry/" assert source_path == "src/sentry/" def test_find_roots_equal(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "source/foo.py"}), "source/foo.py" + FrameInfo({"filename": "source/foo.py"}), "source/foo.py" ) assert stacktrace_root == "" assert source_path == "" def test_find_roots_starts_with_period_slash_two_levels(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "./app/foo.tsx"}), "app/foo/app/foo.tsx" + FrameInfo({"filename": "./app/foo.tsx"}), "app/foo/app/foo.tsx" ) assert stacktrace_root == "./" assert source_path == "app/foo/" def test_find_roots_starts_with_app(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "app:///utils/foo.tsx"}), "utils/foo.tsx" + FrameInfo({"filename": "app:///utils/foo.tsx"}), "utils/foo.tsx" ) assert stacktrace_root == "app:///" assert source_path == "" def test_find_roots_starts_with_multiple_dot_dot_slash(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "../../../../../../packages/foo.tsx"}), + FrameInfo({"filename": "../../../../../../packages/foo.tsx"}), "packages/foo.tsx", ) assert stacktrace_root == "../../../../../../" @@ -347,7 +347,7 @@ def test_find_roots_starts_with_multiple_dot_dot_slash(self) -> None: def test_find_roots_starts_with_app_dot_dot_slash(self) -> None: stacktrace_root, source_path = find_roots( - FrameFilename({"filename": "app:///../services/foo.tsx"}), + FrameInfo({"filename": "app:///../services/foo.tsx"}), "services/foo.tsx", ) assert stacktrace_root == "app:///../" @@ -356,14 +356,14 @@ def test_find_roots_starts_with_app_dot_dot_slash(self) -> None: def test_find_roots_bad_stack_path(self) -> None: with pytest.raises(UnexpectedPathException): find_roots( - FrameFilename({"filename": "https://yrurlsinyourstackpath.com/"}), + FrameInfo({"filename": "https://yrurlsinyourstackpath.com/"}), "sentry/something.py", ) def test_find_roots_bad_source_path(self) -> None: with pytest.raises(UnexpectedPathException): find_roots( - FrameFilename({"filename": "sentry/random.py"}), + FrameInfo({"filename": "sentry/random.py"}), "nothing/something.js", ) From 71a37c7f8a62f78d395ed4345db5850b45f43037 Mon Sep 17 00:00:00 2001 From: Armen Zambrano G <44410+armenzg@users.noreply.github.com> Date: Wed, 26 Feb 2025 15:13:24 -0500 Subject: [PATCH 2/2] Minor changes --- src/sentry/issues/auto_source_code_config/code_mapping.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sentry/issues/auto_source_code_config/code_mapping.py b/src/sentry/issues/auto_source_code_config/code_mapping.py index ebfc61c6385f19..a36e46e5295912 100644 --- a/src/sentry/issues/auto_source_code_config/code_mapping.py +++ b/src/sentry/issues/auto_source_code_config/code_mapping.py @@ -89,7 +89,6 @@ def __init__(self, frame: Mapping[str, Any]) -> None: # look like in the source code repository, hence why we remove # the straight path prefix and drive letter self.normalized_path = frame_file_path[start_at_index:] - start_at_index = get_straight_path_prefix_end_index(frame_file_path) if start_at_index == 0: self.stack_root = frame_file_path.split("/")[0] else: @@ -98,16 +97,16 @@ def __init__(self, frame: Mapping[str, Any]) -> None: def transformations(self, frame_file_path: str) -> str: self.raw_path = frame_file_path - self.is_windows_path = False + is_windows_path = False if "\\" in frame_file_path: - self.is_windows_path = True + is_windows_path = True frame_file_path = frame_file_path.replace("\\", "/") if frame_file_path[0] == "/" or frame_file_path[0] == "\\": frame_file_path = frame_file_path[1:] # Remove drive letter if it exists - if self.is_windows_path and frame_file_path[1] == ":": + if is_windows_path and frame_file_path[1] == ":": frame_file_path = frame_file_path[2:] # windows drive letters can be like C:\ or C: # so we need to remove the slash if it exists