Skip to content

feat(code-mappings): Add support for stacktrace frames with backslashes for automatic code mappings #68845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions src/sentry/integrations/utils/code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,19 @@ class UnsupportedFrameFilename(Exception):
class FrameFilename:
def __init__(self, frame_file_path: str) -> None:
self.raw_path = frame_file_path
if frame_file_path[0] == "/":
is_windows_path = False
if "\\" in frame_file_path:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can this conflict with the escape character on Linux? e.g: \ as an escape for a space in the path? or is that just a shell thing?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is an interesting edge case! I am admittedly not very knowledgable on Linux or Shell, but a quick search tells me that this seems like more of a shell thing than a Linux thing. I'm less sure about the probability of a backslash escape character being used in a the file path of a stack trace, though the safety checks should catch these and ensure it doesn't result in any faulty code mappings. I'll periodically check the logs generated by those safety checks to see how often this edge case actually occurs and what the best way to resolve them is.

is_windows_path = True
frame_file_path = frame_file_path.replace("\\", "/")

if frame_file_path[0] == "/" or frame_file_path[0] == "\\":
frame_file_path = frame_file_path[1:]

# Using regexes would be better but this is easier to understand
if (
not frame_file_path
or frame_file_path[0] in ["[", "<"]
or frame_file_path.find(" ") > -1
or frame_file_path.find("\\") > -1 # Windows support
or frame_file_path.find("/") == -1
):
raise UnsupportedFrameFilename("This path is not supported.")
Expand All @@ -79,8 +83,20 @@ def __init__(self, frame_file_path: str) -> None:
if not self.extension:
raise UnsupportedFrameFilename("It needs an extension.")

# Remove drive letter if it exists
if is_windows_path and frame_file_path[1] == ":":
frame_file_path = frame_file_path[2:]
# windows drive letters can be like C:\ or C:
# so we need to remove the slash if it exists
if frame_file_path[0] == "/":
frame_file_path = frame_file_path[1:]

start_at_index = get_straight_path_prefix_end_index(frame_file_path)
self.straight_path_prefix = frame_file_path[:start_at_index]

# We normalize the path to be as close to what the path would
# look like in the source code repository, hence why we remove
# the straight path prefix and drive letter
self.normalized_path = frame_file_path[start_at_index:]
if start_at_index == 0:
self.root = frame_file_path.split("/")[0]
Expand Down Expand Up @@ -145,7 +161,7 @@ def list_file_matches(self, frame_filename: FrameFilename) -> list[dict[str, str
)
continue

if stack_path.replace(stack_root, source_root, 1) != source_path:
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.info(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra={
Expand Down Expand Up @@ -259,7 +275,7 @@ def _generate_code_mapping_from_tree(
)
return []

if stack_path.replace(stack_root, source_root, 1) != source_path:
if stack_path.replace(stack_root, source_root, 1).replace("\\", "/") != source_path:
logger.info(
"Unexpected stack_path/source_path found. A code mapping was not generated.",
extra={
Expand Down Expand Up @@ -370,6 +386,10 @@ def convert_stacktrace_frame_path_to_source_path(
If the code mapping does not apply to the frame, returns None.
"""

stack_root = code_mapping.stack_root
if "\\" in code_mapping.stack_root:
stack_root = code_mapping.stack_root.replace("\\", "/")

Comment on lines +389 to +392
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function (convert_stacktrace_frame_path_to_source_path) contains the logic that applies a code mapping to a given stack trace and returns the source path to the GH file.

This needed to be augmented to normalize for backslashes in the stack root, as described in the PR description

# In most cases, code mappings get applied to frame.filename, but some platforms such as Java
# contain folder info in other parts of the frame (e.g. frame.module="com.example.app.MainActivity"
# gets transformed to "com/example/app/MainActivity.java"), so in those cases we use the
Expand All @@ -379,13 +399,13 @@ def convert_stacktrace_frame_path_to_source_path(
)

if stacktrace_path and stacktrace_path.startswith(code_mapping.stack_root):
return stacktrace_path.replace(code_mapping.stack_root, code_mapping.source_root, 1)
return stacktrace_path.replace(stack_root, code_mapping.source_root, 1)

# Some platforms only provide the file's name without folder paths, so we
# need to use the absolute path instead. If the code mapping has a non-empty
# stack_root value and it matches the absolute path, we do the mapping on it.
if frame.abs_path and frame.abs_path.startswith(code_mapping.stack_root):
return frame.abs_path.replace(code_mapping.stack_root, code_mapping.source_root, 1)
return frame.abs_path.replace(stack_root, code_mapping.source_root, 1)

return None

Expand Down Expand Up @@ -505,8 +525,8 @@ def find_roots(stack_path: str, source_path: str) -> tuple[str, str]:
If there is no overlap, raise an exception since this should not happen
"""
stack_root = ""
if stack_path[0] == "/":
stack_root += "/"
if stack_path[0] == "/" or stack_path[0] == "\\":
stack_root += stack_path[0]
stack_path = stack_path[1:]

if stack_path == source_path:
Expand Down Expand Up @@ -534,7 +554,7 @@ def find_roots(stack_path: str, source_path: str) -> tuple[str, str]:
source_root = source_path.rpartition(overlap)[0]
stack_root += stack_path_delim.join(stack_root_items)

if stack_root and stack_root[-1] != SLASH: # append trailing slash
if stack_root and stack_root[-1] != stack_path_delim: # append trailing slash
stack_root = f"{stack_root}{stack_path_delim}"
if source_root and source_root[-1] != SLASH:
source_root = f"{source_root}{SLASH}"
Expand Down
1 change: 0 additions & 1 deletion tests/sentry/integrations/utils/test_code_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@
"README", # no extension
"ssl.py",
# XXX: The following will need to be supported
"C:\\Users\\Donia\\AppData\\Roaming\\Adobe\\UXP\\Plugins\\External\\452f92d2_0.13.0\\main.js",
"initialization.dart",
"backburner.js",
]
Expand Down
77 changes: 77 additions & 0 deletions tests/sentry/tasks/test_derive_code_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,83 @@ def test_raises_generic_errors(self, mock_logger):
)


class TestBackSlashDeriveCodeMappings(BaseDeriveCodeMappings):
def setUp(self):
super().setUp()
self.platform = "python"
# The lack of a \ after the drive letter in the third frame signals that
# this is a relative path. This may be unlikely to occur in practice,
# but worth testing nonetheless.
self.event_data = self.generate_data(
[
{"in_app": True, "filename": "\\sentry\\mouse.py"},
{"in_app": True, "filename": "\\sentry\\dog\\cat\\parrot.py"},
{"in_app": True, "filename": "C:sentry\\tasks.py"},
{"in_app": True, "filename": "D:\\Users\\code\\sentry\\models\\release.py"},
]
)

@responses.activate
def test_backslash_filename_simple(self):
repo_name = "foo/bar"
with patch(
"sentry.integrations.github.client.GitHubClientMixin.get_trees_for_org"
) as mock_get_trees_for_org:
mock_get_trees_for_org.return_value = {
repo_name: RepoTree(Repo(repo_name, "master"), ["sentry/mouse.py"])
}
derive_code_mappings(self.project.id, self.event_data)
code_mapping = RepositoryProjectPathConfig.objects.all()[0]
assert code_mapping.stack_root == "\\"
assert code_mapping.source_root == ""
assert code_mapping.repository.name == repo_name

@responses.activate
def test_backslash_drive_letter_filename_simple(self):
repo_name = "foo/bar"
with patch(
"sentry.integrations.github.client.GitHubClientMixin.get_trees_for_org"
) as mock_get_trees_for_org:
mock_get_trees_for_org.return_value = {
repo_name: RepoTree(Repo(repo_name, "master"), ["sentry/tasks.py"])
}
derive_code_mappings(self.project.id, self.event_data)
code_mapping = RepositoryProjectPathConfig.objects.all()[0]
assert code_mapping.stack_root == "C:sentry\\"
assert code_mapping.source_root == "sentry/"
assert code_mapping.repository.name == repo_name

@responses.activate
def test_backslash_drive_letter_filename_monorepo(self):
repo_name = "foo/bar"
with patch(
"sentry.integrations.github.client.GitHubClientMixin.get_trees_for_org"
) as mock_get_trees_for_org:
mock_get_trees_for_org.return_value = {
repo_name: RepoTree(Repo(repo_name, "master"), ["src/sentry/tasks.py"])
}
derive_code_mappings(self.project.id, self.event_data)
code_mapping = RepositoryProjectPathConfig.objects.all()[0]
assert code_mapping.stack_root == "C:sentry\\"
assert code_mapping.source_root == "src/sentry/"
assert code_mapping.repository.name == repo_name

@responses.activate
def test_backslash_drive_letter_filename_abs_path(self):
repo_name = "foo/bar"
with patch(
"sentry.integrations.github.client.GitHubClientMixin.get_trees_for_org"
) as mock_get_trees_for_org:
mock_get_trees_for_org.return_value = {
repo_name: RepoTree(Repo(repo_name, "master"), ["sentry/models/release.py"])
}
derive_code_mappings(self.project.id, self.event_data)
code_mapping = RepositoryProjectPathConfig.objects.all()[0]
assert code_mapping.stack_root == "D:\\Users\\code\\"
assert code_mapping.source_root == ""
assert code_mapping.repository.name == repo_name


class TestJavascriptDeriveCodeMappings(BaseDeriveCodeMappings):
def setUp(self):
super().setUp()
Expand Down
Loading