Skip to content

Commit 9710589

Browse files
committed
🚧 Use the parameterized URL for checking similarity
1 parent 7dca844 commit 9710589

File tree

1 file changed

+3
-60
lines changed

1 file changed

+3
-60
lines changed

src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py

Lines changed: 3 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
from __future__ import annotations
22

3-
import hashlib
43
import os
54
from collections import defaultdict
6-
from collections.abc import Mapping, Sequence
5+
from collections.abc import Mapping
76
from datetime import timedelta
87
from typing import Any
98
from urllib.parse import parse_qs, urlparse
109

11-
from django.utils.encoding import force_bytes
12-
1310
from sentry.issues.grouptype import PerformanceNPlusOneAPICallsGroupType
1411
from sentry.issues.issue_occurrence import IssueEvidence
1512
from sentry.models.organization import Organization
@@ -51,7 +48,6 @@ def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) ->
5148
# TODO: Only store the span IDs and timestamps instead of entire span objects
5249
self.stored_problems: PerformanceProblemsMap = {}
5350
self.spans: list[Span] = []
54-
self.span_hashes: dict[str, str | None] = {}
5551

5652
def visit_span(self, span: Span) -> None:
5753
if not NPlusOneAPICallsDetector.is_span_eligible(span):
@@ -61,8 +57,6 @@ def visit_span(self, span: Span) -> None:
6157
if op not in self.settings.get("allowed_span_ops", []):
6258
return
6359

64-
self.span_hashes[span["span_id"]] = get_span_hash(span)
65-
6660
previous_span = self.spans[-1] if len(self.spans) > 0 else None
6761

6862
if previous_span is None:
@@ -250,62 +244,11 @@ def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
250244

251245
def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
252246
return (
253-
self.span_hashes[span_a["span_id"]] == self.span_hashes[span_b["span_id"]]
247+
parameterize_url(get_url_from_span(span_a))
248+
== parameterize_url(get_url_from_span(span_b))
254249
and span_a["parent_span_id"] == span_b["parent_span_id"]
255250
)
256251

257252

258-
HTTP_METHODS = {
259-
"GET",
260-
"HEAD",
261-
"POST",
262-
"PUT",
263-
"DELETE",
264-
"CONNECT",
265-
"OPTIONS",
266-
"TRACE",
267-
"PATCH",
268-
}
269-
270-
271-
def get_span_hash(span: Span) -> str | None:
272-
if span.get("op") != "http.client":
273-
return span.get("hash")
274-
275-
parts = remove_http_client_query_string_strategy(span)
276-
if not parts:
277-
return None
278-
279-
hash = hashlib.md5()
280-
for part in parts:
281-
hash.update(force_bytes(part, errors="replace"))
282-
283-
return hash.hexdigest()[:16]
284-
285-
286-
def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None:
287-
"""
288-
This is an inline version of the `http.client` parameterization code in
289-
`"default:2022-10-27"`, the default span grouping strategy at time of
290-
writing. It's inlined here to insulate this detector from changes in the
291-
strategy, which are coming soon.
292-
"""
293-
294-
# Check the description is of the form `<HTTP METHOD> <URL>`
295-
description = span.get("description") or ""
296-
parts = description.split(" ", 1)
297-
if len(parts) != 2:
298-
return None
299-
300-
# Ensure that this is a valid http method
301-
method, url_str = parts
302-
method = method.upper()
303-
if method not in HTTP_METHODS:
304-
return None
305-
306-
url = urlparse(url_str)
307-
return [method, url.scheme, url.netloc, url.path]
308-
309-
310253
def without_query_params(url: str) -> str:
311254
return urlparse(url)._replace(query="").geturl()

0 commit comments

Comments
 (0)