Skip to content

Commit f2f114f

Browse files
committed
🚧 Use the parameterized URL for checking similarity
1 parent 9099805 commit f2f114f

File tree

1 file changed

+3
-60
lines changed

1 file changed

+3
-60
lines changed

src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py

Lines changed: 3 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
from __future__ import annotations
22

3-
import hashlib
43
import os
54
from collections import defaultdict
6-
from collections.abc import Mapping, Sequence
5+
from collections.abc import Mapping
76
from datetime import timedelta
87
from typing import Any
98
from urllib.parse import parse_qs, urlparse
109

11-
from django.utils.encoding import force_bytes
12-
1310
from sentry.issues.grouptype import PerformanceNPlusOneAPICallsGroupType
1411
from sentry.issues.issue_occurrence import IssueEvidence
1512
from sentry.models.organization import Organization
@@ -53,7 +50,6 @@ def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) ->
5350
# TODO: Only store the span IDs and timestamps instead of entire span objects
5451
self.stored_problems: PerformanceProblemsMap = {}
5552
self.spans: list[Span] = []
56-
self.span_hashes: dict[str, str | None] = {}
5753

5854
def visit_span(self, span: Span) -> None:
5955
if not NPlusOneAPICallsDetector.is_span_eligible(span):
@@ -63,8 +59,6 @@ def visit_span(self, span: Span) -> None:
6359
if op not in self.settings.get("allowed_span_ops", []):
6460
return
6561

66-
self.span_hashes[span["span_id"]] = get_span_hash(span)
67-
6862
previous_span = self.spans[-1] if len(self.spans) > 0 else None
6963

7064
if previous_span is None:
@@ -255,62 +249,11 @@ def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
255249

256250
def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
257251
return (
258-
self.span_hashes[span_a["span_id"]] == self.span_hashes[span_b["span_id"]]
252+
parameterize_url(get_url_from_span(span_a))
253+
== parameterize_url(get_url_from_span(span_b))
259254
and span_a["parent_span_id"] == span_b["parent_span_id"]
260255
)
261256

262257

263-
HTTP_METHODS = {
264-
"GET",
265-
"HEAD",
266-
"POST",
267-
"PUT",
268-
"DELETE",
269-
"CONNECT",
270-
"OPTIONS",
271-
"TRACE",
272-
"PATCH",
273-
}
274-
275-
276-
def get_span_hash(span: Span) -> str | None:
277-
if span.get("op") != "http.client":
278-
return span.get("hash")
279-
280-
parts = remove_http_client_query_string_strategy(span)
281-
if not parts:
282-
return None
283-
284-
hash = hashlib.md5()
285-
for part in parts:
286-
hash.update(force_bytes(part, errors="replace"))
287-
288-
return hash.hexdigest()[:16]
289-
290-
291-
def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None:
292-
"""
293-
This is an inline version of the `http.client` parameterization code in
294-
`"default:2022-10-27"`, the default span grouping strategy at time of
295-
writing. It's inlined here to insulate this detector from changes in the
296-
strategy, which are coming soon.
297-
"""
298-
299-
# Check the description is of the form `<HTTP METHOD> <URL>`
300-
description = span.get("description") or ""
301-
parts = description.split(" ", 1)
302-
if len(parts) != 2:
303-
return None
304-
305-
# Ensure that this is a valid http method
306-
method, url_str = parts
307-
method = method.upper()
308-
if method not in HTTP_METHODS:
309-
return None
310-
311-
url = urlparse(url_str)
312-
return [method, url.scheme, url.netloc, url.path]
313-
314-
315258
def without_query_params(url: str) -> str:
316259
return urlparse(url)._replace(query="").geturl()

0 commit comments

Comments
 (0)