Skip to content

Commit 925f407

Browse files
committed
🤡 mimic detector change
1 parent cc37b69 commit 925f407

File tree

5 files changed

+290
-19
lines changed

5 files changed

+290
-19
lines changed

src/sentry/utils/performance_issues/base.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from abc import ABC, abstractmethod
77
from datetime import timedelta
88
from enum import Enum
9-
from typing import Any, ClassVar
9+
from typing import Any, ClassVar, TypedDict
1010
from urllib.parse import parse_qs, urlparse
1111

1212
from sentry import options
@@ -292,7 +292,17 @@ def fingerprint_resource_span(span: Span):
292292
return hashlib.sha1(stripped_url.encode("utf-8")).hexdigest()
293293

294294

295-
def parameterize_url(url: str) -> str:
295+
class ParameterizedUrl(TypedDict):
296+
url: str
297+
path_params: list[str] # e.g. ["123", "abc123de-1024-4321-abcd-1234567890ab"]
298+
query_params: dict[str, list[str]] # e.g. {"limit": "50", "offset": "100"}
299+
300+
301+
def parameterize_url_with_result(url: str) -> ParameterizedUrl:
302+
"""
303+
Given a URL, return the URL with parsed path and query parameters replaced with '*',
304+
a list of the path parameters, and a dict of the query parameters.
305+
"""
296306
parsed_url = urlparse(str(url))
297307

298308
protocol_fragments = []
@@ -305,15 +315,18 @@ def parameterize_url(url: str) -> str:
305315
host_fragments.append(str(fragment))
306316

307317
path_fragments = []
318+
path_params = []
308319
for fragment in parsed_url.path.split("/"):
309-
if PARAMETERIZED_URL_REGEX.search(fragment):
320+
path_param = PARAMETERIZED_URL_REGEX.search(fragment)
321+
if path_param:
310322
path_fragments.append("*")
323+
path_params.append(path_param.group())
311324
else:
312325
path_fragments.append(str(fragment))
313326

314327
query = parse_qs(parsed_url.query)
315328

316-
return "".join(
329+
parameterized_url = "".join(
317330
[
318331
"".join(protocol_fragments),
319332
".".join(host_fragments),
@@ -323,6 +336,16 @@ def parameterize_url(url: str) -> str:
323336
]
324337
).rstrip("?")
325338

339+
return ParameterizedUrl(
340+
url=parameterized_url,
341+
path_params=path_params,
342+
query_params=query,
343+
)
344+
345+
346+
def parameterize_url(url: str) -> str:
347+
return parameterize_url_with_result(url).get("url", "")
348+
326349

327350
def fingerprint_http_spans(spans: list[Span]) -> str:
328351
"""
Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +0,0 @@
1-
from .consecutive_db_detector import ConsecutiveDBSpanDetector # NOQA
2-
from .consecutive_http_detector import ConsecutiveHTTPSpanDetector # NOQA
3-
from .http_overhead_detector import HTTPOverheadDetector # NOQA
4-
from .io_main_thread_detector import DBMainThreadDetector, FileIOMainThreadDetector # NOQA
5-
from .large_payload_detector import LargeHTTPPayloadDetector # NOQA
6-
from .mn_plus_one_db_span_detector import MNPlusOneDBSpanDetector # NOQA
7-
from .n_plus_one_api_calls_detector import NPlusOneAPICallsDetector # NOQA
8-
from .n_plus_one_db_span_detector import ( # NOQA
9-
NPlusOneDBSpanDetector,
10-
NPlusOneDBSpanDetectorExtended,
11-
)
12-
from .render_blocking_asset_span_detector import RenderBlockingAssetSpanDetector # NOQA
13-
from .slow_db_query_detector import SlowDBQueryDetector # NOQA
14-
from .uncompressed_asset_detector import UncompressedAssetSpanDetector # NOQA
Lines changed: 260 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,260 @@
1+
from __future__ import annotations
2+
3+
import logging
4+
import os
5+
from collections import defaultdict
6+
from collections.abc import Mapping
7+
from datetime import timedelta
8+
from typing import Any
9+
from urllib.parse import urlparse
10+
11+
from sentry.issues.grouptype import PerformanceNPlusOneAPICallsGroupType
12+
from sentry.issues.issue_occurrence import IssueEvidence
13+
from sentry.models.organization import Organization
14+
from sentry.models.project import Project
15+
from sentry.utils.performance_issues.detectors.utils import get_total_span_duration
16+
from sentry.utils.performance_issues.performance_problem import PerformanceProblem
17+
18+
from ..base import (
19+
DetectorType,
20+
PerformanceDetector,
21+
fingerprint_http_spans,
22+
get_notification_attachment_body,
23+
get_span_evidence_value,
24+
get_url_from_span,
25+
parameterize_url,
26+
parameterize_url_with_result,
27+
)
28+
from ..types import PerformanceProblemsMap, Span
29+
30+
logger = logging.getLogger(__name__)
31+
32+
33+
class DummyNPlusOneAPICallsDetector(PerformanceDetector):
34+
"""
35+
Detect parallel network calls to the same endpoint.
36+
37+
[-------- transaction -----------]
38+
[-------- parent span -----------]
39+
[n0] https://service.io/resources/?id=12443
40+
[n1] https://service.io/resources/?id=13342
41+
[n2] https://service.io/resources/?id=13441
42+
...
43+
"""
44+
45+
__slots__: list[str] = []
46+
type = DetectorType.N_PLUS_ONE_API_CALLS
47+
settings_key = DetectorType.N_PLUS_ONE_API_CALLS
48+
49+
def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) -> None:
50+
super().__init__(settings, event)
51+
52+
# TODO: Only store the span IDs and timestamps instead of entire span objects
53+
self.stored_problems: PerformanceProblemsMap = {}
54+
self.spans: list[Span] = []
55+
56+
def visit_span(self, span: Span) -> None:
57+
if not DummyNPlusOneAPICallsDetector.is_span_eligible(span):
58+
return
59+
60+
op = span.get("op", None)
61+
if op not in self.settings.get("allowed_span_ops", []):
62+
return
63+
64+
previous_span = self.spans[-1] if len(self.spans) > 0 else None
65+
66+
if previous_span is None:
67+
self.spans.append(span)
68+
elif self._spans_are_concurrent(previous_span, span) and self._spans_are_similar(
69+
previous_span, span
70+
):
71+
self.spans.append(span)
72+
else:
73+
self._maybe_store_problem()
74+
self.spans = [span]
75+
76+
def is_creation_allowed_for_organization(self, organization: Organization) -> bool:
77+
return True
78+
79+
def is_creation_allowed_for_project(self, project: Project) -> bool:
80+
return self.settings["detection_enabled"]
81+
82+
@classmethod
83+
def is_event_eligible(cls, event: dict[str, Any], project: Project | None = None) -> bool:
84+
trace_op = event.get("contexts", {}).get("trace", {}).get("op")
85+
if trace_op and trace_op not in ["navigation", "pageload", "ui.load", "ui.action"]:
86+
return False
87+
88+
return True
89+
90+
@classmethod
91+
def is_span_eligible(cls, span: Span) -> bool:
92+
span_id = span.get("span_id", None)
93+
op = span.get("op", None)
94+
hash = span.get("hash", None)
95+
96+
if not span_id or not op or not hash:
97+
return False
98+
99+
description = span.get("description")
100+
if not description:
101+
return False
102+
103+
if description.strip()[:3].upper() != "GET":
104+
return False
105+
106+
url = get_url_from_span(span)
107+
108+
# GraphQL URLs have complicated queries in them. Until we parse those
109+
# queries to check for what's duplicated, we can't tell what is being
110+
# duplicated. Ignore them for now
111+
if "graphql" in url:
112+
return False
113+
114+
# Next.js infixes its data URLs with a build ID. (e.g.,
115+
# /_next/data/<uuid>/some-endpoint) This causes a fingerprinting
116+
# explosion, since every deploy would change this ID and create new
117+
# fingerprints. Since we're not parameterizing URLs yet, we need to
118+
# exclude them
119+
if "_next/data" in url:
120+
return False
121+
122+
# Next.js error pages cause an N+1 API Call that isn't useful to anyone
123+
if "__nextjs_original-stack-frame" in url:
124+
return False
125+
126+
if not url:
127+
return False
128+
129+
# Once most users update their SDKs to use the latest standard, we
130+
# won't have to do this, since the URLs will be sent in as `span.data`
131+
# in a parsed format
132+
parsed_url = urlparse(str(url))
133+
134+
# Ignore anything that looks like an asset. Some frameworks (and apps)
135+
# fetch assets via XHR, which is not our concern
136+
_pathname, extension = os.path.splitext(parsed_url.path)
137+
if extension and extension in [".js", ".css", ".svg", ".png", ".mp3", ".jpg", ".jpeg"]:
138+
return False
139+
140+
return True
141+
142+
def on_complete(self) -> None:
143+
self._maybe_store_problem()
144+
self.spans = []
145+
146+
def _maybe_store_problem(self) -> None:
147+
if len(self.spans) < 1:
148+
return
149+
150+
if len(self.spans) < self.settings["count"]:
151+
return
152+
153+
total_duration = get_total_span_duration(self.spans)
154+
if total_duration < self.settings["total_duration"]:
155+
return
156+
157+
last_span = self.spans[-1]
158+
159+
fingerprint = self._fingerprint()
160+
if not fingerprint:
161+
return
162+
163+
offender_span_ids = [span["span_id"] for span in self.spans]
164+
problem_description = self._get_parameterized_url(self.spans[0])
165+
if problem_description == "":
166+
problem_description = os.path.commonprefix(
167+
[span.get("description", "") or "" for span in self.spans]
168+
)
169+
170+
problem = PerformanceProblem(
171+
fingerprint=fingerprint,
172+
op=last_span["op"],
173+
desc=problem_description,
174+
type=PerformanceNPlusOneAPICallsGroupType,
175+
cause_span_ids=[],
176+
parent_span_ids=[last_span.get("parent_span_id", None)],
177+
offender_span_ids=offender_span_ids,
178+
evidence_data={
179+
"op": last_span["op"],
180+
"cause_span_ids": [],
181+
"parent_span_ids": [last_span.get("parent_span_id", None)],
182+
"offender_span_ids": offender_span_ids,
183+
"transaction_name": self._event.get("transaction", ""),
184+
"num_repeating_spans": str(len(offender_span_ids)) if offender_span_ids else "",
185+
"repeating_spans": self._get_path_prefix(self.spans[0]),
186+
"repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False),
187+
"parameters": self._get_parameters()["query_params"],
188+
"path_parameters": self._get_parameters()["path_params"],
189+
},
190+
evidence_display=[
191+
IssueEvidence(
192+
name="Offending Spans",
193+
value=get_notification_attachment_body(
194+
op=last_span["op"], desc=problem_description
195+
),
196+
# Has to be marked important to be displayed in the notifications
197+
important=True,
198+
)
199+
],
200+
)
201+
logger.info("new_issue", extra={"problem": problem.to_dict()})
202+
203+
def _get_parameters(self) -> dict[str, list[str]]:
204+
if not self.spans or len(self.spans) == 0:
205+
return {"query_params": [], "path_params": []}
206+
207+
parameterized_urls = [
208+
parameterize_url_with_result(get_url_from_span(span)) for span in self.spans
209+
]
210+
path_params = [param["path_params"] for param in parameterized_urls]
211+
query_dict: Mapping[str, list[str]] = defaultdict(list)
212+
213+
for parameterized_url in parameterized_urls:
214+
query_params = parameterized_url["query_params"]
215+
216+
for key, value in query_params.items():
217+
query_dict[key] += value
218+
return {
219+
"path_params": [f"{', '.join(param_group)}" for param_group in path_params],
220+
"query_params": [f"{key}: {', '.join(values)}" for key, values in query_dict.items()],
221+
}
222+
223+
def _get_parameterized_url(self, span: Span) -> str:
224+
return parameterize_url(get_url_from_span(span))
225+
226+
def _get_path_prefix(self, repeating_span: Span) -> str:
227+
if not repeating_span:
228+
return ""
229+
230+
url = get_url_from_span(repeating_span)
231+
parsed_url = urlparse(url)
232+
return parsed_url.path or ""
233+
234+
def _fingerprint(self) -> str | None:
235+
first_url = get_url_from_span(self.spans[0])
236+
parameterized_first_url = parameterize_url(first_url)
237+
238+
# Check if we parameterized the URL at all. If not, do not attempt
239+
# fingerprinting. Unparameterized URLs run too high a risk of
240+
# fingerprinting explosions.
241+
if parameterized_first_url == first_url:
242+
return None
243+
244+
fingerprint = fingerprint_http_spans([self.spans[0]])
245+
246+
return f"1-{PerformanceNPlusOneAPICallsGroupType.type_id}-{fingerprint}"
247+
248+
def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
249+
span_a_start: int = span_a.get("start_timestamp", 0) or 0
250+
span_b_start: int = span_b.get("start_timestamp", 0) or 0
251+
252+
return timedelta(seconds=abs(span_a_start - span_b_start)) < timedelta(
253+
milliseconds=self.settings["concurrency_threshold"]
254+
)
255+
256+
def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
257+
return (
258+
self._get_parameterized_url(span_a) == self._get_parameterized_url(span_b)
259+
and span_a["parent_span_id"] == span_b["parent_span_id"]
260+
)

src/sentry/utils/performance_issues/performance_detection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from .base import DetectorType, PerformanceDetector
2323
from .detectors.consecutive_db_detector import ConsecutiveDBSpanDetector
2424
from .detectors.consecutive_http_detector import ConsecutiveHTTPSpanDetector
25+
from .detectors.dummy_n_plus_one_api_calls_detector import DummyNPlusOneAPICallsDetector
2526
from .detectors.http_overhead_detector import HTTPOverheadDetector
2627
from .detectors.io_main_thread_detector import DBMainThreadDetector, FileIOMainThreadDetector
2728
from .detectors.large_payload_detector import LargeHTTPPayloadDetector
@@ -322,6 +323,7 @@ def get_detection_settings(project_id: int | None = None) -> dict[DetectorType,
322323
NPlusOneDBSpanDetectorExtended,
323324
FileIOMainThreadDetector,
324325
NPlusOneAPICallsDetector,
326+
DummyNPlusOneAPICallsDetector,
325327
MNPlusOneDBSpanDetector,
326328
UncompressedAssetSpanDetector,
327329
LargeHTTPPayloadDetector,

src/sentry/utils/performance_issues/performance_problem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class PerformanceProblem:
2222
# We can't make it required until we stop loading these from nodestore via EventPerformanceProblem,
2323
# since there's legacy data in there that won't have these fields.
2424
# So until we disable transaction based perf issues we'll need to keep this optional.
25-
evidence_data: Mapping[str, Any] | None
25+
evidence_data: dict[str, Any] | None
2626
# User-friendly evidence to be displayed directly
2727
evidence_display: Sequence[IssueEvidence]
2828

0 commit comments

Comments
 (0)