
Commit 3249ad6

✨ copy over existing detectors
1 parent d2348db commit 3249ad6

2 files changed: +775 -0 lines changed
Lines changed: 310 additions & 0 deletions
@@ -0,0 +1,310 @@
from __future__ import annotations

import hashlib
import os
from collections import defaultdict
from collections.abc import Mapping, Sequence
from datetime import timedelta
from typing import Any
from urllib.parse import parse_qs, urlparse

from django.utils.encoding import force_bytes

from sentry.issues.grouptype import PerformanceNPlusOneAPICallsExperimentalGroupType
from sentry.issues.issue_occurrence import IssueEvidence
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.utils.performance_issues.base import (
    DetectorType,
    PerformanceDetector,
    fingerprint_http_spans,
    get_notification_attachment_body,
    get_span_evidence_value,
    get_url_from_span,
    parameterize_url,
)
from sentry.utils.performance_issues.detectors.utils import get_total_span_duration
from sentry.utils.performance_issues.performance_problem import PerformanceProblem
from sentry.utils.performance_issues.types import PerformanceProblemsMap, Span


class NPlusOneAPICallsExperimentalDetector(PerformanceDetector):
    """
    Detect parallel network calls to the same endpoint.

      [-------- transaction -----------]
         [-------- parent span -----------]
          [n0] https://service.io/resources/?id=12443
          [n1] https://service.io/resources/?id=13342
          [n2] https://service.io/resources/?id=13441
          ...
    """

    __slots__ = ["stored_problems"]
    type = DetectorType.N_PLUS_ONE_API_CALLS
    settings_key = DetectorType.N_PLUS_ONE_API_CALLS

    def __init__(self, settings: dict[DetectorType, Any], event: dict[str, Any]) -> None:
        super().__init__(settings, event)

        # TODO: Only store the span IDs and timestamps instead of entire span objects
        self.stored_problems: PerformanceProblemsMap = {}
        self.spans: list[Span] = []
        self.span_hashes: dict[str, str | None] = {}

    def visit_span(self, span: Span) -> None:
        if not NPlusOneAPICallsExperimentalDetector.is_span_eligible(span):
            return

        op = span.get("op", None)
        if op not in self.settings.get("allowed_span_ops", []):
            return

        self.span_hashes[span["span_id"]] = get_span_hash(span)

        previous_span = self.spans[-1] if len(self.spans) > 0 else None

        if previous_span is None:
            self.spans.append(span)
        elif self._spans_are_concurrent(previous_span, span) and self._spans_are_similar(
            previous_span, span
        ):
            self.spans.append(span)
        else:
            self._maybe_store_problem()
            self.spans = [span]

    def is_creation_allowed_for_organization(self, organization: Organization) -> bool:
        return True

    def is_creation_allowed_for_project(self, project: Project) -> bool:
        return self.settings["detection_enabled"]

    @classmethod
    def is_event_eligible(cls, event: dict[str, Any], project: Project | None = None) -> bool:
        trace_op = event.get("contexts", {}).get("trace", {}).get("op")
        if trace_op and trace_op not in ["navigation", "pageload", "ui.load", "ui.action"]:
            return False

        return True

    @classmethod
    def is_span_eligible(cls, span: Span) -> bool:
        span_id = span.get("span_id", None)
        op = span.get("op", None)
        hash = span.get("hash", None)

        if not span_id or not op or not hash:
            return False

        description = span.get("description")
        if not description:
            return False

        if description.strip()[:3].upper() != "GET":
            return False

        url = get_url_from_span(span)

        # GraphQL URLs have complicated queries in them. Until we parse those
        # queries to check for what's duplicated, we can't tell what is being
        # duplicated. Ignore them for now
        if "graphql" in url:
            return False

        # Next.js infixes its data URLs with a build ID. (e.g.,
        # /_next/data/<uuid>/some-endpoint) This causes a fingerprinting
        # explosion, since every deploy would change this ID and create new
        # fingerprints. Since we're not parameterizing URLs yet, we need to
        # exclude them
        if "_next/data" in url:
            return False

        # Next.js error pages cause an N+1 API Call that isn't useful to anyone
        if "__nextjs_original-stack-frame" in url:
            return False

        if not url:
            return False

        # Once most users update their SDKs to use the latest standard, we
        # won't have to do this, since the URLs will be sent in as `span.data`
        # in a parsed format
        parsed_url = urlparse(str(url))

        # Ignore anything that looks like an asset. Some frameworks (and apps)
        # fetch assets via XHR, which is not our concern
        _pathname, extension = os.path.splitext(parsed_url.path)
        if extension and extension in [".js", ".css", ".svg", ".png", ".mp3", ".jpg", ".jpeg"]:
            return False

        return True

    def on_complete(self) -> None:
        self._maybe_store_problem()
        self.spans = []

    def _maybe_store_problem(self) -> None:
        if len(self.spans) < 1:
            return

        if len(self.spans) < self.settings["count"]:
            return

        total_duration = get_total_span_duration(self.spans)
        if total_duration < self.settings["total_duration"]:
            return

        last_span = self.spans[-1]

        fingerprint = self._fingerprint()
        if not fingerprint:
            return

        offender_span_ids = [span["span_id"] for span in self.spans]

        self.stored_problems[fingerprint] = PerformanceProblem(
            fingerprint=fingerprint,
            op=last_span["op"],
            desc=os.path.commonprefix([span.get("description", "") or "" for span in self.spans]),
            type=PerformanceNPlusOneAPICallsExperimentalGroupType,
            cause_span_ids=[],
            parent_span_ids=[last_span.get("parent_span_id", None)],
            offender_span_ids=offender_span_ids,
            evidence_data={
                "op": last_span["op"],
                "cause_span_ids": [],
                "parent_span_ids": [last_span.get("parent_span_id", None)],
                "offender_span_ids": offender_span_ids,
                "transaction_name": self._event.get("transaction", ""),
                "num_repeating_spans": str(len(offender_span_ids)) if offender_span_ids else "",
                "repeating_spans": self._get_path_prefix(self.spans[0]),
                "repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False),
                "parameters": self._get_parameters(),
            },
            evidence_display=[
                IssueEvidence(
                    name="Offending Spans",
                    value=get_notification_attachment_body(
                        last_span["op"],
                        os.path.commonprefix(
                            [span.get("description", "") or "" for span in self.spans]
                        ),
                    ),
                    # Has to be marked important to be displayed in the notifications
                    important=True,
                )
            ],
        )

    def _get_parameters(self) -> list[str]:
        if not self.spans or len(self.spans) == 0:
            return []

        urls = [get_url_from_span(span) for span in self.spans]

        all_parameters: Mapping[str, list[str]] = defaultdict(list)

        for url in urls:
            parsed_url = urlparse(url)
            parameters = parse_qs(parsed_url.query)

            for key, value in parameters.items():
                all_parameters[key] += value

        return [
            "{{{}: {}}}".format(key, ",".join(values)) for key, values in all_parameters.items()
        ]

    def _get_path_prefix(self, repeating_span: Span) -> str:
        if not repeating_span:
            return ""

        url = get_url_from_span(repeating_span)
        parsed_url = urlparse(url)
        return parsed_url.path or ""

    def _fingerprint(self) -> str | None:
        first_url = get_url_from_span(self.spans[0])
        parameterized_first_url = parameterize_url(first_url)

        # Check if we parameterized the URL at all. If not, do not attempt
        # fingerprinting. Unparameterized URLs run too high a risk of
        # fingerprinting explosions. Query parameters are parameterized by
        # definition, so exclude them from comparison
        if without_query_params(parameterized_first_url) == without_query_params(first_url):
            return None

        fingerprint = fingerprint_http_spans([self.spans[0]])

        return f"1-{PerformanceNPlusOneAPICallsExperimentalGroupType.type_id}-{fingerprint}"

    def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
        span_a_start: int = span_a.get("start_timestamp", 0) or 0
        span_b_start: int = span_b.get("start_timestamp", 0) or 0

        return timedelta(seconds=abs(span_a_start - span_b_start)) < timedelta(
            milliseconds=self.settings["concurrency_threshold"]
        )

    def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
        return (
            self.span_hashes[span_a["span_id"]] == self.span_hashes[span_b["span_id"]]
            and span_a["parent_span_id"] == span_b["parent_span_id"]
        )

HTTP_METHODS = {
    "GET",
    "HEAD",
    "POST",
    "PUT",
    "DELETE",
    "CONNECT",
    "OPTIONS",
    "TRACE",
    "PATCH",
}


def get_span_hash(span: Span) -> str | None:
    if span.get("op") != "http.client":
        return span.get("hash")

    parts = remove_http_client_query_string_strategy(span)
    if not parts:
        return None

    hash = hashlib.md5()
    for part in parts:
        hash.update(force_bytes(part, errors="replace"))

    return hash.hexdigest()[:16]


def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None:
    """
    This is an inline version of the `http.client` parameterization code in
    `"default:2022-10-27"`, the default span grouping strategy at time of
    writing. It's inlined here to insulate this detector from changes in the
    strategy, which are coming soon.
    """

    # Check the description is of the form `<HTTP METHOD> <URL>`
    description = span.get("description") or ""
    parts = description.split(" ", 1)
    if len(parts) != 2:
        return None

    # Ensure that this is a valid http method
    method, url_str = parts
    method = method.upper()
    if method not in HTTP_METHODS:
        return None

    url = urlparse(url_str)
    return [method, url.scheme, url.netloc, url.path]


def without_query_params(url: str) -> str:
    return urlparse(url)._replace(query="").geturl()
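
For a quick sense of why the near-identical URLs in the class docstring group together, here is a minimal sketch exercising the helpers defined above. It is not part of the diff; the two spans are hypothetical examples built inline, and `get_span_hash` / `without_query_params` are assumed to be in scope (the file's path is not shown in this commit view).

# Hypothetical spans shaped like the ones visit_span() receives; only the
# fields that get_span_hash() reads (op, description) are populated here.
span_a = {"op": "http.client", "description": "GET https://service.io/resources/?id=12443"}
span_b = {"op": "http.client", "description": "GET https://service.io/resources/?id=13342"}

# For http.client spans the hash is built from (method, scheme, netloc, path)
# only, so the differing `id` query parameters do not change the result and
# _spans_are_similar() would treat the two spans as the same endpoint
# (given a matching parent_span_id).
assert get_span_hash(span_a) == get_span_hash(span_b)

# without_query_params() is what lets _fingerprint() compare the raw and
# parameterized URLs while ignoring query strings.
assert without_query_params("https://service.io/resources/?id=12443") == "https://service.io/resources/"
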
