Skip to content

Commit 749da92

Browse files
committed
♻️ better evidence data, tests
1 parent 9710589 commit 749da92

File tree

4 files changed

+107
-57
lines changed

4 files changed

+107
-57
lines changed

src/sentry/utils/performance_issues/base.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from abc import ABC, abstractmethod
77
from datetime import timedelta
88
from enum import Enum
9-
from typing import Any, ClassVar
9+
from typing import Any, ClassVar, TypedDict
1010
from urllib.parse import parse_qs, urlparse
1111

1212
from sentry import options
@@ -292,7 +292,17 @@ def fingerprint_resource_span(span: Span):
292292
return hashlib.sha1(stripped_url.encode("utf-8")).hexdigest()
293293

294294

295-
def parameterize_url(url: str) -> str:
295+
class ParameterizedUrl(TypedDict):
296+
url: str
297+
path_params: list[str] # e.g. ["123", "abc123de-1024-4321-abcd-1234567890ab"]
298+
query_params: dict[str, list[str]] # e.g. {"limit": "50", "offset": "100"}
299+
300+
301+
def parameterize_url_with_result(url: str) -> ParameterizedUrl:
302+
"""
303+
Given a URL, return the URL with parsed path and query parameters replaced with '*',
304+
a list of the path parameters, and a dict of the query parameters.
305+
"""
296306
parsed_url = urlparse(str(url))
297307

298308
protocol_fragments = []
@@ -305,15 +315,18 @@ def parameterize_url(url: str) -> str:
305315
host_fragments.append(str(fragment))
306316

307317
path_fragments = []
318+
path_params = []
308319
for fragment in parsed_url.path.split("/"):
309-
if PARAMETERIZED_URL_REGEX.search(fragment):
320+
path_param = PARAMETERIZED_URL_REGEX.search(fragment)
321+
if path_param:
310322
path_fragments.append("*")
323+
path_params.append(path_param.group())
311324
else:
312325
path_fragments.append(str(fragment))
313326

314327
query = parse_qs(parsed_url.query)
315328

316-
return "".join(
329+
parameterized_url = "".join(
317330
[
318331
"".join(protocol_fragments),
319332
".".join(host_fragments),
@@ -323,6 +336,16 @@ def parameterize_url(url: str) -> str:
323336
]
324337
).rstrip("?")
325338

339+
return ParameterizedUrl(
340+
url=parameterized_url,
341+
path_params=path_params,
342+
query_params=query,
343+
)
344+
345+
346+
def parameterize_url(url: str) -> str:
347+
return parameterize_url_with_result(url).get("url", "")
348+
326349

327350
def fingerprint_http_spans(spans: list[Span]) -> str:
328351
"""

src/sentry/utils/performance_issues/detectors/n_plus_one_api_calls_detector.py

Lines changed: 31 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
from collections.abc import Mapping
66
from datetime import timedelta
77
from typing import Any
8-
from urllib.parse import parse_qs, urlparse
8+
from urllib.parse import urlparse
99

1010
from sentry.issues.grouptype import PerformanceNPlusOneAPICallsGroupType
1111
from sentry.issues.issue_occurrence import IssueEvidence
@@ -21,6 +21,7 @@
2121
get_span_evidence_value,
2222
get_url_from_span,
2323
parameterize_url,
24+
parameterize_url_with_result,
2425
)
2526
from ..performance_problem import PerformanceProblem
2627
from ..types import PerformanceProblemsMap, Span
@@ -157,11 +158,16 @@ def _maybe_store_problem(self) -> None:
157158
return
158159

159160
offender_span_ids = [span["span_id"] for span in self.spans]
161+
problem_description = self._get_parameterized_url(self.spans[0])
162+
if problem_description == "":
163+
problem_description = os.path.commonprefix(
164+
[span.get("description", "") or "" for span in self.spans]
165+
)
160166

161167
self.stored_problems[fingerprint] = PerformanceProblem(
162168
fingerprint=fingerprint,
163169
op=last_span["op"],
164-
desc=os.path.commonprefix([span.get("description", "") or "" for span in self.spans]),
170+
desc=problem_description,
165171
type=PerformanceNPlusOneAPICallsGroupType,
166172
cause_span_ids=[],
167173
parent_span_ids=[last_span.get("parent_span_id", None)],
@@ -175,41 +181,43 @@ def _maybe_store_problem(self) -> None:
175181
"num_repeating_spans": str(len(offender_span_ids)) if offender_span_ids else "",
176182
"repeating_spans": self._get_path_prefix(self.spans[0]),
177183
"repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False),
178-
"parameters": self._get_parameters(),
184+
"parameters": self._get_parameters()["query_params"],
185+
"path_parameters": self._get_parameters()["path_params"],
179186
},
180187
evidence_display=[
181188
IssueEvidence(
182189
name="Offending Spans",
183190
value=get_notification_attachment_body(
184-
last_span["op"],
185-
os.path.commonprefix(
186-
[span.get("description", "") or "" for span in self.spans]
187-
),
191+
op=last_span["op"], desc=problem_description
188192
),
189193
# Has to be marked important to be displayed in the notifications
190194
important=True,
191195
)
192196
],
193197
)
194198

195-
def _get_parameters(self) -> list[str]:
199+
def _get_parameters(self) -> dict[str, list[str]]:
196200
if not self.spans or len(self.spans) == 0:
197-
return []
201+
return {"query_params": [], "path_params": []}
198202

199-
urls = [get_url_from_span(span) for span in self.spans]
200-
201-
all_parameters: Mapping[str, list[str]] = defaultdict(list)
203+
parameterized_urls = [
204+
parameterize_url_with_result(get_url_from_span(span)) for span in self.spans
205+
]
206+
path_params = [param["path_params"] for param in parameterized_urls]
207+
query_dict: Mapping[str, list[str]] = defaultdict(list)
202208

203-
for url in urls:
204-
parsed_url = urlparse(url)
205-
parameters = parse_qs(parsed_url.query)
209+
for parameterized_url in parameterized_urls:
210+
query_params = parameterized_url["query_params"]
206211

207-
for key, value in parameters.items():
208-
all_parameters[key] += value
212+
for key, value in query_params.items():
213+
query_dict[key] += value
214+
return {
215+
"path_params": [f"{', '.join(param_group)}" for param_group in path_params],
216+
"query_params": [f"{key}: {', '.join(values)}" for key, values in query_dict.items()],
217+
}
209218

210-
return [
211-
"{{{}: {}}}".format(key, ",".join(values)) for key, values in all_parameters.items()
212-
]
219+
def _get_parameterized_url(self, span: Span) -> str:
220+
return parameterize_url(get_url_from_span(span))
213221

214222
def _get_path_prefix(self, repeating_span: Span) -> str:
215223
if not repeating_span:
@@ -225,9 +233,8 @@ def _fingerprint(self) -> str | None:
225233

226234
# Check if we parameterized the URL at all. If not, do not attempt
227235
# fingerprinting. Unparameterized URLs run too high a risk of
228-
# fingerprinting explosions. Query parameters are parameterized by
229-
# definition, so exclude them from comparison
230-
if without_query_params(parameterized_first_url) == without_query_params(first_url):
236+
# fingerprinting explosions.
237+
if parameterized_first_url == first_url:
231238
return None
232239

233240
fingerprint = fingerprint_http_spans([self.spans[0]])
@@ -244,11 +251,6 @@ def _spans_are_concurrent(self, span_a: Span, span_b: Span) -> bool:
244251

245252
def _spans_are_similar(self, span_a: Span, span_b: Span) -> bool:
246253
return (
247-
parameterize_url(get_url_from_span(span_a))
248-
== parameterize_url(get_url_from_span(span_b))
254+
self._get_parameterized_url(span_a) == self._get_parameterized_url(span_b)
249255
and span_a["parent_span_id"] == span_b["parent_span_id"]
250256
)
251-
252-
253-
def without_query_params(url: str) -> str:
254-
return urlparse(url)._replace(query="").geturl()

src/sentry/utils/performance_issues/performance_problem.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class PerformanceProblem:
2222
# We can't make it required until we stop loading these from nodestore via EventPerformanceProblem,
2323
# since there's legacy data in there that won't have these fields.
2424
# So until we disable transaction based perf issues we'll need to keep this optional.
25-
evidence_data: Mapping[str, Any] | None
25+
evidence_data: dict[str, Any] | None
2626
# User-friendly evidence to be displayed directly
2727
evidence_display: Sequence[IssueEvidence]
2828

tests/sentry/utils/performance_issues/test_n_plus_one_api_calls_detector.py

Lines changed: 48 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from sentry.utils.performance_issues.base import DetectorType, parameterize_url
1717
from sentry.utils.performance_issues.detectors.n_plus_one_api_calls_detector import (
1818
NPlusOneAPICallsDetector,
19-
without_query_params,
2019
)
2120
from sentry.utils.performance_issues.performance_detection import (
2221
get_detection_settings,
@@ -188,9 +187,22 @@ def test_does_not_detect_problems_with_low_span_count(self):
188187
problems = self.find_problems(event)
189188
assert problems == []
190189

191-
def test_does_not_detect_problem_with_unparameterized_urls(self):
190+
def test_does_detect_problem_with_unparameterized_urls(self):
192191
event = get_event("n-plus-one-api-calls/n-plus-one-api-calls-in-weather-app")
193-
assert self.find_problems(event) == []
192+
[problem] = self.find_problems(event)
193+
194+
assert problem.fingerprint == "1-1010-bf7ad6b20bb345ae327362c849427956862bf839"
195+
196+
def test_does_detect_problem_with_parameterized_urls(self):
197+
event = self.create_event(lambda i: f"GET /clients/{i}/info/{i*100}/")
198+
[problem] = self.find_problems(event)
199+
assert problem.desc == "/clients/*/info/*/"
200+
assert problem.evidence_data is not None
201+
path_params = problem.evidence_data.get("path_parameters", [])
202+
# It should sequentially store sets of path parameters on the evidence data
203+
for i in range(len(path_params)):
204+
assert path_params[i] == f"{i}, {i*100}"
205+
assert problem.fingerprint == "1-1010-8bf177290e2d78550fef5a1f6e9ddf115e4b0614"
194206

195207
def test_does_not_detect_problem_with_concurrent_calls_to_different_urls(self):
196208
event = get_event("n-plus-one-api-calls/not-n-plus-one-api-calls")
@@ -221,28 +233,55 @@ def test_fingerprints_same_relative_urls_together(self):
221233
assert problem1.fingerprint == problem2.fingerprint
222234

223235
def test_fingerprints_same_parameterized_integer_relative_urls_together(self):
224-
event1 = self.create_event(lambda i: f"GET /clients/17/info?id={i}")
236+
event1 = self.create_event(lambda i: f"GET /clients/{i}/info?id={i}")
225237
[problem1] = self.find_problems(event1)
226238

227-
event2 = self.create_event(lambda i: f"GET /clients/16/info?id={i*2}")
239+
event2 = self.create_event(lambda i: f"GET /clients/{i}/info?id={i*2}")
228240
[problem2] = self.find_problems(event2)
229241

230242
assert problem1.fingerprint == problem2.fingerprint
231243

232244
def test_fingerprints_different_relative_url_separately(self):
233-
event1 = self.create_event(lambda i: f"GET /clients/11/info?id={i}")
245+
event1 = self.create_event(lambda i: f"GET /clients/{i}/info?id={i}")
234246
[problem1] = self.find_problems(event1)
235247

236-
event2 = self.create_event(lambda i: f"GET /projects/11/details?pid={i}")
248+
event2 = self.create_event(lambda i: f"GET /projects/{i}/details?pid={i}")
249+
[problem2] = self.find_problems(event2)
250+
251+
assert problem1.fingerprint != problem2.fingerprint
252+
253+
def test_fingerprints_relative_urls_with_query_params_together(self):
254+
event1 = self.create_event(lambda i: f"GET /clients/{i}/info")
255+
[problem1] = self.find_problems(event1)
256+
257+
event2 = self.create_event(lambda i: f"GET /clients/{i}/info?id={i}")
258+
[problem2] = self.find_problems(event2)
259+
260+
assert problem1.fingerprint == problem2.fingerprint
261+
262+
def test_fingerprints_multiple_parameterized_integer_relative_urls_together(self):
263+
event1 = self.create_event(lambda i: f"GET /clients/{i}/organization/{i}/info")
264+
[problem1] = self.find_problems(event1)
265+
266+
event2 = self.create_event(lambda i: f"GET /clients/{i*100}/organization/{i*100}/info")
267+
[problem2] = self.find_problems(event2)
268+
269+
assert problem1.fingerprint == problem2.fingerprint
270+
271+
def test_fingerprints_different_parameterized_integer_relative_urls_separately(self):
272+
event1 = self.create_event(lambda i: f"GET /clients/{i}/mario/{i}/info")
273+
[problem1] = self.find_problems(event1)
274+
275+
event2 = self.create_event(lambda i: f"GET /clients/{i}/luigi/{i}/info")
237276
[problem2] = self.find_problems(event2)
238277

239278
assert problem1.fingerprint != problem2.fingerprint
240279

241280
def test_ignores_hostname_for_fingerprinting(self):
242-
event1 = self.create_event(lambda i: f"GET http://service.io/clients/42/info?id={i}")
281+
event1 = self.create_event(lambda i: f"GET http://service.io/clients/{i}/info?id={i}")
243282
[problem1] = self.find_problems(event1)
244283

245-
event2 = self.create_event(lambda i: f"GET /clients/42/info?id={i}")
284+
event2 = self.create_event(lambda i: f"GET /clients/{i}/info?id={i}")
246285
[problem2] = self.find_problems(event2)
247286

248287
assert problem1.fingerprint == problem2.fingerprint
@@ -433,20 +472,6 @@ def test_rejects_ineligible_spans(span):
433472
assert not NPlusOneAPICallsDetector.is_span_eligible(span)
434473

435474

436-
@pytest.mark.parametrize(
437-
"url,url_without_query",
438-
[
439-
("", ""),
440-
("http://service.io", "http://service.io"),
441-
("http://service.io/resource", "http://service.io/resource"),
442-
("/resource?id=1", "/resource"),
443-
("/resource?id=1&sort=down", "/resource"),
444-
],
445-
)
446-
def test_removes_query_params(url, url_without_query):
447-
assert without_query_params(url) == url_without_query
448-
449-
450475
@pytest.mark.parametrize(
451476
"event",
452477
[get_event("n-plus-one-api-calls/not-n-plus-one-api-calls")],

0 commit comments

Comments
 (0)