From 0531bd60553d2fc107a171561a24f37bada7c22e Mon Sep 17 00:00:00 2001 From: Leander Rodrigues Date: Fri, 23 May 2025 14:05:57 -0400 Subject: [PATCH 1/2] =?UTF-8?q?=E2=9C=A8=20Dedupe=20values=20for=20path/qu?= =?UTF-8?q?ery=20params?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../experiments/n_plus_one_api_calls_detector.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py index 4b3f096f5cea0c..f1dcd7c22a25f9 100644 --- a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py +++ b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py @@ -187,6 +187,7 @@ def _maybe_store_problem(self) -> None: "repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False), "parameters": self._get_parameters()["query_params"], "path_parameters": self._get_parameters()["path_params"], + "common_url": problem_description, }, evidence_display=[ IssueEvidence( @@ -215,9 +216,13 @@ def _get_parameters(self) -> dict[str, list[str]]: for key, value in query_params.items(): query_dict[key] += value + + path_params_list = [f"{', '.join(param_group)}" for param_group in path_params] + query_params_list = [f"{key}: {', '.join(values)}" for key, values in query_dict.items()] return { - "path_params": [f"{', '.join(param_group)}" for param_group in path_params], - "query_params": [f"{key}: {', '.join(values)}" for key, values in query_dict.items()], + # Use sets to deduplicate the lists. + "path_params": list(set(path_params_list)), + "query_params": list(set(query_params_list)), } def _get_parameterized_url(self, span: Span) -> str: From a1dea9bf7f47a2f84f213b7c44716aa2172bfee6 Mon Sep 17 00:00:00 2001 From: Leander Rodrigues Date: Fri, 23 May 2025 14:21:22 -0400 Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=A8=20better=20evidence=20for=20N+1?= =?UTF-8?q?=20API=20calls?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../experiments/n_plus_one_api_calls_detector.py | 16 +++++++++++++--- .../test_n_plus_one_api_calls_detector.py | 7 +++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py index f1dcd7c22a25f9..fb21414d89610b 100644 --- a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py +++ b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py @@ -218,11 +218,21 @@ def _get_parameters(self) -> dict[str, list[str]]: query_dict[key] += value path_params_list = [f"{', '.join(param_group)}" for param_group in path_params] + path_seen_set = set() query_params_list = [f"{key}: {', '.join(values)}" for key, values in query_dict.items()] + query_param_set = set() return { - # Use sets to deduplicate the lists. - "path_params": list(set(path_params_list)), - "query_params": list(set(query_params_list)), + # Use sets to deduplicate the lists, but still preserve the order. + "path_params": [ + param + for param in path_params_list + if not (param in path_seen_set or path_seen_set.add(param)) + ], + "query_params": [ + param + for param in query_params_list + if not (param in query_param_set or query_param_set.add(param)) + ], } def _get_parameterized_url(self, span: Span) -> str: diff --git a/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py index 5c5aff9d62a508..16a2072fdc15a0 100644 --- a/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py +++ b/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py @@ -198,14 +198,17 @@ def test_does_detect_problem_with_unparameterized_urls(self): assert problem.fingerprint == f"1-{self.type_id}-bf7ad6b20bb345ae327362c849427956862bf839" def test_does_detect_problem_with_parameterized_urls(self): - event = self.create_event(lambda i: f"GET /clients/{i}/info/{i*100}/") + event = self.create_event(lambda i: f"GET /clients/{i}/info/{i*100}/?id={i}") [problem] = self.find_problems(event) - assert problem.desc == "/clients/*/info/*/" + assert problem.desc == "/clients/*/info/*/?id=*" assert problem.evidence_data is not None + assert problem.evidence_data["common_url"] == "/clients/*/info/*/?id=*" path_params = problem.evidence_data.get("path_parameters", []) # It should sequentially store sets of path parameters on the evidence data for i in range(len(path_params)): assert path_params[i] == f"{i}, {i*100}" + query_params = problem.evidence_data.get("parameters", []) + assert query_params == ["id: 0, 1, 2, 3, 4, 5"] assert problem.fingerprint == f"1-{self.type_id}-8bf177290e2d78550fef5a1f6e9ddf115e4b0614" def test_does_not_detect_problem_with_concurrent_calls_to_different_urls(self):