diff --git a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py index 4b3f096f5cea0c..cc0bb171c146f6 100644 --- a/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py +++ b/src/sentry/utils/performance_issues/detectors/experiments/n_plus_one_api_calls_detector.py @@ -187,6 +187,7 @@ def _maybe_store_problem(self) -> None: "repeating_spans_compact": get_span_evidence_value(self.spans[0], include_op=False), "parameters": self._get_parameters()["query_params"], "path_parameters": self._get_parameters()["path_params"], + "common_url": problem_description, }, evidence_display=[ IssueEvidence( @@ -215,9 +216,20 @@ def _get_parameters(self) -> dict[str, list[str]]: for key, value in query_params.items(): query_dict[key] += value + + # Note: dict.fromkeys() is just to deduplicate values and Python dicts are ordered + path_params_list: list[str] = list( + dict.fromkeys([f"{', '.join(param_group)}" for param_group in path_params]).keys() + ) + query_params_list: list[str] = list( + dict.fromkeys( + [f"{key}: {', '.join(values)}" for key, values in query_dict.items()] + ).keys() + ) return { - "path_params": [f"{', '.join(param_group)}" for param_group in path_params], - "query_params": [f"{key}: {', '.join(values)}" for key, values in query_dict.items()], + # Use sets to deduplicate the lists, but still preserve the order. + "path_params": path_params_list, + "query_params": query_params_list, } def _get_parameterized_url(self, span: Span) -> str: diff --git a/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py b/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py index 5c5aff9d62a508..16a2072fdc15a0 100644 --- a/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py +++ b/tests/sentry/utils/performance_issues/experiments/test_n_plus_one_api_calls_detector.py @@ -198,14 +198,17 @@ def test_does_detect_problem_with_unparameterized_urls(self): assert problem.fingerprint == f"1-{self.type_id}-bf7ad6b20bb345ae327362c849427956862bf839" def test_does_detect_problem_with_parameterized_urls(self): - event = self.create_event(lambda i: f"GET /clients/{i}/info/{i*100}/") + event = self.create_event(lambda i: f"GET /clients/{i}/info/{i*100}/?id={i}") [problem] = self.find_problems(event) - assert problem.desc == "/clients/*/info/*/" + assert problem.desc == "/clients/*/info/*/?id=*" assert problem.evidence_data is not None + assert problem.evidence_data["common_url"] == "/clients/*/info/*/?id=*" path_params = problem.evidence_data.get("path_parameters", []) # It should sequentially store sets of path parameters on the evidence data for i in range(len(path_params)): assert path_params[i] == f"{i}, {i*100}" + query_params = problem.evidence_data.get("parameters", []) + assert query_params == ["id: 0, 1, 2, 3, 4, 5"] assert problem.fingerprint == f"1-{self.type_id}-8bf177290e2d78550fef5a1f6e9ddf115e4b0614" def test_does_not_detect_problem_with_concurrent_calls_to_different_urls(self):