Skip to content

Commit 85c0661

Browse files
authored
fix(similarity): Make similar issues endpoint faster (#82390)
Similar issues tab loads slowly when a group has a lot of group hashes. Check the similar issue's group id instead of the group's hashes.
1 parent 7faa530 commit 85c0661

File tree

2 files changed

+11
-9
lines changed

2 files changed

+11
-9
lines changed

src/sentry/issues/endpoints/group_similar_issues_embeddings.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,24 +42,26 @@ class GroupSimilarIssuesEmbeddingsEndpoint(GroupEndpoint):
4242
"GET": ApiPublishStatus.PRIVATE,
4343
}
4444

45-
def get_group_hashes_for_group_id(self, group_id: int) -> set[str]:
46-
hashes = GroupHash.objects.filter(group_id=group_id)
47-
return {hash.hash for hash in hashes}
48-
4945
def get_formatted_results(
5046
self,
5147
similar_issues_data: Sequence[SeerSimilarIssueData],
5248
user: User | AnonymousUser,
53-
group_id: int,
49+
group: Group,
5450
) -> Sequence[tuple[Mapping[str, Any], Mapping[str, Any]] | None]:
5551
"""
5652
Format the responses using to be used by the frontend by changing the field names and
5753
changing the cosine distances into cosine similarities.
5854
"""
59-
hashes = self.get_group_hashes_for_group_id(group_id)
6055
group_data = {}
56+
parent_hashes = [
57+
similar_issue_data.parent_hash for similar_issue_data in similar_issues_data
58+
]
59+
group_hashes = GroupHash.objects.filter(project_id=group.project_id, hash__in=parent_hashes)
60+
parent_hashes_group_ids = {
61+
group_hash.hash: group_hash.group_id for group_hash in group_hashes
62+
}
6163
for similar_issue_data in similar_issues_data:
62-
if similar_issue_data.parent_hash not in hashes:
64+
if parent_hashes_group_ids[similar_issue_data.parent_hash] != group.id:
6365
formatted_response: FormattedSimilarIssuesEmbeddingsData = {
6466
"exception": round(1 - similar_issue_data.stacktrace_distance, 4),
6567
"shouldBeGrouped": "Yes" if similar_issue_data.should_group else "No",
@@ -138,6 +140,6 @@ def get(self, request: Request, group: Group) -> Response:
138140

139141
if not results:
140142
return Response([])
141-
formatted_results = self.get_formatted_results(results, request.user, group.id)
143+
formatted_results = self.get_formatted_results(results, request.user, group)
142144

143145
return Response(formatted_results)

tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,7 @@ def test_get_formatted_results(self):
201201
formatted_results = group_similar_endpoint.get_formatted_results(
202202
similar_issues_data=[similar_issue_data_1, similar_issue_data_2],
203203
user=self.user,
204-
group_id=self.group.id,
204+
group=self.group,
205205
)
206206
assert formatted_results == self.get_expected_response(
207207
[

0 commit comments

Comments
 (0)