From 193d5d389bcf0d10847c39a1ac696a554075de6c Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Apr 2025 15:35:10 -0700 Subject: [PATCH 1/4] add killswitch for secondary Seer request --- src/sentry/options/defaults.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index d6d7d9bc3d3831..5875f14ecbb02d 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -1044,6 +1044,16 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) +# Temporary killswitch for making a second request to Seer to store the incoming event when we have +# a hybrid fingerprint and none of the matches returned by Seer is a fingerprint match +register( + "seer.similarity.ingest.store_hybrid_fingerprint_non_matches", + type=Bool, + default=True, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) + + # TODO: Once Seer grouping is GA-ed, we probably either want to turn this down or get rid of it in # favor of the default 10% sample rate register( From 889d7260581e31f467c6b1bf9a236ee14fbb0d17 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Apr 2025 15:14:32 -0700 Subject: [PATCH 2/4] make second request to Seer if results are unusable --- src/sentry/grouping/ingest/seer.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index c22136028c4907..09415769bb52ec 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -323,6 +323,27 @@ def get_seer_similar_issues( seer_match_status = "match_found" break + # If Seer sent back matches, that means it didn't store the incoming event's data in its + # database. But if we then weren't able to use any of the matches Seer sent back, we do actually + # want a Seer record to be created, so that future events with this fingerprint have something + # with which to match. + if seer_match_status == "no_matches_usable" and options.get( + "seer.similarity.ingest.store_hybrid_fingerprint_non_matches" + ): + request_data = { + **request_data, + "referrer": "ingest_follow_up", + # By asking Seer to find zero matches, we can trick it into thinking there aren't + # any, thereby forcing it to create the record + "k": 0, + # Turn off re-ranking to speed up the process of finding nothing + "use_reranking": False, + } + + # We only want this for the side effect, and we know it'll return no matches, so we don't + # bother to capture the return value. + get_similarity_data_from_seer(request_data) + is_hybrid_fingerprint_case = ( event_has_hybrid_fingerprint # This means we had to reject at least one match because it was a hybrid even though the From 85facafba71aaec8c372e80bbecaba48ced9ac18 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Apr 2025 15:14:35 -0700 Subject: [PATCH 3/4] add logs to confirm behavior --- src/sentry/grouping/ingest/seer.py | 5 +++++ src/sentry/seer/similarity/similar_issues.py | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 09415769bb52ec..a50f75e7d1eeec 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -340,6 +340,11 @@ def get_seer_similar_issues( "use_reranking": False, } + # TODO: Temporary log to prove things are working as they should. This should come in a pair + # with the `get_similarity_data_from_seer.ingest_follow_up` log in `similar_issues.py`, + # which should show that no matches are returned. + logger.info("get_seer_similar_issues.follow_up_seer_request", extra={"hash": event_hash}) + # We only want this for the side effect, and we know it'll return no matches, so we don't # bother to capture the return value. get_similarity_data_from_seer(request_data) diff --git a/src/sentry/seer/similarity/similar_issues.py b/src/sentry/seer/similarity/similar_issues.py index 0b2fdda6c2a2a0..bdccbdcd5e9c23 100644 --- a/src/sentry/seer/similarity/similar_issues.py +++ b/src/sentry/seer/similarity/similar_issues.py @@ -134,6 +134,17 @@ def get_similarity_data_from_seer( ) return [] + # TODO: Temporary log to prove things are working as they should. This should come in a pair + # with the `get_seer_similar_issues.follow_up_seer_request` log in `seer.py`. + if referrer == "ingest_follow_up": + logger.info( + "get_similarity_data_from_seer.ingest_follow_up", + extra={ + "hash": request_hash, + "response_data": response_data, # Should always be an empty list + }, + ) + if not response_data: metrics.incr( "seer.similar_issues_request", From f856e96f12c208590a30944a5d11e96ef37dfd87 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Mon, 14 Apr 2025 15:14:37 -0700 Subject: [PATCH 4/4] add test --- .../test_get_seer_similar_issues.py | 68 ++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-) diff --git a/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py b/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py index b5e145940c2bf1..a17ec2d4798662 100644 --- a/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py +++ b/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py @@ -1,5 +1,5 @@ from typing import Any -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, call, patch from sentry import options from sentry.eventstore.models import Event @@ -112,6 +112,72 @@ def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock) {"hybrid_fingerprint": False}, ) + @patch("sentry.grouping.ingest.seer.metrics.incr") + def test_sends_second_seer_request_when_seer_matches_are_unusable( + self, mock_incr: MagicMock + ) -> None: + + existing_event = save_new_event( + {"message": "Dogs are great!", "fingerprint": ["{{ default }}", "maisey"]}, + self.project, + ) + assert existing_event.group_id + + new_event, new_variants, new_grouphash, new_stacktrace_string = create_new_event( + self.project + ) + + seer_result_data = [ + SeerSimilarIssueData( + parent_hash=existing_event.get_primary_hash(), + parent_group_id=existing_event.group_id, + stacktrace_distance=0.01, + should_group=True, + ) + ] + + with patch( + "sentry.grouping.ingest.seer.get_similarity_data_from_seer", + return_value=seer_result_data, + ) as mock_get_similarity_data: + get_seer_similar_issues(new_event, new_grouphash, new_variants) + + assert_metrics_call( + mock_incr, "get_seer_similar_issues", "no_matches_usable", {"is_hybrid": True} + ) + + base_request_params = { + "event_id": new_event.event_id, + "hash": new_event.get_primary_hash(), + "project_id": self.project.id, + "stacktrace": new_stacktrace_string, + "exception_type": "FailedToFetchError", + } + + assert mock_get_similarity_data.call_count == 2 + assert mock_get_similarity_data.mock_calls == [ + # Initial call to Seer + call( + { + **base_request_params, + "k": options.get("seer.similarity.ingest.num_matches_to_request"), + "referrer": "ingest", + "use_reranking": True, + }, + {"hybrid_fingerprint": False}, + ), + # Second call to store the event's data since the match that came back from Seer + # wasn't usable + call( + { + **base_request_params, + "k": 0, + "referrer": "ingest_follow_up", + "use_reranking": False, + } + ), + ] + class ParentGroupFoundTest(TestCase): @patch("sentry.grouping.ingest.seer.metrics.distribution")