Skip to content

fix(seer grouping): Store events with unusable Seer matches in Seer (take 2) #89563

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,32 @@ def get_seer_similar_issues(
seer_match_status = "match_found"
break

# If Seer sent back matches, that means it didn't store the incoming event's data in its
# database. But if we then weren't able to use any of the matches Seer sent back, we do actually
# want a Seer record to be created, so that future events with this fingerprint have something
# with which to match.
if seer_match_status == "no_matches_usable" and options.get(
"seer.similarity.ingest.store_hybrid_fingerprint_non_matches"
):
request_data = {
**request_data,
"referrer": "ingest_follow_up",
# By asking Seer to find zero matches, we can trick it into thinking there aren't
# any, thereby forcing it to create the record
"k": 0,
# Turn off re-ranking to speed up the process of finding nothing
"use_reranking": False,
}

# TODO: Temporary log to prove things are working as they should. This should come in a pair
# with the `get_similarity_data_from_seer.ingest_follow_up` log in `similar_issues.py`,
# which should show that no matches are returned.
logger.info("get_seer_similar_issues.follow_up_seer_request", extra={"hash": event_hash})

# We only want this for the side effect, and we know it'll return no matches, so we don't
# bother to capture the return value.
get_similarity_data_from_seer(request_data)

is_hybrid_fingerprint_case = (
event_has_hybrid_fingerprint
# This means we had to reject at least one match because it was a hybrid even though the
Expand Down
10 changes: 10 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,16 @@
flags=FLAG_AUTOMATOR_MODIFIABLE,
)

# Temporary killswitch for making a second request to Seer to store the incoming event when we have
# a hybrid fingerprint and none of the matches returned by Seer is a fingerprint match
register(
"seer.similarity.ingest.store_hybrid_fingerprint_non_matches",
type=Bool,
default=True,
flags=FLAG_AUTOMATOR_MODIFIABLE,
)


# TODO: Once Seer grouping is GA-ed, we probably either want to turn this down or get rid of it in
# favor of the default 10% sample rate
register(
Expand Down
11 changes: 11 additions & 0 deletions src/sentry/seer/similarity/similar_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,17 @@ def get_similarity_data_from_seer(
)
return []

# TODO: Temporary log to prove things are working as they should. This should come in a pair
# with the `get_seer_similar_issues.follow_up_seer_request` log in `seer.py`.
if referrer == "ingest_follow_up":
logger.info(
"get_similarity_data_from_seer.ingest_follow_up",
extra={
"hash": request_hash,
"response_data": response_data, # Should always be an empty list
},
)

if not response_data:
metrics.incr(
"seer.similar_issues_request",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from typing import Any
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock, call, patch

from sentry import options
from sentry.eventstore.models import Event
Expand Down Expand Up @@ -112,6 +112,72 @@ def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock)
{"hybrid_fingerprint": False},
)

@patch("sentry.grouping.ingest.seer.metrics.incr")
def test_sends_second_seer_request_when_seer_matches_are_unusable(
self, mock_incr: MagicMock
) -> None:

existing_event = save_new_event(
{"message": "Dogs are great!", "fingerprint": ["{{ default }}", "maisey"]},
self.project,
)
assert existing_event.group_id

new_event, new_variants, new_grouphash, new_stacktrace_string = create_new_event(
self.project
)

seer_result_data = [
SeerSimilarIssueData(
parent_hash=existing_event.get_primary_hash(),
parent_group_id=existing_event.group_id,
stacktrace_distance=0.01,
should_group=True,
)
]

with patch(
"sentry.grouping.ingest.seer.get_similarity_data_from_seer",
return_value=seer_result_data,
) as mock_get_similarity_data:
get_seer_similar_issues(new_event, new_grouphash, new_variants)

assert_metrics_call(
mock_incr, "get_seer_similar_issues", "no_matches_usable", {"is_hybrid": True}
)

base_request_params = {
"event_id": new_event.event_id,
"hash": new_event.get_primary_hash(),
"project_id": self.project.id,
"stacktrace": new_stacktrace_string,
"exception_type": "FailedToFetchError",
}

assert mock_get_similarity_data.call_count == 2
assert mock_get_similarity_data.mock_calls == [
# Initial call to Seer
call(
{
**base_request_params,
"k": options.get("seer.similarity.ingest.num_matches_to_request"),
"referrer": "ingest",
"use_reranking": True,
},
{"hybrid_fingerprint": False},
),
# Second call to store the event's data since the match that came back from Seer
# wasn't usable
call(
{
**base_request_params,
"k": 0,
"referrer": "ingest_follow_up",
"use_reranking": False,
}
),
]


class ParentGroupFoundTest(TestCase):
@patch("sentry.grouping.ingest.seer.metrics.distribution")
Expand Down
Loading