Skip to content

Commit f928193

Browse files
authored
fix(ingest): Repair broken title data (#69398)
Last week, we ran into a bug wherein we were mistakenly classifying error events as default (message) events. Because the two types of events have slightly different schemas, that meant that we were looking in the wrong place for title data, and as a result ended up with a whole mess of events all titled `<unlabeled event>`. Because groups update their title with each new event that comes in, that meant we also had a bunch of groups called `<unlabeled event>`, with the result that the issue stream became an unusable sea of indistinguishable issues. We quickly realized the problem and fixed the bug, but while that meant events started coming in with correct titles again, the groups weren't fully updating themselves. Why? Because it turns out that default events add title information to `group.data["metadata"]` as well as to `group.data` itself, whereas error events only add it to `group.data`. This meant that as new events came in only half of the bad title data was getting overwritten by good title data, while the bad data in `group.data["metadata"]` was untouched. This fixes that by allowing `None` to overwrite existing title data, provided the existing title data is `<unlabeled event>` or one of its cousins (`<unknown>`, `<untitled>`, etc.). It also aims to prevent a similar thing from happening in the future, by not allowing `<unlabeled event>` (et al.) to overwrite either a real title or a `None`. The same fix is applied to `group.message`, which comes in part from the title. Note: To determine if a title is worthwhile, I reused the collection of titles we use when checking if an event's message is worth sending to seer or not. To make it a little clearer what they're about, I renamed the constant to `PLACEHOLDER_EVENT_TITLES`, and to make things ever-so-slightly more efficient, I changed it from a list to a frozenset.
1 parent 75d7f60 commit f928193

File tree

3 files changed

+264
-14
lines changed

3 files changed

+264
-14
lines changed

src/sentry/event_manager.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@
147147
# Timeout for cached group crash report counts
148148
CRASH_REPORT_TIMEOUT = 24 * 3600 # one day
149149

150-
NON_TITLE_EVENT_TITLES = ["<untitled>", "<unknown>", "<unlabeled event>", "Error"]
150+
# Titles which carry no real information about an event; checked via set membership
PLACEHOLDER_EVENT_TITLES = frozenset({"<untitled>", "<unknown>", "<unlabeled event>", "Error"})
151151

152152
HIGH_SEVERITY_THRESHOLD = 0.1
153153

@@ -2126,6 +2126,48 @@ def _handle_regression(group: Group, event: BaseEvent, release: Release | None)
21262126
return is_regression
21272127

21282128

2129+
def _is_placeholder_title(title):
    """
    Return `True` if `title` is one of the entries in `PLACEHOLDER_EVENT_TITLES`, `False`
    otherwise (including when `title` is `None`).
    """
    is_placeholder = title in PLACEHOLDER_EVENT_TITLES
    return is_placeholder
2131+
2132+
2133+
def _is_real_title(title):
    """
    Return `True` only for a non-empty title which isn't one of the entries in
    `PLACEHOLDER_EVENT_TITLES`. Missing (`None`) and empty titles are not considered real.
    """
    # Falsy values (`None`, empty string) can never be real titles
    if not title:
        return False

    return title not in PLACEHOLDER_EVENT_TITLES
2135+
2136+
2137+
def _get_updated_group_title(existing_container, incoming_container):
    """
    Choose the title to store when merging incoming title data into existing title data. Both
    arguments are mappings with an optional `title` key (in practice either `group.data` or
    `group.data["metadata"]`, in existing and incoming forms).

    The rules, in order:
      - A real incoming title (non-empty, non-placeholder) always wins.
      - Otherwise, if the existing title is a placeholder (`<unlabeled event>`, `<untitled>`,
        etc), the incoming value is used even if it's another placeholder or `None`. Using
        `None` is what deletes errant placeholder titles.
      - Otherwise, the existing value (a real title or `None`) is kept, so that a placeholder
        can neither replace a real title nor appear where there was no title before.

    This stems from an incident during which error events were interpreted as default-type
    events, which overwrote good titles with placeholder ones and inserted placeholder titles
    where there shouldn't have been a title at all. (The second case matters because
    default-type and error-type events differ in where they include a `title` attribute, and we
    count on the lack of a `title` attribute in certain cases as well as the presence of one.)
    """
    current_title = existing_container.get("title")
    candidate_title = incoming_container.get("title")

    # Real titles beat both placeholder and non-existent titles
    if _is_real_title(candidate_title):
        return candidate_title

    # An existing placeholder loses to whatever is coming in, including the lack of a title (the
    # latter in order to fix the regression caused by error events being interpreted as
    # default-type events)
    if _is_placeholder_title(current_title):
        return candidate_title

    # The incoming title is a placeholder or missing and the existing value is worth keeping
    return current_title
2169+
2170+
21292171
def _process_existing_aggregate(
21302172
group: Group,
21312173
event: BaseEvent,
@@ -2141,6 +2183,7 @@ def _process_existing_aggregate(
21412183
if (
21422184
event.search_message
21432185
and event.search_message != group.message
2186+
and not _is_placeholder_title(event.search_message)
21442187
and event.get_event_type() != TransactionEvent.key
21452188
):
21462189
updated_group_values["message"] = event.search_message
@@ -2157,19 +2200,25 @@ def _process_existing_aggregate(
21572200

21582201
is_regression = _handle_regression(group, event, release)
21592202

2160-
# Merge new data with existing data
2161-
incoming_data = incoming_group_values["data"]
2162-
incoming_metadata = incoming_group_values["data"].get("metadata", {})
2163-
21642203
existing_data = group.data
2165-
# Grab a reference to this before it gets clobbered when we update `existing_data`
21662204
existing_metadata = group.data.get("metadata", {})
21672205

2168-
existing_data.update(incoming_data)
2169-
existing_metadata.update(incoming_metadata)
2206+
incoming_data = incoming_group_values["data"]
2207+
incoming_metadata = incoming_group_values["data"].get("metadata", {})
21702208

2171-
updated_group_values["data"] = existing_data
2172-
updated_group_values["data"]["metadata"] = existing_metadata
2209+
# Merge old and new data/metadata, keeping the existing title if the incoming title is a
2210+
# placeholder (`<unlabeled event>`, `<untitled>`, etc.) and the existing one isn't. See
2211+
# `_get_updated_group_title` docstring.
2212+
updated_group_values["data"] = {
2213+
**existing_data,
2214+
**incoming_data,
2215+
"title": _get_updated_group_title(existing_data, incoming_data),
2216+
}
2217+
updated_group_values["data"]["metadata"] = {
2218+
**existing_metadata,
2219+
**incoming_metadata,
2220+
"title": _get_updated_group_title(existing_metadata, incoming_metadata),
2221+
}
21732222

21742223
update_kwargs = {"times_seen": 1}
21752224

@@ -2364,11 +2413,11 @@ def _get_severity_score(event: Event) -> tuple[float, str]:
23642413
title = event.title
23652414

23662415
# If the event hasn't yet been given a helpful title, attempt to calculate one
2367-
if title in NON_TITLE_EVENT_TITLES:
2416+
if title in PLACEHOLDER_EVENT_TITLES:
23682417
title = event_type.get_title(metadata)
23692418

23702419
# If there's still nothing helpful to be had, bail
2371-
if title in NON_TITLE_EVENT_TITLES:
2420+
if title in PLACEHOLDER_EVENT_TITLES:
23722421
logger_data.update(
23732422
{"event_type": event_type.key, "event_title": event.title, "computed_title": title}
23742423
)

tests/sentry/event_manager/test_event_manager_grouping.py

Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,15 @@
55
from unittest import mock
66
from unittest.mock import MagicMock
77

8+
import pytest
9+
10+
from sentry.event_manager import _get_updated_group_title
11+
from sentry.eventtypes.base import DefaultEvent
812
from sentry.grouping.result import CalculatedHashes
913
from sentry.models.group import Group
1014
from sentry.testutils.cases import TestCase
1115
from sentry.testutils.helpers.eventprocessing import save_new_event
16+
from sentry.testutils.pytest.fixtures import django_db_all
1217
from sentry.testutils.skips import requires_snuba
1318

1419
pytestmark = [requires_snuba]
@@ -113,6 +118,202 @@ def test_updates_group_metadata(self):
113118
assert group.message == event2.message
114119

115120

121+
class PlaceholderTitleTest(TestCase):
    """
    Regression tests for a bug in which error events were interpreted as default-type events and
    as a result all ended up with a placeholder title.
    """

    def _save_dog_event(self, exception_value):
        """
        Save a `DogsAreNeverAnError` error event with the given exception value. Every event
        saved through this helper shares a fingerprint and therefore a group.
        """
        return save_new_event(
            {
                "exception": {
                    "values": [{"type": "DogsAreNeverAnError", "value": exception_value}],
                },
                # Use a fingerprint to guarantee all events end up in the same group
                "fingerprint": ["adopt don't shop"],
            },
            self.project,
        )

    def _assert_healthy_title_data(self, group, event, exception_value):
        """
        Assert that the group's title data mirrors that of the given (correctly-typed) event: a
        real title on both `data` and the group itself, no title in `metadata`, and a message
        built from the exception value and type.
        """
        expected_title = f"DogsAreNeverAnError: {exception_value}"

        assert group.title == event.title == expected_title
        assert group.data["title"] == event.data["title"] == expected_title
        assert group.data["metadata"].get("title") is event.data["metadata"].get("title") is None
        assert group.message == f"{exception_value} DogsAreNeverAnError"

    def test_fixes_broken_title_data(self):
        placeholder = "<unlabeled event>"

        def use_incoming_title(existing_container, incoming_container):
            # Stand-in for `_get_updated_group_title` which unconditionally takes the new title
            return incoming_container.get("title")

        # An event before the bug was introduced
        event1 = self._save_dog_event("Dogs are great!")
        group = Group.objects.get(id=event1.group_id)
        self._assert_healthy_title_data(group, event1, "Dogs are great!")

        # Simulate the bug, and neutralize the data fixes by making them unable to recognize a
        # bad title and by unconditionally using the incoming title
        with (
            mock.patch("sentry.event_manager.get_event_type", return_value=DefaultEvent()),
            mock.patch("sentry.event_manager._is_placeholder_title", return_value=False),
            mock.patch("sentry.event_manager._get_updated_group_title", new=use_incoming_title),
        ):
            event2 = self._save_dog_event("Maisey is silly")

        assert event2.group_id == event1.group_id

        # Pull the group again to get updated data
        group = Group.objects.get(id=event2.group_id)

        # As expected, without the fixes, the bug screws up both the event and group data. (Compare
        # this to the next test, where the fixes are left in place, and the group remains untouched.)
        assert group.title == event2.title == placeholder
        assert group.data["title"] == event2.data["title"] == placeholder
        assert (
            group.data["metadata"].get("title")
            == event2.data["metadata"].get("title")
            == placeholder
        )
        assert group.message == placeholder

        # Now that we have a group with bad data, return to the current world - where the bug has
        # been fixed and the data fix is also in place - and we can see that the group's data
        # returns to what it should be
        event3 = self._save_dog_event("Charlie is goofy")

        assert event3.group_id == event2.group_id == event1.group_id

        # Pull the group again to get updated data
        group = Group.objects.get(id=event3.group_id)

        # Title data is updated with values from newest event, and is back to the structure it was
        # before the bug
        self._assert_healthy_title_data(group, event3, "Charlie is goofy")

    # This is the same as the data-fixing test above, except that the fix is left in place when
    # the bug happens, and so the bad titles never get saved on the group
    def test_bug_regression_no_longer_breaks_titles(self):
        # An event before the bug was introduced
        event1 = self._save_dog_event("Dogs are great!")
        group = Group.objects.get(id=event1.group_id)
        self._assert_healthy_title_data(group, event1, "Dogs are great!")

        # Simulate the bug, but with the fix in place
        with mock.patch("sentry.event_manager.get_event_type", return_value=DefaultEvent()):
            event2 = self._save_dog_event("Maisey is silly")

        assert event2.group_id == event1.group_id

        # Pull the group again to get updated data
        group = Group.objects.get(id=event2.group_id)

        # The event may be messed up...
        assert event2.title == "<unlabeled event>"
        assert event2.data["title"] == "<unlabeled event>"
        assert event2.data["metadata"].get("title") == "<unlabeled event>"

        # ...but it didn't mess up the group
        assert group.title == "DogsAreNeverAnError: Dogs are great!"
        assert group.data["title"] == "DogsAreNeverAnError: Dogs are great!"
        assert group.data["metadata"].get("title") is None
        assert group.message == "Dogs are great! DogsAreNeverAnError"

        # An event after the bug was fixed
        event3 = self._save_dog_event("Charlie is goofy")

        assert event3.group_id == event2.group_id == event1.group_id

        # Pull the group again to get updated data
        group = Group.objects.get(id=event3.group_id)

        # Title data is updated with values from newest event
        self._assert_healthy_title_data(group, event3, "Charlie is goofy")
294+
295+
@django_db_all
@pytest.mark.parametrize(
    ["existing_title", "incoming_title", "expected_title"],
    [
        # Existing title is real
        ("Dogs are great!", "Adopt don't shop", "Adopt don't shop"),
        ("Dogs are great!", "<untitled>", "Dogs are great!"),
        ("Dogs are great!", None, "Dogs are great!"),
        # Existing title is a placeholder
        ("<unlabeled event>", "Adopt don't shop", "Adopt don't shop"),
        ("<unlabeled event>", "<untitled>", "<untitled>"),
        ("<unlabeled event>", None, None),
        # No existing title
        (None, "Adopt don't shop", "Adopt don't shop"),
        (None, "<untitled>", None),
        (None, None, None),
    ],
)
def test_get_updated_group_title(existing_title, incoming_title, expected_title):
    """
    Check the title chosen by `_get_updated_group_title` for every combination of real,
    placeholder, and missing titles. A `None` title is modeled as a container with no `title`
    key at all.
    """
    existing_data = {}
    if existing_title is not None:
        existing_data["title"] = existing_title

    incoming_data = {}
    if incoming_title is not None:
        incoming_data["title"] = incoming_title

    updated_title = _get_updated_group_title(existing_data, incoming_data)

    assert updated_title == expected_title
315+
316+
116317
class EventManagerGroupingMetricsTest(TestCase):
117318
@mock.patch("sentry.event_manager.metrics.incr")
118319
def test_records_avg_calculations_per_event_metrics(self, mock_metrics_incr: MagicMock):

tests/sentry/event_manager/test_severity.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from sentry import options
1313
from sentry.event_manager import (
14-
NON_TITLE_EVENT_TITLES,
14+
PLACEHOLDER_EVENT_TITLES,
1515
SEER_ERROR_COUNT_KEY,
1616
EventManager,
1717
_get_severity_score,
@@ -207,7 +207,7 @@ def test_unusable_event_title(
207207
mock_logger_warning: MagicMock,
208208
mock_urlopen: MagicMock,
209209
) -> None:
210-
for title in NON_TITLE_EVENT_TITLES:
210+
for title in PLACEHOLDER_EVENT_TITLES:
211211
manager = EventManager(make_event(exception={"values": []}, platform="python"))
212212
event = manager.save(self.project.id)
213213
# `title` is a property with no setter, but it pulls from `metadata`, so it's equivalent

0 commit comments

Comments
 (0)