Commit 07c3634

copy _save_aggregate logic to new function

1 parent df6d7df
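The commit duplicates the body of _save_aggregate into a new _save_aggregate_new function in src/sentry/event_manager.py; no caller is added in this commit. Presumably the copy exists so the old and new implementations can later be toggled at the call site. A hypothetical sketch of such a toggle (the feature flag name is invented for illustration; both functions take the same parameters, since one is a copy of the other):

    if features.has("projects:save-aggregate-new", project):  # hypothetical flag
        group_info = _save_aggregate_new(
            event=event,
            job=job,
            release=release,
            received_timestamp=received_timestamp,
            metric_tags=metric_tags,
        )
    else:
        group_info = _save_aggregate(
            event=event,
            job=job,
            release=release,
            received_timestamp=received_timestamp,
            metric_tags=metric_tags,
        )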

File tree: 1 file changed

src/sentry/event_manager.py: +238 −0 lines changed
@@ -1593,6 +1593,244 @@ def _save_aggregate(
     return GroupInfo(group, is_new, is_regression)
 
 
+def _save_aggregate_new(
+    event: Event,
+    job: Job,
+    release: Release | None,
+    received_timestamp: int | float,
+    metric_tags: MutableTags,
+) -> GroupInfo | None:
+    project = event.project
+
+    primary_hashes, secondary_hashes, hashes = get_hash_values(project, job, metric_tags)
+
+    # Now that we've used the current and possibly secondary grouping config(s) to calculate the
+    # hashes, we're free to perform a config update if needed. Future events will use the new
+    # config, but will also be grandfathered into the current config for a week, so as not to
+    # erroneously create new groups.
+    update_grouping_config_if_needed(project)
+
+    _materialize_metadata_many([job])
+    metadata = dict(job["event_metadata"])
+
+    group_creation_kwargs = _get_group_creation_kwargs(job)
+
+    # Because this logic is not complex enough we want to special case the situation where we
+    # migrate from a hierarchical hash to a non-hierarchical hash. The reason being that
+    # there needs to be special logic to not create orphaned hashes in migration cases
+    # but it wants a different logic to implement splitting of hierarchical hashes.
+    migrate_off_hierarchical = bool(
+        secondary_hashes
+        and secondary_hashes.hierarchical_hashes
+        and not primary_hashes.hierarchical_hashes
+    )
+
+    flat_grouphashes = [
+        GroupHash.objects.get_or_create(project=project, hash=hash)[0] for hash in hashes.hashes
+    ]
+
+    # The root_hierarchical_hash is the least specific hash within the tree, so
+    # typically hierarchical_hashes[0], unless a hash `n` has been split in
+    # which case `root_hierarchical_hash = hierarchical_hashes[n + 1]`. Choosing
+    # this for select_for_update mostly provides sufficient synchronization
+    # when groups are created and also relieves contention by locking a more
+    # specific hash than `hierarchical_hashes[0]`.
+    existing_grouphash, root_hierarchical_hash = find_existing_grouphash(
+        project, flat_grouphashes, hashes.hierarchical_hashes
+    )
+
+    if root_hierarchical_hash is not None:
+        root_hierarchical_grouphash = GroupHash.objects.get_or_create(
+            project=project, hash=root_hierarchical_hash
+        )[0]
+
+        metadata.update(
+            hashes.group_metadata_from_hash(
+                existing_grouphash.hash
+                if existing_grouphash is not None
+                else root_hierarchical_hash
+            )
+        )
+
+    else:
+        root_hierarchical_grouphash = None
+
+    # In principle the group gets the same metadata as the event, so common
+    # attributes can be defined in eventtypes.
+    #
+    # Additionally the `last_received` key is set for group metadata, later in
+    # _save_aggregate
+    group_creation_kwargs["data"] = materialize_metadata(
+        event.data,
+        get_event_type(event.data),
+        metadata,
+    )
+    group_creation_kwargs["data"]["last_received"] = received_timestamp
+
+    if existing_grouphash is None:
+        if killswitch_matches_context(
+            "store.load-shed-group-creation-projects",
+            {
+                "project_id": project.id,
+                "platform": event.platform,
+            },
+        ):
+            raise HashDiscarded("Load shedding group creation", reason="load_shed")
+
+        with sentry_sdk.start_span(
+            op="event_manager.create_group_transaction"
+        ) as span, metrics.timer(
+            "event_manager.create_group_transaction"
+        ) as metric_tags, transaction.atomic(
+            router.db_for_write(GroupHash)
+        ):
+            span.set_tag("create_group_transaction.outcome", "no_group")
+            metric_tags["create_group_transaction.outcome"] = "no_group"
+
+            all_grouphash_ids = [h.id for h in flat_grouphashes]
+            if root_hierarchical_grouphash is not None:
+                all_grouphash_ids.append(root_hierarchical_grouphash.id)
+
+            all_grouphashes = list(
+                GroupHash.objects.filter(id__in=all_grouphash_ids).select_for_update()
+            )
+
+            flat_grouphashes = [gh for gh in all_grouphashes if gh.hash in hashes.hashes]
+
+            existing_grouphash, root_hierarchical_hash = find_existing_grouphash(
+                project, flat_grouphashes, hashes.hierarchical_hashes
+            )
+
+            if root_hierarchical_hash is not None:
+                root_hierarchical_grouphash = GroupHash.objects.get_or_create(
+                    project=project, hash=root_hierarchical_hash
+                )[0]
+            else:
+                root_hierarchical_grouphash = None
+
+            if existing_grouphash is None:
+                group = _create_group(project, event, **group_creation_kwargs)
+
+                if (
+                    features.has("projects:first-event-severity-calculation", event.project)
+                    and group.data.get("metadata", {}).get("severity") is None
+                ):
+                    logger.error(
+                        "Group created without severity score",
+                        extra={
+                            "event_id": event.data["event_id"],
+                            "group_id": group.id,
+                        },
+                    )
+
+                if root_hierarchical_grouphash is not None:
+                    new_hashes = [root_hierarchical_grouphash]
+                else:
+                    new_hashes = list(flat_grouphashes)
+
+                GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
+                    state=GroupHash.State.LOCKED_IN_MIGRATION
+                ).update(group=group)
+
+                is_new = True
+                is_regression = False
+
+                span.set_tag("create_group_transaction.outcome", "new_group")
+                metric_tags["create_group_transaction.outcome"] = "new_group"
+
+                metrics.incr(
+                    "group.created",
+                    skip_internal=True,
+                    tags={
+                        "platform": event.platform or "unknown",
+                        "sdk": normalized_sdk_tag_from_event(event),
+                    },
+                )
+
+                # This only applies to events with stacktraces
+                frame_mix = event.get_event_metadata().get("in_app_frame_mix")
+                if frame_mix:
+                    metrics.incr(
+                        "grouping.in_app_frame_mix",
+                        sample_rate=1.0,
+                        tags={
+                            "platform": event.platform or "unknown",
+                            "sdk": normalized_sdk_tag_from_event(event),
+                            "frame_mix": frame_mix,
+                        },
+                    )
+
+                return GroupInfo(group, is_new, is_regression)
+
+    group = Group.objects.get(id=existing_grouphash.group_id)
+    if group.issue_category != GroupCategory.ERROR:
+        logger.info(
+            "event_manager.category_mismatch",
+            extra={
+                "issue_category": group.issue_category,
+                "event_type": "error",
+            },
+        )
+        return None
+
+    is_new = False
+
+    # For the migration from hierarchical to non-hierarchical we want to associate
+    # all group hashes
+    if migrate_off_hierarchical:
+        new_hashes = [h for h in flat_grouphashes if h.group_id is None]
+        if root_hierarchical_grouphash and root_hierarchical_grouphash.group_id is None:
+            new_hashes.append(root_hierarchical_grouphash)
+    elif root_hierarchical_grouphash is None:
+        # No hierarchical grouping was run, only consider flat hashes
+        new_hashes = [h for h in flat_grouphashes if h.group_id is None]
+    elif root_hierarchical_grouphash.group_id is None:
+        # The root hash is not assigned to a group.
+        # We ran multiple grouping algorithms
+        # (see secondary grouping), and the hierarchical hash is new
+        new_hashes = [root_hierarchical_grouphash]
+    else:
+        new_hashes = []
+
+    if new_hashes:
+        # There may still be secondary hashes that we did not use to find an
+        # existing group. A classic example is when grouping makes changes to
+        # the app-hash (changes to in_app logic), but the system hash stays
+        # stable and is used to find an existing group. Associate any new
+        # hashes with the group such that event saving continues to be
+        # resilient against grouping algorithm changes.
+        #
+        # There is a race condition here where two processes could "steal"
+        # hashes from each other. In practice this should not be user-visible
+        # as group creation is synchronized. Meaning the only way hashes could
+        # jump between groups is if there were two processes that:
+        #
+        # 1) have BOTH found an existing group
+        #    (otherwise at least one of them would be in the group creation
+        #    codepath which has transaction isolation/acquires row locks)
+        # 2) AND are looking at the same set, or an overlapping set of hashes
+        #    (otherwise they would not operate on the same rows)
+        # 3) yet somehow also sort their event into two different groups each
+        #    (otherwise the update would not change anything)
+        #
+        # We think this is a very unlikely situation. A previous version of
+        # _save_aggregate had races around group creation which made this race
+        # more user visible. For more context, see 84c6f75a and d0e22787, as
+        # well as GH-5085.
+        GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
+            state=GroupHash.State.LOCKED_IN_MIGRATION
+        ).update(group=group)
+
+    is_regression = _process_existing_aggregate(
+        group=group,
+        event=event,
+        incoming_group_values=group_creation_kwargs,
+        release=release,
+    )
+
+    return GroupInfo(group, is_new, is_regression)
+
+
 def _create_group(project: Project, event: Event, **kwargs: Any) -> Group:
     try:
         short_id = project.next_short_id()
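The copied function keeps the concurrency strategy of the original _save_aggregate: an optimistic, lock-free lookup first, and only when no group is found, row locks on the candidate GroupHash rows via select_for_update inside a transaction, with a second lookup before the group is actually created. A minimal sketch of that double-checked pattern, assuming hypothetical helpers find_existing and create_group in place of the real find_existing_grouphash and _create_group:

    from django.db import router, transaction

    from sentry.models import GroupHash


    def get_or_create_group(project, grouphashes):
        # Optimistic first pass: most events match an existing group, so the
        # common case takes no row locks at all.
        existing = find_existing(project, grouphashes)  # hypothetical helper
        if existing is not None:
            return existing, False  # (group, created)

        with transaction.atomic(router.db_for_write(GroupHash)):
            # Lock the candidate GroupHash rows, then look again: a concurrent
            # process may have created the group between the first check and
            # the lock acquisition.
            locked = list(
                GroupHash.objects.filter(
                    id__in=[gh.id for gh in grouphashes]
                ).select_for_update()
            )
            existing = find_existing(project, locked)
            if existing is not None:
                return existing, False

            # Still nothing, and we hold the locks: it is now safe to create
            # the group and point the hashes at it.
            group = create_group(project)  # hypothetical helper
            GroupHash.objects.filter(id__in=[gh.id for gh in locked]).update(group=group)
            return group, True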

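The comment above find_existing_grouphash describes which hierarchical hash gets locked: normally the least specific hash in the tree, moving one level deeper past any hash that has been split. A toy illustration of that indexing, with invented hash values (this is not the real find_existing_grouphash logic):

    hierarchical_hashes = ["h0", "h1", "h2", "h3"]  # least specific -> most specific

    # No split: the root is the least specific hash.
    root_hierarchical_hash = hierarchical_hashes[0]  # "h0"

    # If hash n has been split, the root moves past it to hierarchical_hashes[n + 1]:
    n = 0
    root_hierarchical_hash = hierarchical_hashes[n + 1]  # "h1"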
Comments (0)