@@ -1593,6 +1593,244 @@ def _save_aggregate(
1593
1593
return GroupInfo (group , is_new , is_regression )
1594
1594
1595
1595
1596
def _save_aggregate_new(
    event: Event,
    job: Job,
    release: Release | None,
    received_timestamp: int | float,
    metric_tags: MutableTags,
) -> GroupInfo | None:
    """Assign the event to a (possibly new) group based on its grouping hashes.

    Computes primary (and possibly secondary) grouping hashes for the event,
    looks for an existing ``GroupHash`` that already points at a group, and
    either creates a new group inside a row-locked transaction or associates
    the event with the existing group via ``_process_existing_aggregate``.

    Returns ``None`` when the matched group's ``issue_category`` is not
    ``GroupCategory.ERROR`` (category mismatch between the error event and a
    non-error group); otherwise returns a ``GroupInfo(group, is_new,
    is_regression)``.

    Raises ``HashDiscarded`` when the
    ``store.load-shed-group-creation-projects`` killswitch matches this
    project/platform and a new group would have been created.
    """
    project = event.project

    primary_hashes, secondary_hashes, hashes = get_hash_values(project, job, metric_tags)

    # Now that we've used the current and possibly secondary grouping config(s) to calculate the
    # hashes, we're free to perform a config update if needed. Future events will use the new
    # config, but will also be grandfathered into the current config for a week, so as not to
    # erroneously create new groups.
    update_grouping_config_if_needed(project)

    # Fill in `job["event_metadata"]` (among other things) before we snapshot
    # it; we take a copy because we mutate it below.
    _materialize_metadata_many([job])
    metadata = dict(job["event_metadata"])

    group_creation_kwargs = _get_group_creation_kwargs(job)

    # Because this logic is not complex enough we want to special case the situation where we
    # migrate from a hierarchical hash to a non hierarchical hash. The reason being that
    # there needs to be special logic to not create orphaned hashes in migration cases
    # but it wants a different logic to implement splitting of hierarchical hashes.
    migrate_off_hierarchical = bool(
        secondary_hashes
        and secondary_hashes.hierarchical_hashes
        and not primary_hashes.hierarchical_hashes
    )

    # Ensure a GroupHash row exists for every flat hash of this event.
    flat_grouphashes = [
        GroupHash.objects.get_or_create(project=project, hash=hash)[0] for hash in hashes.hashes
    ]

    # The root_hierarchical_hash is the least specific hash within the tree, so
    # typically hierarchical_hashes[0], unless a hash `n` has been split in
    # which case `root_hierarchical_hash = hierarchical_hashes[n + 1]`. Choosing
    # this for select_for_update mostly provides sufficient synchronization
    # when groups are created and also relieves contention by locking a more
    # specific hash than `hierarchical_hashes[0]`.
    existing_grouphash, root_hierarchical_hash = find_existing_grouphash(
        project, flat_grouphashes, hashes.hierarchical_hashes
    )

    if root_hierarchical_hash is not None:
        root_hierarchical_grouphash = GroupHash.objects.get_or_create(
            project=project, hash=root_hierarchical_hash
        )[0]

        # Prefer metadata keyed by the hash we actually matched; fall back to
        # the root hierarchical hash when no existing grouphash was found.
        metadata.update(
            hashes.group_metadata_from_hash(
                existing_grouphash.hash
                if existing_grouphash is not None
                else root_hierarchical_hash
            )
        )

    else:
        root_hierarchical_grouphash = None

    # In principle the group gets the same metadata as the event, so common
    # attributes can be defined in eventtypes.
    #
    # Additionally the `last_received` key is set for group metadata, later in
    # _save_aggregate
    group_creation_kwargs["data"] = materialize_metadata(
        event.data,
        get_event_type(event.data),
        metadata,
    )
    group_creation_kwargs["data"]["last_received"] = received_timestamp

    if existing_grouphash is None:
        # No group found outside the transaction: we may need to create one.
        # Check the load-shedding killswitch before taking locks.
        if killswitch_matches_context(
            "store.load-shed-group-creation-projects",
            {
                "project_id": project.id,
                "platform": event.platform,
            },
        ):
            raise HashDiscarded("Load shedding group creation", reason="load_shed")

        # NOTE(review): `metrics.timer(...) as metric_tags` rebinds the
        # function parameter of the same name for the rest of the function —
        # presumably intentional (the timer's tag dict is what's mutated
        # below), but worth confirming.
        with sentry_sdk.start_span(
            op="event_manager.create_group_transaction"
        ) as span, metrics.timer(
            "event_manager.create_group_transaction"
        ) as metric_tags, transaction.atomic(
            router.db_for_write(GroupHash)
        ):
            span.set_tag("create_group_transaction.outcome", "no_group")
            metric_tags["create_group_transaction.outcome"] = "no_group"

            all_grouphash_ids = [h.id for h in flat_grouphashes]
            if root_hierarchical_grouphash is not None:
                all_grouphash_ids.append(root_hierarchical_grouphash.id)

            # Lock all candidate grouphash rows so concurrent inserts of the
            # same hashes serialize on group creation.
            all_grouphashes = list(
                GroupHash.objects.filter(id__in=all_grouphash_ids).select_for_update()
            )

            flat_grouphashes = [gh for gh in all_grouphashes if gh.hash in hashes.hashes]

            # Re-run the lookup under the row locks: another process may have
            # assigned these hashes to a group since the unlocked check above.
            existing_grouphash, root_hierarchical_hash = find_existing_grouphash(
                project, flat_grouphashes, hashes.hierarchical_hashes
            )

            if root_hierarchical_hash is not None:
                root_hierarchical_grouphash = GroupHash.objects.get_or_create(
                    project=project, hash=root_hierarchical_hash
                )[0]
            else:
                root_hierarchical_grouphash = None

            if existing_grouphash is None:
                # Still no group while holding the locks: create it.
                group = _create_group(project, event, **group_creation_kwargs)

                if (
                    features.has("projects:first-event-severity-calculation", event.project)
                    and group.data.get("metadata", {}).get("severity") is None
                ):
                    logger.error(
                        "Group created without severity score",
                        extra={
                            "event_id": event.data["event_id"],
                            "group_id": group.id,
                        },
                    )

                if root_hierarchical_grouphash is not None:
                    new_hashes = [root_hierarchical_grouphash]
                else:
                    new_hashes = list(flat_grouphashes)

                # Point the new hashes at the new group, but never steal a
                # hash that is locked by an in-progress migration.
                GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
                    state=GroupHash.State.LOCKED_IN_MIGRATION
                ).update(group=group)

                is_new = True
                is_regression = False

                span.set_tag("create_group_transaction.outcome", "new_group")
                metric_tags["create_group_transaction.outcome"] = "new_group"

                metrics.incr(
                    "group.created",
                    skip_internal=True,
                    tags={
                        "platform": event.platform or "unknown",
                        "sdk": normalized_sdk_tag_from_event(event),
                    },
                )

                # This only applies to events with stacktraces
                frame_mix = event.get_event_metadata().get("in_app_frame_mix")
                if frame_mix:
                    metrics.incr(
                        "grouping.in_app_frame_mix",
                        sample_rate=1.0,
                        tags={
                            "platform": event.platform or "unknown",
                            "sdk": normalized_sdk_tag_from_event(event),
                            "frame_mix": frame_mix,
                        },
                    )

                return GroupInfo(group, is_new, is_regression)

    # Reaching here means an existing grouphash was found (either before or
    # inside the transaction); attach the event to its group.
    group = Group.objects.get(id=existing_grouphash.group_id)
    if group.issue_category != GroupCategory.ERROR:
        logger.info(
            "event_manager.category_mismatch",
            extra={
                "issue_category": group.issue_category,
                "event_type": "error",
            },
        )
        return None

    is_new = False

    # For the migration from hierarchical to non hierarchical we want to associate
    # all group hashes
    if migrate_off_hierarchical:
        new_hashes = [h for h in flat_grouphashes if h.group_id is None]
        if root_hierarchical_grouphash and root_hierarchical_grouphash.group_id is None:
            new_hashes.append(root_hierarchical_grouphash)
    elif root_hierarchical_grouphash is None:
        # No hierarchical grouping was run, only consider flat hashes
        new_hashes = [h for h in flat_grouphashes if h.group_id is None]
    elif root_hierarchical_grouphash.group_id is None:
        # The root hash is not assigned to a group.
        # We ran multiple grouping algorithms
        # (see secondary grouping), and the hierarchical hash is new
        new_hashes = [root_hierarchical_grouphash]
    else:
        new_hashes = []

    if new_hashes:
        # There may still be secondary hashes that we did not use to find an
        # existing group. A classic example is when grouping makes changes to
        # the app-hash (changes to in_app logic), but the system hash stays
        # stable and is used to find an existing group. Associate any new
        # hashes with the group such that event saving continues to be
        # resilient against grouping algorithm changes.
        #
        # There is a race condition here where two processes could "steal"
        # hashes from each other. In practice this should not be user-visible
        # as group creation is synchronized. Meaning the only way hashes could
        # jump between groups is if there were two processes that:
        #
        # 1) have BOTH found an existing group
        #    (otherwise at least one of them would be in the group creation
        #    codepath which has transaction isolation/acquires row locks)
        # 2) AND are looking at the same set, or an overlapping set of hashes
        #    (otherwise they would not operate on the same rows)
        # 3) yet somehow also sort their event into two different groups each
        #    (otherwise the update would not change anything)
        #
        # We think this is a very unlikely situation. A previous version of
        # _save_aggregate had races around group creation which made this race
        # more user visible. For more context, see 84c6f75a and d0e22787, as
        # well as GH-5085.
        GroupHash.objects.filter(id__in=[h.id for h in new_hashes]).exclude(
            state=GroupHash.State.LOCKED_IN_MIGRATION
        ).update(group=group)

    is_regression = _process_existing_aggregate(
        group=group,
        event=event,
        incoming_group_values=group_creation_kwargs,
        release=release,
    )

    return GroupInfo(group, is_new, is_regression)
1596
1834
def _create_group (project : Project , event : Event , ** kwargs : Any ) -> Group :
1597
1835
try :
1598
1836
short_id = project .next_short_id ()
0 commit comments