Skip to content

Commit 6ad005a

Browse files
authored
ref(grouping): Add optional variants attribute to CalculatedHashes class (#68360)
As part of calculating grouping hashes for an event, we first put together "variants," which contain information about the results of applying various possible types of grouping (based on stacktrace, based on message, etc.) to the event, as well as about how those results relate to each other in terms of what takes precedence and why. Though we store the resulting hashes on the event, we don't store the much larger `variants` data structure. This means that when we want access to variant data, we need to recalculate it. So far, that recalculation has only powered two features (other than the initial hash calculations) - showing grouping info at the bottom of the issue details page and making sure that seer-derived similar issues on the Similar Issues tab are based on the same normalized stacktrace used for grouping. In both cases, the relative infrequency with which those features are used and their separation from ingestion have made it worthwhile for us to pay the time penalty of the recalculation rather than store a whole bunch more data on every event. Now, though, we want to use seer to find similar issues during ingestion as well. In the long run, that may mean that we end up storing some variant information on the event. In the short run, it means that at minimum, we don't want to have to calculate the variants twice during ingest. This change therefore propagates variant information farther along the ingest pipeline, by adding it to the `CalculatedHashes` data structure we use to carry hash values from the actual calculation machinery to where those values are used.
1 parent 974eae5 commit 6ad005a

File tree

3 files changed

+70
-2
lines changed

3 files changed

+70
-2
lines changed

src/sentry/eventstore/models.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,10 @@ def get_hashes(self, force_config: StrategyConfiguration | None = None) -> Calcu
368368
hierarchical_hashes = [hash_ for _, hash_ in hierarchical_hashes]
369369

370370
return CalculatedHashes(
371-
hashes=flat_hashes, hierarchical_hashes=hierarchical_hashes, tree_labels=tree_labels
371+
hashes=flat_hashes,
372+
hierarchical_hashes=hierarchical_hashes,
373+
tree_labels=tree_labels,
374+
variants=[*flat_variants, *hierarchical_variants],
372375
)
373376

374377
def get_sorted_grouping_variants(self, force_config: StrategyConfiguration | None = None):

src/sentry/grouping/result.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Any, Optional, TypedDict
44

55
from sentry.db.models import NodeData
6+
from sentry.grouping.variants import BaseVariant
67
from sentry.utils.safe import get_path, safe_execute, set_path
78

89
EventMetadata = dict[str, Any]
@@ -99,6 +100,7 @@ class CalculatedHashes:
99100
hashes: Sequence[str]
100101
hierarchical_hashes: Sequence[str]
101102
tree_labels: Sequence[TreeLabel | None]
103+
variants: list[tuple[str, BaseVariant]] | None = None
102104

103105
def write_to_event(self, event_data: NodeData) -> None:
104106
event_data["hashes"] = self.hashes

tests/sentry/eventstore/test_models.py

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
from sentry import eventstore, nodestore
77
from sentry.db.models.fields.node import NodeData, NodeIntegrityFailure
88
from sentry.eventstore.models import Event, GroupEvent
9-
from sentry.grouping.api import GroupingConfig
9+
from sentry.grouping.api import GroupingConfig, get_grouping_variants_for_event
1010
from sentry.grouping.enhancer import Enhancements
11+
from sentry.grouping.result import CalculatedHashes
12+
from sentry.grouping.utils import hash_from_values
1113
from sentry.interfaces.user import User
1214
from sentry.issues.issue_occurrence import IssueOccurrence
1315
from sentry.models.environment import Environment
@@ -21,6 +23,8 @@
2123

2224
pytestmark = [requires_snuba]
2325

26+
NEWSTYLE_GROUPING_CONFIG = "newstyle:2023-01-11"
27+
2428

2529
class EventTest(TestCase, PerformanceIssueTestCase):
2630
def test_pickling_compat(self):
@@ -394,6 +398,65 @@ def test_grouping_reset(self):
394398
v.as_dict()["hash"] for v in variants2.values()
395399
)
396400

401+
def test_get_hashes_pulls_existing_hashes(self):
402+
hashes = ["04e89719410791836f0a0bbf03bf0d2e"]
403+
event = Event(
404+
event_id="11212012123120120415201309082013",
405+
data={
406+
"message": "Dogs are great!",
407+
"hashes": hashes,
408+
},
409+
project_id=self.project.id,
410+
)
411+
412+
assert event.get_hashes() == CalculatedHashes(hashes, [], [], None)
413+
414+
def test_get_hashes_gets_hashes_and_variants_if_none_on_event(self):
415+
self.project.update_option("sentry:grouping_config", NEWSTYLE_GROUPING_CONFIG)
416+
event = Event(
417+
event_id="11212012123120120415201309082013",
418+
data={"message": "Dogs are great!"},
419+
project_id=self.project.id,
420+
)
421+
422+
calculated_hashes = event.get_hashes()
423+
expected_hash_values = [hash_from_values(["Dogs are great!"])]
424+
expected_variants = list(get_grouping_variants_for_event(event).items())
425+
426+
assert calculated_hashes.hashes == expected_hash_values
427+
assert (
428+
calculated_hashes.variants is not None
429+
and len(calculated_hashes.variants) == len(expected_variants) == 1
430+
)
431+
432+
variant_key, variant = calculated_hashes.variants[0]
433+
expected_variant_key, expected_variant = expected_variants[0]
434+
variant_dict = variant._get_metadata_as_dict()
435+
expected_variant_dict = expected_variant._get_metadata_as_dict()
436+
437+
assert variant_key == expected_variant_key == "default"
438+
assert (
439+
variant_dict["config"]["id"]
440+
== expected_variant_dict["config"]["id"]
441+
== NEWSTYLE_GROUPING_CONFIG
442+
)
443+
assert (
444+
variant_dict["component"]["id"] == expected_variant_dict["component"]["id"] == "default"
445+
)
446+
assert (
447+
len(variant_dict["component"]["values"])
448+
== len(expected_variant_dict["component"]["values"])
449+
== 1
450+
)
451+
452+
component_value = variant_dict["component"]["values"][0]
453+
expected_component_value = expected_variant_dict["component"]["values"][0]
454+
455+
assert component_value["id"] == expected_component_value["id"] == "message"
456+
assert (
457+
component_value["values"] == expected_component_value["values"] == ["Dogs are great!"]
458+
)
459+
397460

398461
class EventGroupsTest(TestCase):
399462
def test_none(self):

0 commit comments

Comments
 (0)