
Commit 00c1984

lobsterkatie authored and c298lee committed
chore(grouping): Various small pre-seer-call fixes (#68560)
This is a bunch of small, mostly cosmetic fixes pulled out of upcoming seer-grouping-related PRs to make them easier to review. Included changes (none behavior-changing):

- Add a docstring to the `get_relevant_metrics_calls` test helper.
- Remove an empty `if TYPE_CHECKING` block accidentally left over from splitting up the `grouping.ingest` module.
- Add parentheses to clarify a ternary in the `assign_to_group` tests.
- Add a comment about removing `extract_hashes` once the hierarchical grouping code is gone, and one about the locking outcome metric.
- Copy the explanation of the locking mechanism from `_save_aggregate_new` into `_save_aggregate`.
- Rename `create_and_seek_grouphashes` to `get_hashes_and_grouphashes`.
- Make the linter chill out by:
  - using `startswith` rather than `[:1]` in `get_grouping_component_variants`,
  - removing the unused `projects` parameter in both `_get_or_create_group_environment_many` and `_get_or_create_group_release_many`, and
  - renaming variables in `get_default_grouping_config_dict` and `get_grouping_component_variants` to stop shadowing built-ins.
1 parent db63e9f commit 00c1984

File tree: 7 files changed (+40 -19 lines)


src/sentry/event_manager.py

Lines changed: 27 additions & 7 deletions
@@ -570,10 +570,10 @@ def save_error_events(
         job["event"].data.bind_ref(job["event"])

     _get_or_create_environment_many(jobs, projects)
-    _get_or_create_group_environment_many(jobs, projects)
+    _get_or_create_group_environment_many(jobs)
     _get_or_create_release_associated_models(jobs, projects)
     _increment_release_associated_counts_many(jobs, projects)
-    _get_or_create_group_release_many(jobs, projects)
+    _get_or_create_group_release_many(jobs)
     _tsdb_record_all_metrics(jobs)

     UserReport.objects.filter(project_id=project.id, event_id=job["event"].event_id).update(
@@ -946,7 +946,7 @@ def _get_or_create_environment_many(jobs: Sequence[Job], projects: ProjectsMappi


 @sentry_sdk.tracing.trace
-def _get_or_create_group_environment_many(jobs: Sequence[Job], projects: ProjectsMapping) -> None:
+def _get_or_create_group_environment_many(jobs: Sequence[Job]) -> None:
     for job in jobs:
         _get_or_create_group_environment(job["environment"], job["release"], job["groups"])

@@ -1029,7 +1029,7 @@ def _increment_release_associated_counts(
     )


-def _get_or_create_group_release_many(jobs: Sequence[Job], projects: ProjectsMapping) -> None:
+def _get_or_create_group_release_many(jobs: Sequence[Job]) -> None:
     for job in jobs:
         _get_or_create_group_release(
             job["environment"], job["release"], job["event"], job["groups"]
@@ -1443,19 +1443,35 @@ def _save_aggregate(
         metrics.timer("event_manager.create_group_transaction") as metric_tags,
         transaction.atomic(router.db_for_write(GroupHash)),
     ):
+        # These values will get overridden with whatever happens inside the lock if we do manage
+        # to acquire it, so it should only end up with `wait-for-lock` if we don't
         span.set_tag("outcome", "wait_for_lock")
         metric_tags["outcome"] = "wait_for_lock"

         all_grouphash_ids = [h.id for h in flat_grouphashes]
         if root_hierarchical_grouphash is not None:
             all_grouphash_ids.append(root_hierarchical_grouphash.id)

+        # If we're in this branch, we checked our grouphashes and didn't find one with a group
+        # attached. We thus want to create a new group, but we need to guard against another
+        # event with the same hash coming in before we're done here and also thinking it needs
+        # to create a new group. To prevent this, we're using double-checked locking
+        # (https://en.wikipedia.org/wiki/Double-checked_locking).
+
+        # First, try to lock the relevant rows in the `GroupHash` table. If another (identically
+        # hashed) event is also in the process of creating a group and has grabbed the lock
+        # before us, we'll block here until it's done. If not, we've now got the lock and other
+        # identically-hashed events will have to wait for us.
         all_grouphashes = list(
             GroupHash.objects.filter(id__in=all_grouphash_ids).select_for_update()
         )

         flat_grouphashes = [gh for gh in all_grouphashes if gh.hash in hashes.hashes]

+        # Now check again to see if any of our grouphashes have a group. In the first race
+        # condition scenario above, we'll have been blocked long enough for the other event to
+        # have created the group and updated our grouphashes with a group id, which means this
+        # time, we'll find something.
         existing_grouphash, root_hierarchical_hash = find_existing_grouphash(
             project, flat_grouphashes, hashes.hierarchical_hashes
         )
@@ -1467,6 +1483,8 @@ def _save_aggregate(
         else:
             root_hierarchical_grouphash = None

+        # If we still haven't found a matching grouphash, we're now safe to go ahead and create
+        # the group.
         if existing_grouphash is None:
             group = _create_group(project, event, **group_creation_kwargs)

@@ -1606,7 +1624,7 @@ def _save_aggregate_new(
     group_processing_kwargs = _get_group_processing_kwargs(job)

     # Try looking for an existing group using the current grouping config
-    primary = create_and_seek_grouphashes(job, run_primary_grouping, metric_tags)
+    primary = get_hashes_and_grouphashes(job, run_primary_grouping, metric_tags)

     # If we've found one, great. No need to do any more calculations
     if primary.existing_grouphash:
@@ -1616,7 +1634,7 @@ def _save_aggregate_new(
         result = "found_primary"
     # If we haven't, try again using the secondary config
     else:
-        secondary = create_and_seek_grouphashes(job, maybe_run_secondary_grouping, metric_tags)
+        secondary = get_hashes_and_grouphashes(job, maybe_run_secondary_grouping, metric_tags)
         all_grouphashes = primary.grouphashes + secondary.grouphashes

     # Now we know for sure whether or not a group already exists, so handle both cases
@@ -1659,7 +1677,7 @@ def _save_aggregate_new(
     return group_info


-def create_and_seek_grouphashes(
+def get_hashes_and_grouphashes(
     job: Job,
     hash_calculation_function: Callable[
         [Project, Job, MutableTags],
@@ -1768,6 +1786,8 @@ create_group_with_grouphashes(
         metrics.timer("event_manager.create_group_transaction") as metrics_timer_tags,
         transaction.atomic(router.db_for_write(GroupHash)),
     ):
+        # These values will get overridden with whatever happens inside the lock if we do manage to
+        # acquire it, so it should only end up with `wait-for-lock` if we don't
         span.set_tag("outcome", "wait_for_lock")
         metrics_timer_tags["outcome"] = "wait_for_lock"

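The comments copied into `_save_aggregate` above describe a double-checked locking scheme built on row locks in the `GroupHash` table. Below is a rough standalone sketch of that pattern, not Sentry's actual implementation: the function name and the `create_group` callback are made up for illustration, and only the general shape (check, lock with `select_for_update()`, re-check, create) mirrors the comments.

```python
from django.db import router, transaction

from sentry.models.grouphash import GroupHash


def create_group_if_still_needed(project, grouphashes, create_group):
    """Illustrative sketch of the double-checked locking described above."""
    # First check, without a lock: if some grouphash already has a group, use it.
    for grouphash in grouphashes:
        if grouphash.group_id is not None:
            return grouphash.group

    with transaction.atomic(router.db_for_write(GroupHash)):
        # Lock the relevant GroupHash rows. If an identically-hashed event grabbed
        # the lock first, we block here until its transaction commits.
        locked_grouphashes = list(
            GroupHash.objects.filter(id__in=[gh.id for gh in grouphashes]).select_for_update()
        )

        # Second check, now under the lock: the other event may have created the
        # group and pointed our grouphashes at it while we were waiting.
        for grouphash in locked_grouphashes:
            if grouphash.group_id is not None:
                return grouphash.group

        # Still nothing, so it's safe to create the group and claim the hashes.
        group = create_group(project)  # hypothetical callback standing in for _create_group
        GroupHash.objects.filter(id__in=[gh.id for gh in locked_grouphashes]).update(group=group)
        return group
```

The important property is that the second check only runs after `select_for_update()` has serialized this transaction against any other writer of the same rows, which is what makes the re-check trustworthy.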

src/sentry/grouping/api.py

Lines changed: 4 additions & 4 deletions
@@ -199,13 +199,13 @@ def get_projects_default_fingerprinting_bases(
     return bases


-def get_default_grouping_config_dict(id=None) -> GroupingConfig:
+def get_default_grouping_config_dict(config_id=None) -> GroupingConfig:
     """Returns the default grouping config."""
-    if id is None:
+    if config_id is None:
         from sentry.projectoptions.defaults import DEFAULT_GROUPING_CONFIG

-        id = DEFAULT_GROUPING_CONFIG
-    return {"id": id, "enhancements": get_default_enhancements(id)}
+        config_id = DEFAULT_GROUPING_CONFIG
+    return {"id": config_id, "enhancements": get_default_enhancements(config_id)}


 def load_grouping_config(config_dict=None) -> StrategyConfiguration:

src/sentry/grouping/ingest/config.py

Lines changed: 1 addition & 4 deletions
@@ -3,7 +3,7 @@
 import logging
 import time
 from collections.abc import MutableMapping
-from typing import TYPE_CHECKING, Any
+from typing import Any

 from django.conf import settings
 from django.core.cache import cache
@@ -13,9 +13,6 @@
 from sentry.models.project import Project
 from sentry.projectoptions.defaults import BETA_GROUPING_CONFIG, DEFAULT_GROUPING_CONFIG

-if TYPE_CHECKING:
-    pass
-
 logger = logging.getLogger("sentry.events.grouping")

 Job = MutableMapping[str, Any]

src/sentry/grouping/ingest/utils.py

Lines changed: 1 addition & 0 deletions
@@ -20,6 +20,7 @@
 Job = MutableMapping[str, Any]


+# TODO This can go away once hierarchical grouping is gone
 def extract_hashes(calculated_hashes: CalculatedHashes | None) -> list[str]:
     return [] if not calculated_hashes else list(calculated_hashes.hashes)


src/sentry/grouping/strategies/base.py

Lines changed: 3 additions & 3 deletions
@@ -217,7 +217,7 @@ def get_grouping_component_variants(
         prevent_contribution = None

         for variant, component in variants.items():
-            is_mandatory = variant[:1] == "!"
+            is_mandatory = variant.startswith("!")
             variant = variant.lstrip("!")

             if is_mandatory:
@@ -252,8 +252,8 @@ def get_grouping_component_variants(
                     ),
                 )
             else:
-                hash = component.get_hash()
-                duplicate_of = mandatory_contributing_hashes.get(hash)
+                hash_value = component.get_hash()
+                duplicate_of = mandatory_contributing_hashes.get(hash_value)
                 if duplicate_of is not None:
                     component.update(
                         contributes=False,
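Two of the "make the linter chill out" tweaks land in this file. The snippet below is purely illustrative (not part of the change) and shows why each reads better: `startswith` states the intent of the `"!"` check directly, and renaming the local keeps Python's built-in `hash()` usable in the same scope.

```python
variant = "!app"

# Equivalent checks for a mandatory ("!"-prefixed) variant name.
assert (variant[:1] == "!") == variant.startswith("!")
assert variant.lstrip("!") == "app"


def old_style(component_hash: str) -> None:
    hash = component_hash  # shadows the built-in hash() for the rest of this function
    # hash(hash)  # would raise TypeError: 'str' object is not callable


def new_style(component_hash: str) -> int:
    hash_value = component_hash  # built-in hash() stays available
    return hash(hash_value)
```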

tests/sentry/event_manager/grouping/test_assign_to_group.py

Lines changed: 1 addition & 1 deletion
@@ -218,7 +218,7 @@ def get_results_from_saving_event(

     if existing_group_id:
         event_assigned_to_given_existing_group = (
-            new_event.group_id == existing_group_id if existing_group_id else None
+            (new_event.group_id == existing_group_id) if existing_group_id else None
         )

     if secondary_hash_calculated:
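The added parentheses don't change behavior; a conditional expression already binds more loosely than `==`, so the whole comparison is what gets wrapped. A quick check with illustrative values:

```python
group_id = existing_group_id = 123

without_parens = group_id == existing_group_id if existing_group_id else None
with_parens = (group_id == existing_group_id) if existing_group_id else None

# Both parse as "(comparison) if condition else None", never as
# "group_id == (existing_group_id if existing_group_id else None)".
assert without_parens is with_parens is True
```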

tests/sentry/event_manager/test_event_manager_grouping.py

Lines changed: 3 additions & 0 deletions
@@ -19,6 +19,9 @@


 def get_relevant_metrics_calls(mock_fn: MagicMock, key: str) -> list[mock._Call]:
+    """
+    Given a mock metrics function, grab only the calls which record the metric with the given key.
+    """
     return [call for call in mock_fn.call_args_list if call.args[0] == key]

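The newly documented helper filters a patched metrics mock down to the calls for one key, so assertions don't trip over unrelated metrics recorded on the same code path. A hypothetical usage sketch follows: the metric key and tag values are made up for illustration, while `sentry.utils.metrics.incr` is the real function being patched and `get_relevant_metrics_calls` is the helper defined above.

```python
from unittest import mock

from sentry.utils import metrics


@mock.patch("sentry.utils.metrics.incr")
def test_only_sees_the_metric_under_test(mock_incr: mock.MagicMock) -> None:
    # Stand-in for the code under test, which records several different metrics.
    metrics.incr("grouping.events_seen", tags={"result": "found_primary"})
    metrics.incr("something.unrelated")

    # Keep only the calls that record the key we care about.
    calls = get_relevant_metrics_calls(mock_incr, "grouping.events_seen")

    assert len(calls) == 1
    assert calls[0].kwargs["tags"] == {"result": "found_primary"}
```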
