
Commit e5ef425

kcons authored and andrewshie-sentry committed
ref(processing): Avoid repeated calls to features.has in tight loops (#91691)
Use a frozen dataclass instance for the `features.has` calls we make repeatedly with the same subjects. There's a minor efficiency gain, but the real benefit is that `features.has` generates multiple spans, and there's a hard limit of 1000 spans per transaction in the SDK (https://github.com/getsentry/sentry-python/blob/master/sentry_sdk/scope.py#L1092-L1093) so avoiding 500+ redundant `features.has` spans can help us get much more useful trace data.
1 parent 1e45922 commit e5ef425
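
As a rough, self-contained sketch of the pattern this commit applies (illustrative only: `_FakeFeatures`, `process`, and the 500-iteration loop are stand-ins, and the real `LogConfig.create` resolves the org-scoped flag against `project.organization`): resolve the flags once per task into a frozen dataclass, then read the cached booleans inside the tight loop instead of calling `features.has` on every iteration.

# Hypothetical stand-ins for sentry.features and the processing loop; only the
# caching shape mirrors the commit.
from dataclasses import dataclass


class _FakeFeatures:
    """Stand-in for sentry.features; each real has() call can emit tracing spans."""

    def __init__(self) -> None:
        self.calls = 0

    def has(self, flag: str, subject: object) -> bool:
        self.calls += 1  # in production this is where the redundant spans come from
        return True


features = _FakeFeatures()


@dataclass(frozen=True)
class LogConfig:
    # Mirrors the dataclass added in this commit: one cached bool per flag.
    num_events_issue_debugging: bool
    workflow_engine_process_workflows: bool


def process(project: object) -> None:
    # Resolve both flags exactly once per task...
    log_config = LogConfig(
        num_events_issue_debugging=features.has("projects:num-events-issue-debugging", project),
        workflow_engine_process_workflows=features.has(
            "organizations:workflow-engine-process-workflows", project
        ),
    )
    # ...then read the cached booleans in the tight loop: no repeated flag checks, no extra spans.
    for _ in range(500):
        if log_config.num_events_issue_debugging:
            pass


process(project=object())
assert features.calls == 2  # two flag resolutions instead of 500+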

File tree: 2 files changed (+80, -29)


src/sentry/rules/processing/delayed_processing.py

Lines changed: 56 additions & 20 deletions
@@ -3,6 +3,7 @@
 import uuid
 from collections import defaultdict
 from collections.abc import Sequence
+from dataclasses import dataclass
 from datetime import datetime, timedelta, timezone
 from typing import Any, DefaultDict, NamedTuple
 
@@ -137,6 +138,29 @@ def get_slow_conditions(rule: Rule) -> list[EventFrequencyConditionData]:
     return slow_conditions  # type: ignore[return-value]
 
 
+@dataclass(frozen=True)
+class LogConfig:
+    """
+    LogConfig efficiently caches the results of features.has calls; these are project/org
+    based, should be stable within our task, and caching them helps avoid generating
+    excessive spans and saves a bit of time.
+    """
+
+    # Cached value of features.has("projects:num-events-issue-debugging", project)
+    num_events_issue_debugging: bool
+    # Cached value of features.has("organizations:workflow-engine-process-workflows", organization)
+    workflow_engine_process_workflows: bool
+
+    @classmethod
+    def create(cls, project: Project) -> "LogConfig":
+        return cls(
+            num_events_issue_debugging=features.has("projects:num-events-issue-debugging", project),
+            workflow_engine_process_workflows=features.has(
+                "organizations:workflow-engine-process-workflows", project.organization
+            ),
+        )
+
+
 def generate_unique_queries(
     condition_data: EventFrequencyConditionData, environment_id: int
 ) -> list[UniqueConditionQuery]:
@@ -229,6 +253,7 @@ def parse_rulegroup_to_event_data(
 
 
 def build_group_to_groupevent(
+    log_config: LogConfig,
     parsed_rulegroup_to_event_data: dict[tuple[int, int], dict[str, str]],
     bulk_event_id_to_events: dict[str, Event],
     bulk_occurrence_id_to_occurrence: dict[str, IssueOccurrence],
@@ -238,7 +263,7 @@ def build_group_to_groupevent(
 
     project = fetch_project(project_id)
     if project:
-        if features.has("projects:num-events-issue-debugging", project):
+        if log_config.num_events_issue_debugging:
             logger.info(
                 "delayed_processing.build_group_to_groupevent_input",
                 extra={
@@ -268,7 +293,7 @@ def build_group_to_groupevent(
         group = group_id_to_group.get(int(rule_group[1]))
 
         if not group or not event:
-            if features.has("projects:num-events-issue-debugging", project):
+            if log_config.num_events_issue_debugging:
                 logger.info(
                     "delayed_processing.missing_event_or_group",
                     extra={
@@ -290,6 +315,7 @@ def build_group_to_groupevent(
 
 
 def get_group_to_groupevent(
+    log_config: LogConfig,
     parsed_rulegroup_to_event_data: dict[tuple[int, int], dict[str, str]],
     project_id: int,
     group_ids: set[int],
@@ -330,6 +356,7 @@ def get_group_to_groupevent(
     }
 
     return build_group_to_groupevent(
+        log_config,
         relevant_rulegroup_to_event_data,
         bulk_event_id_to_events,
         bulk_occurrence_id_to_occurrence,
@@ -457,6 +484,7 @@ def get_rules_to_fire(
 
 
 def fire_rules(
+    log_config: LogConfig,
     rules_to_fire: DefaultDict[Rule, set[int]],
     parsed_rulegroup_to_event_data: dict[tuple[int, int], dict[str, str]],
     alert_rules: list[Rule],
@@ -475,11 +503,12 @@ def fire_rules(
         frequency = rule.data.get("frequency") or Rule.DEFAULT_FREQUENCY
         freq_offset = now - timedelta(minutes=frequency)
         group_to_groupevent = get_group_to_groupevent(
-            parsed_rulegroup_to_event_data, project.id, group_ids
+            log_config, parsed_rulegroup_to_event_data, project.id, group_ids
         )
-        if features.has(
-            "organizations:workflow-engine-process-workflows", project.organization
-        ) or features.has("projects:num-events-issue-debugging", project):
+        if (
+            log_config.num_events_issue_debugging
+            or log_config.workflow_engine_process_workflows
+        ):
             serialized_groups = {
                 group.id: group_event.event_id
                 for group, group_event in group_to_groupevent.items()
@@ -530,10 +559,10 @@ def fire_rules(
                 rule, group, groupevent.event_id, notification_uuid
             )
 
-            if features.has(
-                "organizations:workflow-engine-process-workflows-logs",
-                project.organization,
-            ) or features.has("projects:num-events-issue-debugging", project):
+            if (
+                log_config.workflow_engine_process_workflows
+                or log_config.num_events_issue_debugging
+            ):
                 logger.info(
                     "post_process.delayed_processing.triggered_rule",
                     extra={
@@ -558,7 +587,7 @@ def fire_rules(
             if results is None:
                 not_sent += 1
 
-        if features.has("projects:num-events-issue-debugging", project):
+        if log_config.num_events_issue_debugging:
            logger.info(
                 "delayed_processing.rules_fired",
                 extra={
@@ -571,7 +600,10 @@
 
 
 def cleanup_redis_buffer(
-    project: Project, rules_to_groups: DefaultDict[int, set[int]], batch_key: str | None
+    log_config: LogConfig,
+    project: Project,
+    rules_to_groups: DefaultDict[int, set[int]],
+    batch_key: str | None,
 ) -> None:
     hashes_to_delete = [
         f"{rule}:{group}" for rule, groups in rules_to_groups.items() for group in groups
@@ -581,7 +613,7 @@ def cleanup_redis_buffer(
         filters["batch_key"] = batch_key
 
     buffer.backend.delete_hash(model=Project, filters=filters, fields=hashes_to_delete)
-    if features.has("projects:num-events-issue-debugging", project):
+    if log_config.num_events_issue_debugging:
         logger.info(
             "delayed_processing.cleanup_redis_buffer",
             extra={"hashes_to_delete": hashes_to_delete, "project_id": project.id},
@@ -616,6 +648,8 @@ def apply_delayed(project_id: int, batch_key: str | None = None, *args: Any, **k
     if not project:
         return
 
+    log_config = LogConfig.create(project)
+
     rulegroup_to_event_data = fetch_rulegroup_to_event_data(project_id, batch_key)
     rules_to_groups = get_rules_to_groups(rulegroup_to_event_data)
     alert_rules = fetch_alert_rules(list(rules_to_groups.keys()))
@@ -638,10 +672,7 @@ def apply_delayed(project_id: int, batch_key: str | None = None, *args: Any, **k
     ):
         condition_group_results = get_condition_group_results(condition_groups, project)
 
-        has_workflow_engine = features.has(
-            "organizations:workflow-engine-process-workflows", project.organization
-        )
-        if has_workflow_engine or features.has("projects:num-events-issue-debugging", project):
+        if log_config.workflow_engine_process_workflows or log_config.num_events_issue_debugging:
             serialized_results = (
                 {str(query): count_dict for query, count_dict in condition_group_results.items()}
                 if condition_group_results
@@ -668,7 +699,10 @@ def apply_delayed(project_id: int, batch_key: str | None = None, *args: Any, **k
         rules_to_fire = get_rules_to_fire(
             condition_group_results, rules_to_slow_conditions, rules_to_groups, project.id
         )
-        if has_workflow_engine or features.has("projects:num-events-issue-debugging", project):
+        if (
+            log_config.workflow_engine_process_workflows
+            or log_config.num_events_issue_debugging
+        ):
             logger.info(
                 "delayed_processing.rules_to_fire",
                 extra={
@@ -694,13 +728,15 @@ def apply_delayed(project_id: int, batch_key: str | None = None, *args: Any, **k
     ):
         parsed_rulegroup_to_event_data = parse_rulegroup_to_event_data(rulegroup_to_event_data)
         with metrics.timer("delayed_processing.fire_rules.duration"):
-            fire_rules(rules_to_fire, parsed_rulegroup_to_event_data, alert_rules, project)
+            fire_rules(
+                log_config, rules_to_fire, parsed_rulegroup_to_event_data, alert_rules, project
+            )
 
     with sentry_sdk.start_span(
         op="delayed_processing.cleanup_redis_buffer",
         name="Clean up redis buffer in delayed processing",
     ):
-        cleanup_redis_buffer(project, rules_to_groups, batch_key)
+        cleanup_redis_buffer(log_config, project, rules_to_groups, batch_key)
 
 
 @delayed_processing_registry.register("delayed_processing")  # default delayed processing
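
A small aside on the frozen=True choice (illustrative sketch, not part of the commit): frozen dataclass instances reject attribute writes and are hashable, so the cached flags can be threaded through get_group_to_groupevent, fire_rules, and cleanup_redis_buffer with no risk of drifting mid-task.

# Illustrative snippet; redefines a minimal LogConfig locally to show frozen-dataclass behavior.
import dataclasses


@dataclasses.dataclass(frozen=True)
class LogConfig:
    num_events_issue_debugging: bool
    workflow_engine_process_workflows: bool


cfg = LogConfig(num_events_issue_debugging=True, workflow_engine_process_workflows=False)
try:
    cfg.num_events_issue_debugging = False  # type: ignore[misc]
except dataclasses.FrozenInstanceError:
    print("frozen: cached flag values cannot change mid-task")

# Equal field values compare (and hash) equal, which keeps assertions in tests straightforward.
assert cfg == LogConfig(True, False)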

tests/sentry/rules/processing/test_delayed_processing.py

Lines changed: 24 additions & 9 deletions
@@ -21,6 +21,7 @@
 from sentry.rules.processing.buffer_processing import process_in_batches
 from sentry.rules.processing.delayed_processing import (
     DataAndGroups,
+    LogConfig,
     UniqueConditionQuery,
     apply_delayed,
     bulk_fetch_events,
@@ -306,6 +307,9 @@ class GetGroupToGroupEventTest(CreateEventTestCase):
     def setUp(self):
         super().setUp()
         self.project = self.create_project()
+        self.log_config = LogConfig(
+            workflow_engine_process_workflows=True, num_events_issue_debugging=True
+        )
         self.rule = self.create_alert_rule(self.organization, [self.project])
 
         # Create some groups
@@ -337,13 +341,17 @@ def setUp(self):
 
     def test_basic(self):
         self.parsed_data.pop((self.rule.id, self.group2.id))
-        result = get_group_to_groupevent(self.parsed_data, self.project.id, {self.group1.id})
+        result = get_group_to_groupevent(
+            self.log_config, self.parsed_data, self.project.id, {self.group1.id}
+        )
 
         assert len(result) == 1
         assert result[self.group1] == self.event1
 
     def test_many(self):
-        result = get_group_to_groupevent(self.parsed_data, self.project.id, self.group_ids)
+        result = get_group_to_groupevent(
+            self.log_config, self.parsed_data, self.project.id, self.group_ids
+        )
 
         assert len(result) == 2
         assert result[self.group1] == self.event1
@@ -361,21 +369,23 @@ def test_missing_event(self):
             },
         }
 
-        result = get_group_to_groupevent(parsed_data, self.project.id, self.group_ids)
+        result = get_group_to_groupevent(
+            self.log_config, parsed_data, self.project.id, self.group_ids
+        )
 
         assert len(result) == 1
         assert result[self.group1] == self.event1
 
     def test_invalid_project_id(self):
-        result = get_group_to_groupevent(self.parsed_data, 0, self.group_ids)
+        result = get_group_to_groupevent(self.log_config, self.parsed_data, 0, self.group_ids)
         assert len(result) == 0
 
     def test_empty_group_ids(self):
-        result = get_group_to_groupevent({}, self.project.id, set())
+        result = get_group_to_groupevent(self.log_config, self.parsed_data, self.project.id, set())
         assert len(result) == 0
 
     def test_invalid_group_ids(self):
-        result = get_group_to_groupevent(self.parsed_data, self.project.id, {0})
+        result = get_group_to_groupevent(self.log_config, self.parsed_data, self.project.id, {0})
         assert len(result) == 0
 
     def test_filtered_group_ids(self):
@@ -394,7 +404,9 @@ def mock_bulk_fetch_events(event_ids, project_id):
             side_effect=mock_bulk_fetch_events,
         ):
             # Call get_group_to_groupevent with only group1
-            result = get_group_to_groupevent(self.parsed_data, self.project.id, {self.group1.id})
+            result = get_group_to_groupevent(
+                self.log_config, self.parsed_data, self.project.id, {self.group1.id}
+            )
 
             # Verify only event1 was requested
             assert requested_event_ids == {self.event1.event_id}
@@ -1381,13 +1393,16 @@ def setUp(self):
         self.project = self.create_project()
         self.group = self.create_group(self.project)
         self.rule = self.create_alert_rule()
+        self.log_config = LogConfig(
+            workflow_engine_process_workflows=True, num_events_issue_debugging=True
+        )
 
     def test_cleanup_redis(self):
         self.push_to_hash(self.project.id, self.rule.id, self.group.id)
         rules_to_groups: defaultdict[int, set[int]] = defaultdict(set)
         rules_to_groups[self.rule.id].add(self.group.id)
 
-        cleanup_redis_buffer(self.project, rules_to_groups, None)
+        cleanup_redis_buffer(self.log_config, self.project, rules_to_groups, None)
         rule_group_data = buffer.backend.get_hash(Project, {"project_id": self.project.id})
         assert rule_group_data == {}
 
@@ -1414,7 +1429,7 @@ def test_batched_cleanup(self, mock_apply_delayed):
         rule_group_data = buffer.backend.get_hash(Project, {"project_id": self.project.id})
         assert rule_group_data == {}
 
-        cleanup_redis_buffer(self.project, rules_to_groups, batch_one_key)
+        cleanup_redis_buffer(self.log_config, self.project, rules_to_groups, batch_one_key)
 
         # Verify the batch we "executed" is removed
         rule_group_data = buffer.backend.get_hash(