diff --git a/src/sentry/api/endpoints/project_rules_configuration.py b/src/sentry/api/endpoints/project_rules_configuration.py
index 3519f238fc28ac..3de7e95b5608d1 100644
--- a/src/sentry/api/endpoints/project_rules_configuration.py
+++ b/src/sentry/api/endpoints/project_rules_configuration.py
@@ -39,7 +39,7 @@ def get(self, request: Request, project) -> Response:
         # TODO: conditions need to be based on actions
         for rule_type, rule_cls in rules:
-            node = rule_cls(project)
+            node = rule_cls(project=project)
 
             # skip over conditions if they are not in the migrated set for a project with alert-filters
             if project_has_filters and node.id in MIGRATED_CONDITIONS:
                 continue
diff --git a/src/sentry/buffer/redis.py b/src/sentry/buffer/redis.py
index ad6bab81e73de9..e042572a8ecd01 100644
--- a/src/sentry/buffer/redis.py
+++ b/src/sentry/buffer/redis.py
@@ -289,7 +289,7 @@ def push_to_hash(
 
     def get_hash(
         self, model: type[models.Model], field: dict[str, models.Model | str | int]
-    ) -> dict[str, str]:
+    ) -> dict[str, int]:
         key = self._make_key(model, field)
         return self._execute_redis_operation(key, RedisOperation.HASH_GET_ALL)
diff --git a/src/sentry/rules/base.py b/src/sentry/rules/base.py
index 18fae675c0497c..aae7d45845c362 100644
--- a/src/sentry/rules/base.py
+++ b/src/sentry/rules/base.py
@@ -3,19 +3,21 @@
 import abc
 import logging
 from collections import namedtuple
-from collections.abc import Callable, Sequence
-from typing import Any, ClassVar
+from collections.abc import Callable, Mapping, Sequence
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from django import forms
 
 from sentry.eventstore.models import GroupEvent
 from sentry.models.project import Project
-from sentry.models.rule import Rule
 from sentry.models.rulefirehistory import RuleFireHistory
 from sentry.snuba.dataset import Dataset
 from sentry.types.condition_activity import ConditionActivity
 from sentry.types.rules import RuleFuture
 
+if TYPE_CHECKING:
+    from sentry.models.rule import Rule
+
 """
 Rules apply either before an event gets stored, or immediately after.
 
@@ -60,7 +62,7 @@ class RuleBase(abc.ABC):
     def __init__(
         self,
         project: Project,
-        data: dict[str, Any] | None = None,
+        data: Mapping[str, Any] | None = None,
         rule: Rule | None = None,
         rule_fire_history: RuleFireHistory | None = None,
     ) -> None:
@@ -81,10 +83,7 @@ def get_option(self, key: str, default: str | None = None) -> str:
         return self.data.get(key, default)
 
     def get_form_instance(self) -> forms.Form:
-        data: dict[str, Any] | None = None
-        if self.had_data:
-            data = self.data
-        return self.form_cls(data)
+        return self.form_cls(self.data if self.had_data else None)
 
     def render_label(self) -> str:
         return self.label.format(**self.data)
diff --git a/src/sentry/rules/conditions/base.py b/src/sentry/rules/conditions/base.py
index 468147d864980d..09d371e6705d01 100644
--- a/src/sentry/rules/conditions/base.py
+++ b/src/sentry/rules/conditions/base.py
@@ -1,12 +1,19 @@
 import abc
 from collections.abc import Sequence
 from datetime import datetime
+from typing import TypedDict
 
 from sentry.eventstore.models import GroupEvent
 from sentry.rules.base import EventState, RuleBase
 from sentry.types.condition_activity import ConditionActivity
 
 
+class GenericCondition(TypedDict):
+    # the ID in the rules registry that maps to a condition class
+    # e.g. "sentry.rules.conditions.every_event.EveryEventCondition"
+    id: str
+
+
 class EventCondition(RuleBase, abc.ABC):
     rule_type = "condition/event"
 
diff --git a/src/sentry/rules/conditions/event_frequency.py b/src/sentry/rules/conditions/event_frequency.py
index a4c30f2136bea1..b66f0c454ae3f2 100644
--- a/src/sentry/rules/conditions/event_frequency.py
+++ b/src/sentry/rules/conditions/event_frequency.py
@@ -5,12 +5,13 @@
 import logging
 import re
 from collections import defaultdict
-from collections.abc import Callable, Mapping, Sequence
+from collections.abc import Callable, Mapping
 from datetime import datetime, timedelta
-from typing import Any
+from typing import Any, Literal, NotRequired
 
 from django import forms
 from django.core.cache import cache
+from django.db.models.enums import TextChoices
 from django.utils import timezone
 
 from sentry import release_health, tsdb
@@ -20,7 +21,7 @@
 from sentry.models.group import Group
 from sentry.receivers.rules import DEFAULT_RULE_LABEL, DEFAULT_RULE_LABEL_NEW
 from sentry.rules import EventState
-from sentry.rules.conditions.base import EventCondition
+from sentry.rules.conditions.base import EventCondition, GenericCondition
 from sentry.tsdb.base import TSDBModel
 from sentry.types.condition_activity import (
     FREQUENCY_CONDITION_BUCKET_SIZE,
@@ -31,7 +32,7 @@
 from sentry.utils.iterators import chunked
 from sentry.utils.snuba import options_override
 
-standard_intervals = {
+STANDARD_INTERVALS: dict[str, tuple[str, timedelta]] = {
     "1m": ("one minute", timedelta(minutes=1)),
     "5m": ("5 minutes", timedelta(minutes=5)),
     "15m": ("15 minutes", timedelta(minutes=15)),
@@ -40,7 +41,7 @@
     "1w": ("one week", timedelta(days=7)),
     "30d": ("30 days", timedelta(days=30)),
 }
-comparison_intervals = {
+COMPARISON_INTERVALS: dict[str, tuple[str, timedelta]] = {
     "5m": ("5 minutes", timedelta(minutes=5)),
     "15m": ("15 minutes", timedelta(minutes=15)),
     "1h": ("one hour", timedelta(hours=1)),
@@ -48,17 +49,37 @@
     "1w": ("one week", timedelta(days=7)),
     "30d": ("30 days", timedelta(days=30)),
 }
-COMPARISON_TYPE_COUNT = "count"
-COMPARISON_TYPE_PERCENT = "percent"
-comparison_types = {
-    COMPARISON_TYPE_COUNT: COMPARISON_TYPE_COUNT,
-    COMPARISON_TYPE_PERCENT: COMPARISON_TYPE_PERCENT,
-}
+
 SNUBA_LIMIT = 10000
 
 
+class ComparisonType(TextChoices):
+    COUNT = "count"
+    PERCENT = "percent"
+
+
+class EventFrequencyConditionData(GenericCondition):
+    """
+    The base typed dict for all condition data representing EventFrequency issue
+    alert rule conditions.
+    """
+
+    # Either the count or percentage.
+    value: int
+    # The interval to compare the value against such as 5m, 1h, 3w, etc.
+    # e.g. # of issues is more than {value} in {interval}.
+    interval: str
+    # NOTE: Some of the earliest COUNT conditions were created without the
+    # comparisonType field, although modern rules will always have it.
+    comparisonType: NotRequired[Literal[ComparisonType.COUNT, ComparisonType.PERCENT]]
+    # The previous interval to compare the current interval against. This is
+    # only present in PERCENT conditions.
+    # e.g. # of issues is 50% higher in {interval} compared to {comparisonInterval}
+    comparisonInterval: NotRequired[str]
+
+
 class EventFrequencyForm(forms.Form):
-    intervals = standard_intervals
+    intervals = STANDARD_INTERVALS
     interval = forms.ChoiceField(
         choices=[
             (key, label)
@@ -69,13 +90,13 @@ class EventFrequencyForm(forms.Form):
     )
     value = forms.IntegerField(widget=forms.TextInput())
     comparisonType = forms.ChoiceField(
-        choices=tuple(comparison_types.items()),
+        choices=ComparisonType,
         required=False,
     )
     comparisonInterval = forms.ChoiceField(
         choices=[
             (key, label)
-            for key, (label, _) in sorted(comparison_intervals.items(), key=lambda item: item[1][1])
+            for key, (label, _) in sorted(COMPARISON_INTERVALS.items(), key=lambda item: item[1][1])
         ],
         required=False,
     )
@@ -88,8 +109,8 @@ def clean(self) -> dict[str, Any] | None:
         # Don't store an empty string here if the value isn't passed
         if cleaned_data.get("comparisonInterval") == "":
             del cleaned_data["comparisonInterval"]
-        cleaned_data["comparisonType"] = cleaned_data.get("comparisonType") or COMPARISON_TYPE_COUNT
-        if cleaned_data["comparisonType"] == COMPARISON_TYPE_PERCENT and not cleaned_data.get(
+        cleaned_data["comparisonType"] = cleaned_data.get("comparisonType") or ComparisonType.COUNT
+        if cleaned_data["comparisonType"] == ComparisonType.PERCENT and not cleaned_data.get(
             "comparisonInterval"
         ):
             msg = forms.ValidationError("comparisonInterval is required when comparing by percent")
@@ -99,10 +120,15 @@
 class BaseEventFrequencyCondition(EventCondition, abc.ABC):
-    intervals = standard_intervals
+    intervals = STANDARD_INTERVALS
     form_cls = EventFrequencyForm
 
-    def __init__(self, *args: Any, **kwargs: Any) -> None:
+    def __init__(
+        self,
+        data: EventFrequencyConditionData | None = None,
+        *args: Any,
+        **kwargs: Any,
+    ) -> None:
         self.tsdb = kwargs.pop("tsdb", tsdb)
         self.form_fields = {
             "value": {"type": "number", "placeholder": 100},
@@ -118,7 +144,9 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
             },
         }
 
-        super().__init__(*args, **kwargs)
+        # MyPy refuses to make TypedDict compatible with MutableMapping
+        # https://github.com/python/mypy/issues/4976
+        super().__init__(data=data, *args, **kwargs)  # type:ignore[misc]
 
     def _get_options(self) -> tuple[str | None, float | None]:
         interval, value = None, None
@@ -150,19 +178,19 @@ def passes_activity_frequency(
         if not (interval and value is not None):
             return False
         interval_delta = self.intervals[interval][1]
-        comparison_type = self.get_option("comparisonType", COMPARISON_TYPE_COUNT)
+        comparison_type = self.get_option("comparisonType", ComparisonType.COUNT)
 
         # extrapolate if interval less than bucket size
         # if comparing percent increase, both intervals will be increased, so do not extrapolate value
         if interval_delta < FREQUENCY_CONDITION_BUCKET_SIZE:
-            if comparison_type != COMPARISON_TYPE_PERCENT:
+            if comparison_type != ComparisonType.PERCENT:
                 value *= int(FREQUENCY_CONDITION_BUCKET_SIZE / interval_delta)
             interval_delta = FREQUENCY_CONDITION_BUCKET_SIZE
 
         result = bucket_count(activity.timestamp - interval_delta, activity.timestamp, buckets)
 
-        if comparison_type == COMPARISON_TYPE_PERCENT:
-            comparison_interval = comparison_intervals[self.get_option("comparisonInterval")][1]
+        if comparison_type == ComparisonType.PERCENT:
+            comparison_interval = COMPARISON_INTERVALS[self.get_option("comparisonInterval")][1]
             comparison_end = activity.timestamp - comparison_interval
 
             comparison_result = bucket_count(
@@ -175,7 +203,7 @@ def passes_activity_frequency(
     def get_preview_aggregate(self) -> tuple[str, str]:
         raise NotImplementedError
 
-    def query(self, event: GroupEvent, start: datetime, end: datetime, environment_id: str) -> int:
+    def query(self, event: GroupEvent, start: datetime, end: datetime, environment_id: int) -> int:
         """
         Queries Snuba for a unique condition for a single group.
         """
@@ -190,7 +218,7 @@ def query(self, event: GroupEvent, start: datetime, end: datetime, environment_i
         return query_result
 
     def query_hook(
-        self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
+        self, event: GroupEvent, start: datetime, end: datetime, environment_id: int
     ) -> int:
         """
         Abstract method that specifies how to query Snuba for a single group
@@ -199,7 +227,7 @@ def query_hook(
         raise NotImplementedError
 
     def batch_query(
-        self, group_ids: Sequence[int], start: datetime, end: datetime, environment_id: str
+        self, group_ids: set[int], start: datetime, end: datetime, environment_id: int
     ) -> dict[int, int]:
         """
         Queries Snuba for a unique condition for multiple groups.
         """
@@ -215,7 +243,7 @@ def batch_query(
         return batch_query_result
 
     def batch_query_hook(
-        self, group_ids: Sequence[int], start: datetime, end: datetime, environment_id: str
+        self, group_ids: set[int], start: datetime, end: datetime, environment_id: int
     ) -> dict[int, int]:
         """
         Abstract method that specifies how to query Snuba for multiple groups
@@ -223,7 +251,7 @@ def batch_query_hook(
         raise NotImplementedError
 
-    def get_rate(self, event: GroupEvent, interval: str, environment_id: str) -> int:
+    def get_rate(self, event: GroupEvent, interval: str, environment_id: int) -> int:
         _, duration = self.intervals[interval]
         end = timezone.now()
         # For conditions with interval >= 1 hour we don't need to worry about read your writes
@@ -233,9 +261,9 @@ def get_rate(self, event: GroupEvent, interval: str, environment_id: str) -> int
             option_override_cm = options_override({"consistent": False})
         with option_override_cm:
             result: int = self.query(event, end - duration, end, environment_id=environment_id)
-            comparison_type = self.get_option("comparisonType", COMPARISON_TYPE_COUNT)
-            if comparison_type == COMPARISON_TYPE_PERCENT:
-                comparison_interval = comparison_intervals[self.get_option("comparisonInterval")][1]
+            comparison_type = self.get_option("comparisonType", ComparisonType.COUNT)
+            if comparison_type == ComparisonType.PERCENT:
+                comparison_interval = COMPARISON_INTERVALS[self.get_option("comparisonInterval")][1]
                 comparison_end = end - comparison_interval
                 # TODO: Figure out if there's a way we can do this less frequently. All queries are
                 # automatically cached for 10s. We could consider trying to cache this and the main
@@ -255,7 +283,7 @@ def get_snuba_query_result(
         model: TSDBModel,
         start: datetime,
         end: datetime,
-        environment_id: str,
+        environment_id: int,
         referrer_suffix: str,
     ) -> Mapping[int, int]:
         result: Mapping[int, int] = tsdb_function(
@@ -278,7 +306,7 @@ def get_chunked_result(
         groups: list[Group],
         start: datetime,
         end: datetime,
-        environment_id: str,
+        environment_id: int,
         referrer_suffix: str,
     ) -> dict[int, int]:
         batch_totals: dict[int, int] = defaultdict(int)
@@ -319,7 +347,7 @@ class EventFrequencyCondition(BaseEventFrequencyCondition):
     label = "The issue is seen more than {value} times in {interval}"
 
     def query_hook(
-        self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
+        self, event: GroupEvent, start: datetime, end: datetime, environment_id: int
     ) -> int:
         sums: Mapping[int, int] = self.get_snuba_query_result(
             tsdb_function=self.tsdb.get_sums,
@@ -334,7 +362,7 @@ def query_hook(
         return sums[event.group_id]
 
     def batch_query_hook(
-        self, group_ids: Sequence[int], start: datetime, end: datetime, environment_id: str
+        self, group_ids: set[int], start: datetime, end: datetime, environment_id: int
     ) -> dict[int, int]:
         batch_sums: dict[int, int] = defaultdict(int)
         groups = Group.objects.filter(id__in=group_ids)
@@ -376,7 +404,7 @@ class EventUniqueUserFrequencyCondition(BaseEventFrequencyCondition):
     label = "The issue is seen by more than {value} users in {interval}"
 
     def query_hook(
-        self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
+        self, event: GroupEvent, start: datetime, end: datetime, environment_id: int
     ) -> int:
         totals: Mapping[int, int] = self.get_snuba_query_result(
             tsdb_function=self.tsdb.get_distinct_counts_totals,
@@ -391,7 +419,7 @@ def query_hook(
         return totals[event.group_id]
 
     def batch_query_hook(
-        self, group_ids: Sequence[int], start: datetime, end: datetime, environment_id: str
+        self, group_ids: set[int], start: datetime, end: datetime, environment_id: int
     ) -> dict[int, int]:
         batch_totals: dict[int, int] = defaultdict(int)
         groups = Group.objects.filter(id__in=group_ids)
@@ -428,7 +456,7 @@ def get_preview_aggregate(self) -> tuple[str, str]:
         return "uniq", "user"
 
 
-percent_intervals = {
+PERCENT_INTERVALS: dict[str, tuple[str, timedelta]] = {
     "1m": ("1 minute", timedelta(minutes=1)),
     "5m": ("5 minutes", timedelta(minutes=5)),
     "10m": ("10 minutes", timedelta(minutes=10)),
@@ -436,7 +464,7 @@
     "1h": ("1 hour", timedelta(minutes=60)),
 }
 
-percent_intervals_to_display = {
+PERCENT_INTERVALS_TO_DISPLAY: dict[str, tuple[str, timedelta]] = {
     "5m": ("5 minutes", timedelta(minutes=5)),
     "10m": ("10 minutes", timedelta(minutes=10)),
     "30m": ("30 minutes", timedelta(minutes=30)),
@@ -446,12 +474,12 @@ def get_preview_aggregate(self) -> tuple[str, str]:
 
 
 class EventFrequencyPercentForm(EventFrequencyForm):
-    intervals = percent_intervals_to_display
+    intervals = PERCENT_INTERVALS_TO_DISPLAY
     interval = forms.ChoiceField(
         choices=[
             (key, label)
             for key, (label, duration) in sorted(
-                percent_intervals_to_display.items(),
+                PERCENT_INTERVALS_TO_DISPLAY.items(),
                 key=lambda key____label__duration: key____label__duration[1][1],
             )
         ]
@@ -462,7 +490,7 @@ def clean(self) -> dict[str, Any] | None:
         cleaned_data = super().clean()
         if (
             cleaned_data
-            and cleaned_data["comparisonType"] == COMPARISON_TYPE_COUNT
+            and cleaned_data["comparisonType"] == ComparisonType.COUNT
             and cleaned_data.get("value", 0) > 100
         ):
             self.add_error(
@@ -479,7 +507,7 @@ class EventFrequencyPercentCondition(BaseEventFrequencyCondition):
     logger = logging.getLogger("sentry.rules.event_frequency")
 
     def __init__(self, *args: Any, **kwargs: Any) -> None:
-        self.intervals = percent_intervals
+        self.intervals = PERCENT_INTERVALS
         self.form_cls = EventFrequencyPercentForm
         super().__init__(*args, **kwargs)
 
@@ -490,14 +518,14 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
                 "choices": [
                     (key, label)
                     for key, (label, duration) in sorted(
-                        percent_intervals_to_display.items(),
+                        PERCENT_INTERVALS_TO_DISPLAY.items(),
                         key=lambda key____label__duration: key____label__duration[1][1],
                     )
                 ],
             }
 
     def query_hook(
-        self, event: GroupEvent, start: datetime, end: datetime, environment_id: str
+        self, event: GroupEvent, start: datetime, end: datetime, environment_id: int
     ) -> int:
         project_id = event.project_id
         cache_key = f"r.c.spc:{project_id}-{environment_id}"
@@ -516,7 +544,7 @@ def query_hook(
 
         if session_count_last_hour >= MIN_SESSIONS_TO_FIRE:
             interval_in_minutes = (
-                percent_intervals[self.get_option("interval")][1].total_seconds() // 60
+                PERCENT_INTERVALS[self.get_option("interval")][1].total_seconds() // 60
             )
             avg_sessions_in_interval = session_count_last_hour / (60 / interval_in_minutes)
             issue_count = self.get_snuba_query_result(
diff --git a/src/sentry/rules/processing/delayed_processing.py b/src/sentry/rules/processing/delayed_processing.py
index ca6b374658c7ec..5bf17432952134 100644
--- a/src/sentry/rules/processing/delayed_processing.py
+++ b/src/sentry/rules/processing/delayed_processing.py
@@ -1,11 +1,25 @@
+import contextlib
 import logging
-from collections.abc import Mapping
+from collections import defaultdict
+from datetime import datetime, timedelta
+from typing import DefaultDict, NamedTuple
 
 from sentry.buffer.redis import BufferHookEvent, RedisBuffer, redis_buffer_registry
 from sentry.models.project import Project
+from sentry.models.rule import Rule
+from sentry.rules import rules as rules_registry
+from sentry.rules.conditions.base import EventCondition
+from sentry.rules.conditions.event_frequency import (
+    BaseEventFrequencyCondition,
+    ComparisonType,
+    EventFrequencyConditionData,
+    percent_increase,
+)
+from sentry.rules.processing.processor import is_condition_slow, split_conditions_and_filters
+from sentry.silo.base import SiloMode
+from sentry.tasks.base import instrumented_task
 from sentry.utils import metrics
 from sentry.utils.query import RangeQuerySetWrapper
-from sentry.utils.safe import safe_execute
+from sentry.utils.snuba import options_override
 
 logger = logging.getLogger("sentry.rules.delayed_processing")
 
@@ -13,22 +27,183 @@
 PROJECT_ID_BUFFER_LIST_KEY = "project_id_buffer_list"
 
 
+def get_slow_conditions(rule: Rule) -> list[EventFrequencyConditionData]:
+    """
+    Returns the slow conditions of a rule model instance.
+ """ + conditions_and_filters = rule.data.get("conditions", ()) + conditions, _ = split_conditions_and_filters(conditions_and_filters) + slow_conditions = [cond for cond in conditions if is_condition_slow(cond)] + + # MyPy refuses to make TypedDict compatible with MutableMapping + # https://github.com/python/mypy/issues/4976 + return slow_conditions # type: ignore[return-value] + + @redis_buffer_registry.add_handler(BufferHookEvent.FLUSH) def process_delayed_alert_conditions(buffer: RedisBuffer) -> None: with metrics.timer("delayed_processing.process_all_conditions.duration"): project_ids = buffer.get_set(PROJECT_ID_BUFFER_LIST_KEY) for project in RangeQuerySetWrapper(Project.objects.filter(id__in=project_ids)): - rulegroup_event_mapping = buffer.get_hash(model=Project, field={"id": project.id}) with metrics.timer("delayed_processing.process_project.duration"): - safe_execute( - apply_delayed, - project, - rulegroup_event_mapping, - _with_transaction=False, + apply_delayed.delay(project=project, buffer=buffer) + + +@instrumented_task( + name="sentry.delayed_processing.tasks.apply_delayed", + queue="dynamicsampling", + default_retry_delay=5, + max_retries=5, + soft_time_limit=50, + time_limit=60, # 1 minute + silo_mode=SiloMode.REGION, +) +def apply_delayed(project: Project, buffer: RedisBuffer) -> None: + """ """ + + # XXX(schew2381): This is from + # https://github.com/getsentry/sentry/blob/fbfd6800cf067f171840c427df7d5c2864b91fb0/src/sentry/rules/processor.py#L209-L212 + # Do we need to check this before we start the steps (ask dan, I think the + # answer is now b/c we check it before triggering actions)) + + # frequency = rule.data.get("frequency") or Rule.DEFAULT_FREQUENCY + # now = datetime.now() + # freq_offset = now - timedelta(minutes=frequency) + # if status.last_active and status.last_active > freq_offset: + # return + + # STEP 1: Fetch the rulegroup_to_event mapping for the project from redis + + # The mapping looks like: {rule.id}:{group.id} -> {event.id} + # TODO: Updating return type of get_hash + rulegroup_to_event = buffer.get_hash(model=Project, field={"id": project.id}) + + # STEP 2: Map each rule to the groups that must be checked for that rule. + rules_to_groups: DefaultDict[int, set[int]] = defaultdict(set) + for rule_group in rulegroup_to_event.keys(): + rule_id, group_id = rule_group.split(":") + rules_to_groups[int(rule_id)].add(int(group_id)) + + # STEP 3: Fetch the Rule models we need to check + rules = Rule.objects.filter(id__in=list(rules_to_groups.keys())) + + # STEP 4: Create a map of unique conditions to a tuple containing the JSON + # information needed to instantiate that condition class and the group_ids that + # must be checked for that condition. We don't query per rule condition because + # condition of the same class, interval, and environment can share a single scan. + + # Map unique conditions to the group IDs that need to checked for that + # condition. We also store a pointer to that condition's JSON so we can + # instantiate the class later + class UniqueCondition(NamedTuple): + cls_id: str + interval: str + environment_id: int + + class DataAndGroups(NamedTuple): + data: EventFrequencyConditionData + group_ids: set[int] + + condition_groups: dict[UniqueCondition, DataAndGroups] = {} + + for rule in rules: + # We only want to a rule's fast conditions because rules are only added + # to the buffer if we've already checked their fast conditions. 
+        slow_conditions = get_slow_conditions(rule)
+        for condition_data in slow_conditions:
+            unique_condition = UniqueCondition(
+                condition_data["id"], condition_data["interval"], rule.environment_id
+            )
+
+            # If group_ids already exist for this unique condition, add this
+            # rule's group_ids to that set
+            if data_and_groups := condition_groups.get(unique_condition):
+                data_and_groups.group_ids.update(rules_to_groups[rule.id])
+            # Otherwise, create the tuple containing the condition data and the
+            # set of group_ids that apply to the unique condition
+            else:
+                condition_groups[unique_condition] = DataAndGroups(
+                    condition_data, rules_to_groups[rule.id]
+                )
+
+    # STEP 5: Instantiate each unique condition, and evaluate the relevant
+    # group_ids that apply for that condition
+
+    # XXX: Probably want to safe execute somewhere in this step before making
+    # the query
+    condition_group_results: dict[UniqueCondition, dict[int, int]] = {}
+    for unique_condition, (condition_data, group_ids) in condition_groups.items():
+        condition_cls = rules_registry.get(unique_condition.cls_id)
+
+        if condition_cls is None:
+            logger.warning("Unregistered condition %r", condition_data["id"])
+            continue
+
+        condition_inst: BaseEventFrequencyCondition = condition_cls(
+            project=project, data=condition_data, rule=rule
+        )
+        if not isinstance(condition_inst, EventCondition):
+            logger.warning("Unregistered condition %r", condition_data["id"])
+            continue
+
+        _, duration = condition_inst.intervals[unique_condition.interval]
+        end = datetime.now()
+        # For conditions with interval >= 1 hour we don't need to worry about
+        # read-your-writes consistency. Disable it so that we can scale to more nodes.
+        option_override_cm: contextlib.AbstractContextManager[object] = contextlib.nullcontext()
+        if duration >= timedelta(hours=1):
+            option_override_cm = options_override({"consistent": False})
+
+        with option_override_cm:
+            results = condition_inst.batch_query(
+                group_ids, end - duration, end, environment_id=unique_condition.environment_id
+            )
+
+            # If the condition is a percent comparison, we need to query the
+            # previous interval to compare against the current interval
+            comparison_type = condition_data.get("comparisonType", ComparisonType.COUNT)
+            if comparison_type == ComparisonType.PERCENT:
+                comparison_interval = condition_inst.intervals[unique_condition.interval][1]
+                comparison_end = end - comparison_interval
+
+                comparison_results = condition_inst.batch_query(
+                    group_ids,
+                    comparison_end - duration,
+                    comparison_end,
+                    environment_id=unique_condition.environment_id,
                 )
+                results = {
+                    group_id: percent_increase(results[group_id], comparison_results[group_id])
+                    for group_id in group_ids
+                }
+
+        condition_group_results[unique_condition] = results
+
+    # STEP 6: For each rule and group applying to that rule, check if the group
+    # meets the conditions of the rule (basically doing BaseEventFrequencyCondition.passes)
+    for rule in rules:
+        # mypy complains (expression has type "int", variable has type "str")
+        # because group_id was bound to a str by rule_group.split(":") in STEP 2
+        for group_id in rules_to_groups[rule.id]:  # type: ignore[assignment]
+            pass
+
+        # get:
+        # 1. rule conditions + result of condition check in results dict
+        # 2. the predicate func (any, all) to iterate over for conditions
+        # 3. the rule's threshold value to compare the query result
+        #    against (e.g. # issues is > 50, 50 is the value). This is
This is + # stored in DataAndGroups.data["value"], see EventFrequencyConditionData + + # Store all rule/group pairs we need to activate + + # Step 8: Bulk fetch the events of the rule/group pairs we need to trigger + # actions for, then trigger those actions + + # Was thinking we could do something like this where we get futures, + # then safe execute them + # https://github.com/getsentry/sentry/blob/3075c03e0819dd2c974897cc3014764c43151db5/src/sentry/tasks/post_process.py#L1115-L1128 -def apply_delayed(project: Project, rule_group_pairs: Mapping[str, str]) -> None: - pass + # XXX: Be sure to do this before triggering actions! + # https://github.com/getsentry/sentry/blob/fbfd6800cf067f171840c427df7d5c2864b91fb0/src/sentry/rules/processor.py#L247-L254 diff --git a/src/sentry/rules/processing/processor.py b/src/sentry/rules/processing/processor.py index 3d7a0e60b40925..565d5d221f06e1 100644 --- a/src/sentry/rules/processing/processor.py +++ b/src/sentry/rules/processing/processor.py @@ -25,6 +25,8 @@ from sentry.utils.hashlib import hash_values from sentry.utils.safe import safe_execute +logger = logging.getLogger("sentry.rules") + SLOW_CONDITION_MATCHES = ["event_frequency"] @@ -38,16 +40,40 @@ def get_match_function(match_name: str) -> Callable[..., bool] | None: return None -def is_condition_slow(condition: Mapping[str, str]) -> bool: +def is_condition_slow( + condition: Mapping[str, Any], +) -> bool: + """ + Returns whether a condition is considered slow. Note that the slow condition + mapping take the form on EventFrequencyConditionData. + """ for slow_conditions in SLOW_CONDITION_MATCHES: if slow_conditions in condition["id"]: return True return False -class RuleProcessor: - logger = logging.getLogger("sentry.rules") +def split_conditions_and_filters( + data: Mapping[str, Any], +) -> tuple[list[Mapping[str, Any]], list[Mapping[str, Any]]]: + conditions_and_filters = data.get("conditions", []) + conditions = [] + filters = [] + for condition_or_filter in conditions_and_filters: + id = condition_or_filter["id"] + rule_cls = rules.get(id) + if rule_cls is None: + logger.warning("Unregistered condition or filter %r", id) + continue + + if rule_cls.rule_type == EventFilter.rule_type: + filters.append(condition_or_filter) + elif rule_cls.rule_type == EventCondition.rule_type: + conditions.append(condition_or_filter) + return conditions, filters + +class RuleProcessor: def __init__( self, event: GroupEvent, @@ -126,7 +152,7 @@ def bulk_get_rule_status(self, rules: Sequence[Rule]) -> Mapping[int, GroupRuleS if missing_rule_ids: # Shouldn't happen, but log just in case - self.logger.error( + logger.error( "Failed to fetch some GroupRuleStatuses in RuleProcessor", extra={"missing_rule_ids": missing_rule_ids, "group_id": self.group.id}, ) @@ -138,16 +164,19 @@ def bulk_get_rule_status(self, rules: Sequence[Rule]) -> Mapping[int, GroupRuleS return rule_statuses def condition_matches( - self, condition: dict[str, Any], state: EventState, rule: Rule + self, + condition: Mapping[str, Any], + state: EventState, + rule: Rule, ) -> bool | None: condition_cls = rules.get(condition["id"]) if condition_cls is None: - self.logger.warning("Unregistered condition %r", condition["id"]) + logger.warning("Unregistered condition %r", condition["id"]) return None - condition_inst = condition_cls(self.project, data=condition, rule=rule) + condition_inst = condition_cls(project=self.project, data=condition, rule=rule) if not isinstance(condition_inst, (EventCondition, EventFilter)): - 
self.logger.warning("Unregistered condition %r", condition["id"]) + logger.warning("Unregistered condition %r", condition["id"]) return None passes: bool = safe_execute( condition_inst.passes, @@ -157,15 +186,6 @@ def condition_matches( ) return passes - def get_rule_type(self, condition: Mapping[str, Any]) -> str | None: - rule_cls = rules.get(condition["id"]) - if rule_cls is None: - self.logger.warning("Unregistered condition or filter %r", condition["id"]) - return None - - rule_type: str = rule_cls.rule_type - return rule_type - def get_state(self) -> EventState: return EventState( is_new=self.is_new, @@ -196,7 +216,7 @@ def apply_rule(self, rule: Rule, status: GroupRuleStatus) -> None: condition_match = rule.data.get("action_match") or Rule.DEFAULT_CONDITION_MATCH filter_match = rule.data.get("filter_match") or Rule.DEFAULT_FILTER_MATCH - rule_condition_list = rule.data.get("conditions", ()) + conditions_and_filters = rule.data.get("conditions", ()) frequency = rule.data.get("frequency") or Rule.DEFAULT_FREQUENCY try: environment = self.event.get_environment() @@ -213,13 +233,7 @@ def apply_rule(self, rule: Rule, status: GroupRuleStatus) -> None: state = self.get_state() - condition_list = [] - filter_list = [] - for rule_cond in rule_condition_list: - if self.get_rule_type(rule_cond) == "condition/event": - condition_list.append(rule_cond) - else: - filter_list.append(rule_cond) + condition_list, filter_list = split_conditions_and_filters(conditions_and_filters) # Sort `condition_list` so that most expensive conditions run last. condition_list.sort(key=lambda condition: is_condition_slow(condition)) @@ -230,13 +244,13 @@ def apply_rule(self, rule: Rule, status: GroupRuleStatus) -> None: ): if not predicate_list: continue - predicate_iter = (self.condition_matches(f, state, rule) for f in predicate_list) + predicate_iter = [self.condition_matches(f, state, rule) for f in predicate_list] predicate_func = get_match_function(match) if predicate_func: if not predicate_func(predicate_iter): return else: - self.logger.error( + logger.error( f"Unsupported {name}_match {match!r} for rule {rule.id}", filter_match, rule.id, @@ -286,7 +300,7 @@ def activate_downstream_actions( notification_uuid=notification_uuid, ) if results is None: - self.logger.warning("Action %s did not return any futures", action["id"]) + logger.warning("Action %s did not return any futures", action["id"]) continue for future in results: