diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt
index 64539789d26f90..6f1a7bc1fd332b 100644
--- a/migrations_lockfile.txt
+++ b/migrations_lockfile.txt
@@ -23,7 +23,7 @@ preprod: 0001_emerge_upload_models
 
 replays: 0001_squashed_0005_drop_replay_index
 
-sentry: 0924_dashboard_add_unique_constraint_for_user_org_position
+sentry: 0925_backfill_open_periods
 
 social_auth: 0001_squashed_0002_default_auto_field
diff --git a/src/sentry/migrations/0925_backfill_open_periods.py b/src/sentry/migrations/0925_backfill_open_periods.py
new file mode 100644
index 00000000000000..0b890ed2d49135
--- /dev/null
+++ b/src/sentry/migrations/0925_backfill_open_periods.py
@@ -0,0 +1,207 @@
+# Generated by Django 5.2.1 on 2025-05-30 00:42
+
+import logging
+from collections import defaultdict
+from datetime import datetime
+from enum import Enum
+from typing import Any
+
+from django.conf import settings
+from django.db import IntegrityError, migrations, router, transaction
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+
+from sentry.new_migrations.migrations import CheckedMigration
+from sentry.utils import redis
+from sentry.utils.iterators import chunked
+from sentry.utils.query import RangeQuerySetWrapperWithProgressBarApprox
+
+logger = logging.getLogger(__name__)
+
+CHUNK_SIZE = 100
+
+
+# Constants and enums copied from application code so the migration is self-contained
+class ActivityType(Enum):
+    SET_REGRESSION = 6
+    SET_RESOLVED = 1
+    SET_RESOLVED_IN_RELEASE = 13
+    SET_RESOLVED_BY_AGE = 15
+    SET_RESOLVED_IN_COMMIT = 16
+    SET_RESOLVED_IN_PULL_REQUEST = 21
+
+
+RESOLVED_ACTIVITY_TYPES = [
+    ActivityType.SET_RESOLVED.value,
+    ActivityType.SET_RESOLVED_IN_RELEASE.value,
+    ActivityType.SET_RESOLVED_BY_AGE.value,
+    ActivityType.SET_RESOLVED_IN_COMMIT.value,
+    ActivityType.SET_RESOLVED_IN_PULL_REQUEST.value,
+]
+
+
+class GroupStatus:
+    UNRESOLVED = 0
+    RESOLVED = 1
+
+
+# end copied constants and enums
+
+
+def get_open_periods_for_group(
+    apps: StateApps,
+    group_id: int,
+    status: int,
+    project_id: int,
+    first_seen: datetime,
+    activities: list[Any],
+    GroupOpenPeriod: Any,
+) -> list[Any]:
+    # No activities means the group has been open since the first_seen date
+    if not activities:
+        return [
+            GroupOpenPeriod(
+                group_id=group_id,
+                project_id=project_id,
+                date_started=first_seen,
+            )
+        ]
+
+    # Pair each period start (first_seen or a later regression) with the next resolving activity
+    open_periods = []
+    regression_time: datetime | None = first_seen
+    for activity in activities:
+        if activity.type == ActivityType.SET_REGRESSION.value and regression_time is None:
+            regression_time = activity.datetime
+
+        elif activity.type in RESOLVED_ACTIVITY_TYPES and regression_time is not None:
+            open_periods.append(
+                GroupOpenPeriod(
+                    group_id=group_id,
+                    project_id=project_id,
+                    date_started=regression_time,
+                    date_ended=activity.datetime,
+                    resolution_activity=activity,
+                    user_id=activity.user_id,
+                )
+            )
+
+            regression_time = None
+
+    # Handle currently open period if the group is unresolved
+    if status == GroupStatus.UNRESOLVED and regression_time is not None:
+        open_periods.append(
+            GroupOpenPeriod(
+                group_id=group_id,
+                project_id=project_id,
+                date_started=regression_time,
+            )
+        )
+
+    return open_periods
+
+
+def _backfill_group_open_periods(
+    apps: StateApps, group_data: list[tuple[int, datetime, int, int]]
+) -> None:
+    GroupOpenPeriod = apps.get_model("sentry", "GroupOpenPeriod")
+    Activity = apps.get_model("sentry", "Activity")
+
+    group_ids = [group_id for group_id, _, _, _ in group_data]
+    groups_with_open_periods = set(
+        GroupOpenPeriod.objects.filter(group_id__in=group_ids)
+        .values_list("group_id", flat=True)
+        .distinct()
+    )
+
+    group_ids = [group_id for group_id in group_ids if group_id not in groups_with_open_periods]
+    # Filter to SET_REGRESSION and SET_RESOLVED_XX activities to find the bounds of each open period.
+    # The only UNRESOLVED activity that would matter is the one marking group creation, but no
+    # Activity row is written for that, so first_seen serves as the start of the first open period.
+
+    activities = defaultdict(list)
+    for activity in Activity.objects.filter(
+        group_id__in=group_ids,
+        type__in=[ActivityType.SET_REGRESSION.value, *RESOLVED_ACTIVITY_TYPES],
+    ).order_by("datetime"):
+        activities[activity.group_id].append(activity)
+
+    open_periods = []
+    for group_id, first_seen, status, project_id in group_data:
+        # Skip groups that already have open periods
+        if group_id in groups_with_open_periods:
+            continue
+
+        open_periods.extend(
+            get_open_periods_for_group(
+                apps,
+                group_id,
+                status,
+                project_id,
+                first_seen,
+                activities[group_id],
+                GroupOpenPeriod,
+            )
+        )
+
+    with transaction.atomic(router.db_for_write(GroupOpenPeriod)):
+        try:
+            GroupOpenPeriod.objects.bulk_create(open_periods)
+        except IntegrityError as e:
+            logger.exception(
+                "Error creating open period",
+                extra={"group_ids": group_ids, "error": e},
+            )
+
+
+def backfill_group_open_periods(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
+    Group = apps.get_model("sentry", "Group")
+
+    backfill_key = "backfill_group_open_periods_from_activity_1"
+    redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)
+
+    progress_id = int(redis_client.get(backfill_key) or 0)
+    for group_data in chunked(
+        RangeQuerySetWrapperWithProgressBarApprox(
+            Group.objects.filter(id__gt=progress_id).values_list(
+                "id", "first_seen", "status", "project_id"
+            ),
+            result_value_getter=lambda item: item[0],
+        ),
+        CHUNK_SIZE,
+    ):
+        logger.info(
+            "Processing batch for group open period backfill",
+            extra={"last_group_id": group_data[-1][0]},
+        )
+        _backfill_group_open_periods(apps, group_data)
+        # Save progress to Redis in case the backfill has to be restarted
+        redis_client.set(backfill_key, group_data[-1][0], ex=60 * 60 * 24 * 7)
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed. So this should not be used for most operations that alter the schema
+    # of a table.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    # monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    # run this outside deployments so that we don't block them. Note that while adding an index
+    # is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = True
+
+    dependencies = [
+        ("sentry", "0924_dashboard_add_unique_constraint_for_user_org_position"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            backfill_group_open_periods,
+            migrations.RunPython.noop,
+            hints={"tables": ["sentry_groupopenperiod"]},
+        ),
+    ]
diff --git a/tests/sentry/migrations/test_0925_backfill_open_periods.py b/tests/sentry/migrations/test_0925_backfill_open_periods.py
new file mode 100644
index 00000000000000..f32c599ae3cb41
--- /dev/null
+++ b/tests/sentry/migrations/test_0925_backfill_open_periods.py
@@ -0,0 +1,331 @@
+from datetime import timedelta
+
+from django.utils import timezone
+
+from sentry.models.activity import Activity
+from sentry.models.group import Group, GroupStatus
+from sentry.models.groupopenperiod import GroupOpenPeriod
+from sentry.models.organization import Organization
+from sentry.testutils.cases import TestMigrations
+from sentry.types.activity import ActivityType
+from sentry.types.group import GroupSubStatus
+
+
+class BackfillGroupOpenPeriodsTest(TestMigrations):
+    migrate_from = "0924_dashboard_add_unique_constraint_for_user_org_position"
+    migrate_to = "0925_backfill_open_periods"
+
+    def _create_resolved_group(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+        resolution_activity_1 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED.value,
+            datetime=self.now - timedelta(days=2),
+        )
+        group.update(status=GroupStatus.UNRESOLVED, substatus=GroupSubStatus.REGRESSED)
+        regressed_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_REGRESSION.value,
+            datetime=self.now - timedelta(days=1),
+        )
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity_2 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED.value,
+            datetime=self.now,
+        )
+        return (
+            group,
+            [group.first_seen, regressed_activity.datetime],
+            [
+                resolution_activity_1.datetime,
+                resolution_activity_2.datetime,
+            ],
+            [resolution_activity_1, resolution_activity_2],
+        )
+
+    def _create_regressed_group(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED.value,
+            datetime=self.now - timedelta(days=2),
+        )
+        group.update(status=GroupStatus.UNRESOLVED, substatus=GroupSubStatus.REGRESSED)
+        regressed_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_REGRESSION.value,
+            datetime=self.now - timedelta(days=1),
+        )
+        return (
+            group,
+            [group.first_seen, regressed_activity.datetime],
+            [resolution_activity.datetime, None],
+            [resolution_activity, None],
+        )
+
+    def _create_ignored_group(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+        group.update(status=GroupStatus.IGNORED, substatus=GroupSubStatus.UNTIL_ESCALATING)
+        Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_IGNORED.value,
+            datetime=self.now - timedelta(days=2),
+        )
+        return (
+            group,
+            [group.first_seen],
+            [None],
+            [None],
+        )
+
+    def _create_resolved_group_with_open_period(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+        gop = GroupOpenPeriod.objects.create(
+            group=group,
+            project=self.project,
+            date_started=group.first_seen,
+        )
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED.value,
+            datetime=self.now,
+        )
+        gop.update(
+            resolution_activity=resolution_activity,
+            date_ended=resolution_activity.datetime,
+        )
+        return (
+            group,
+            [group.first_seen],
+            [resolution_activity.datetime],
+            [resolution_activity],
+        )
+
+    def _create_auto_resolved_group(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED_BY_AGE.value,
+            datetime=self.now,
+        )
+
+        return (
+            group,
+            [group.first_seen],
+            [resolution_activity.datetime],
+            [resolution_activity],
+        )
+
+    def _create_auto_resolved_regressed_group(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED_BY_AGE.value,
+            datetime=self.now - timedelta(days=2),
+        )
+
+        group.update(status=GroupStatus.UNRESOLVED, substatus=GroupSubStatus.REGRESSED)
+        regressed_activity = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_REGRESSION.value,
+            datetime=self.now - timedelta(days=1),
+        )
+
+        return (
+            group,
+            [group.first_seen, regressed_activity.datetime],
+            [resolution_activity.datetime, None],
+            [resolution_activity, None],
+        )
+
+    def _create_regressed_group_with_auto_resolved_cycles(self):
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=6),
+        )
+
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity_1 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED_BY_AGE.value,
+            datetime=self.now - timedelta(days=5),
+        )
+
+        group.update(status=GroupStatus.UNRESOLVED, substatus=GroupSubStatus.REGRESSED)
+        regressed_activity_1 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_REGRESSION.value,
+            datetime=self.now - timedelta(days=4),
+        )
+
+        group.update(status=GroupStatus.RESOLVED, substatus=None)
+        resolution_activity_2 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_RESOLVED_BY_AGE.value,
+            datetime=self.now - timedelta(days=3),
+        )
+
+        group.update(status=GroupStatus.UNRESOLVED, substatus=GroupSubStatus.REGRESSED)
+        regressed_activity_2 = Activity.objects.create(
+            group=group,
+            project=self.project,
+            type=ActivityType.SET_REGRESSION.value,
+            datetime=self.now - timedelta(days=2),
+        )
+
+        return (
+            group,
+            [group.first_seen, regressed_activity_1.datetime, regressed_activity_2.datetime],
+            [resolution_activity_1.datetime, resolution_activity_2.datetime, None],
+            [resolution_activity_1, resolution_activity_2, None],
+        )
+
+    def setup_before_migration(self, apps):
+        self.now = timezone.now()
+        self.organization = Organization.objects.create(name="test", slug="test")
+        self.project = self.create_project(organization=self.organization)
+
+        self.test_cases = []
+
+        # Create a group that has been resolved, then regressed, then resolved again
+        group, starts, ends, activities = self._create_resolved_group()
+        assert group.status == GroupStatus.RESOLVED
+        assert group.substatus is None
+        self.test_cases.append(("resolved_group", group, starts, ends, activities))
+
+        # Create a group that has been resolved and then regressed
+        group, starts, ends, activities = self._create_regressed_group()
+        assert group.status == GroupStatus.UNRESOLVED
+        assert group.substatus == GroupSubStatus.REGRESSED
+        self.test_cases.append(("regressed_group", group, starts, ends, activities))
+
+        # Create a new group that has never been resolved
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=3),
+        )
+        assert group.status == GroupStatus.UNRESOLVED
+        assert group.substatus == GroupSubStatus.NEW
+        self.test_cases.append(("new_group", group, [group.first_seen], [None], [None]))
+
+        # Create a group that has been ignored until escalating
+        group, starts, ends, activities = self._create_ignored_group()
+        assert group.status == GroupStatus.IGNORED
+        assert group.substatus == GroupSubStatus.UNTIL_ESCALATING
+        self.test_cases.append(("ignored_group", group, starts, ends, activities))
+
+        # Create an unresolved group that already has an open period
+        group = Group.objects.create(
+            project=self.project,
+            status=GroupStatus.UNRESOLVED,
+            substatus=GroupSubStatus.NEW,
+            first_seen=self.now - timedelta(days=5),
+        )
+        GroupOpenPeriod.objects.create(
+            group=group,
+            project=self.project,
+            date_started=group.first_seen,
+        )
+        assert GroupOpenPeriod.objects.filter(group=group).count() == 1
+        self.test_cases.append(
+            ("unresolved_group_with_open_period", group, [group.first_seen], [None], [None])
+        )
+
+        # Create a resolved group that already has an open period
+        group, starts, ends, activities = self._create_resolved_group_with_open_period()
+        assert GroupOpenPeriod.objects.filter(group=group).count() == 1
+        assert group.status == GroupStatus.RESOLVED
+        assert group.substatus is None
+        self.test_cases.append(("resolved_group_with_open_period", group, starts, ends, activities))
+
+        # Create a group that has been auto-resolved
+        group, starts, ends, activities = self._create_auto_resolved_group()
+        assert group.status == GroupStatus.RESOLVED
+        assert group.substatus is None
+        self.test_cases.append(("auto_resolved_group", group, starts, ends, activities))
+
+        # Create a group that has been auto-resolved and then regressed
+        group, starts, ends, activities = self._create_auto_resolved_regressed_group()
+        assert group.status == GroupStatus.UNRESOLVED
+        assert group.substatus == GroupSubStatus.REGRESSED
+        self.test_cases.append(("auto_resolved_regressed_group", group, starts, ends, activities))
+
+        # Create a group that has been regressed, auto-resolved, and then regressed again
+        group, starts, ends, activities = self._create_regressed_group_with_auto_resolved_cycles()
+        assert group.status == GroupStatus.UNRESOLVED
+        assert group.substatus == GroupSubStatus.REGRESSED
+        self.test_cases.append(
+            ("regressed_group_with_auto_resolved_cycles", group, starts, ends, activities)
+        )
+
+    def test(self):
+        for description, group, starts, ends, activities in self.test_cases:
+            group.refresh_from_db()
+            open_periods = GroupOpenPeriod.objects.filter(group=group).order_by("date_started")
+            assert len(open_periods) == len(
+                starts
+            ), f"{description}: Expected {len(starts)} open periods, got {len(open_periods)}"
+            for i, open_period in enumerate(open_periods):
+                assert (
+                    open_period.date_started == starts[i]
+                ), f"{description}: Expected open period start date {starts[i]}, got {open_period.date_started}"
+                assert (
+                    open_period.date_ended == ends[i]
+                ), f"{description}: Expected open period end date {ends[i]}, got {open_period.date_ended}"
+                assert (
+                    open_period.resolution_activity == activities[i]
+                ), f"{description}: Expected resolution activity {activities[i]}, got {open_period.resolution_activity}"