Skip to content

Commit 42423d1

Browse files
authored
fix(aci): Rework logic to backfill open periods (#92532)
This PR changes the backfill logic to make sure it works with the regressed/auto-resolved cycles that tripped us up earlier. There's also a handful of cases where we see successive regressions / resolutions and the old logic didn't know how to handle that. These changes will set the open period based on the first regression/first resolution if there are multiple in a row. Makes for an easier review if you compare against the first commit which just copies the changes from migration 0878
1 parent 01074a2 commit 42423d1

File tree

3 files changed

+538
-1
lines changed

3 files changed

+538
-1
lines changed

migrations_lockfile.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ preprod: 0001_emerge_upload_models
2323

2424
replays: 0001_squashed_0005_drop_replay_index
2525

26-
sentry: 0924_dashboard_add_unique_constraint_for_user_org_position
26+
sentry: 0925_backfill_open_periods
2727

2828
social_auth: 0001_squashed_0002_default_auto_field
2929

Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
# Generated by Django 5.2.1 on 2025-05-30 00:42
2+
3+
import logging
4+
from collections import defaultdict
5+
from datetime import datetime
6+
from enum import Enum
7+
from typing import Any
8+
9+
from django.conf import settings
10+
from django.db import IntegrityError, migrations, router, transaction
11+
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
12+
from django.db.migrations.state import StateApps
13+
14+
from sentry.new_migrations.migrations import CheckedMigration
15+
from sentry.utils import redis
16+
from sentry.utils.iterators import chunked
17+
from sentry.utils.query import RangeQuerySetWrapperWithProgressBarApprox
18+
19+
# Module-level logger used to report backfill progress and bulk-create failures.
logger = logging.getLogger(__name__)

# Number of groups processed per batch of the backfill.
CHUNK_SIZE = 100
22+
23+
24+
# copied constants and enums
class ActivityType(Enum):
    """Subset of sentry's ActivityType values relevant to open periods."""

    SET_REGRESSION = 6
    SET_RESOLVED = 1
    SET_RESOLVED_IN_RELEASE = 13
    SET_RESOLVED_BY_AGE = 15
    SET_RESOLVED_IN_COMMIT = 16
    SET_RESOLVED_IN_PULL_REQUEST = 21


# Activity types that mark a group as resolved, i.e. that close an open period.
RESOLVED_ACTIVITY_TYPES = [
    activity_type.value
    for activity_type in (
        ActivityType.SET_RESOLVED,
        ActivityType.SET_RESOLVED_IN_RELEASE,
        ActivityType.SET_RESOLVED_BY_AGE,
        ActivityType.SET_RESOLVED_IN_COMMIT,
        ActivityType.SET_RESOLVED_IN_PULL_REQUEST,
    )
]


class GroupStatus:
    """Subset of sentry's GroupStatus constants."""

    UNRESOLVED = 0
    RESOLVED = 1


# end copy
49+
50+
51+
def get_open_periods_for_group(
    apps: StateApps,
    group_id: int,
    status: int,
    project_id: int,
    first_seen: datetime,
    activities: list[Any],
    GroupOpenPeriod: Any,
) -> list[Any]:
    """Reconstruct a group's open periods from its regression/resolution activities.

    ``activities`` must be ordered by ``datetime`` and contain only
    SET_REGRESSION and SET_RESOLVED_* entries. When several regressions (or
    several resolutions) occur in a row, only the first of each run is used
    as a period boundary. Returns unsaved ``GroupOpenPeriod`` instances.
    """
    # No activities means the group has been open since the first_seen date.
    if not activities:
        return [
            GroupOpenPeriod(
                group_id=group_id,
                project_id=project_id,
                date_started=first_seen,
            )
        ]

    periods = []
    # Start of the period currently being built; None while the group is
    # resolved and we are waiting for the next regression. The very first
    # period always starts at first_seen.
    period_start: datetime | None = first_seen
    for activity in activities:
        if activity.type == ActivityType.SET_REGRESSION.value:
            # Only the first regression after a resolution opens a new
            # period; successive regressions are ignored.
            if period_start is None:
                period_start = activity.datetime
        elif activity.type in RESOLVED_ACTIVITY_TYPES and period_start is not None:
            # The first resolution closes the running period; successive
            # resolutions without a regression in between are ignored.
            periods.append(
                GroupOpenPeriod(
                    group_id=group_id,
                    project_id=project_id,
                    date_started=period_start,
                    date_ended=activity.datetime,
                    resolution_activity=activity,
                    user_id=activity.user_id,
                )
            )
            period_start = None

    # An unresolved group still has a period in flight with no end date.
    if status == GroupStatus.UNRESOLVED and period_start is not None:
        periods.append(
            GroupOpenPeriod(
                group_id=group_id,
                project_id=project_id,
                date_started=period_start,
            )
        )

    return periods
101+
102+
103+
def _backfill_group_open_periods(
    apps: StateApps, group_data: list[tuple[int, datetime, int, int]]
) -> None:
    """Create GroupOpenPeriod rows for one batch of groups.

    ``group_data`` is a list of ``(group_id, first_seen, status, project_id)``
    tuples. Groups that already have any open period are skipped so the
    backfill is safe to re-run.
    """
    GroupOpenPeriod = apps.get_model("sentry", "GroupOpenPeriod")
    Activity = apps.get_model("sentry", "Activity")

    candidate_ids = [group_id for group_id, _, _, _ in group_data]
    # Groups already backfilled (e.g. by an earlier, interrupted run).
    already_backfilled = set(
        GroupOpenPeriod.objects.filter(group_id__in=candidate_ids)
        .values_list("group_id", flat=True)
        .distinct()
    )

    group_ids = [gid for gid in candidate_ids if gid not in already_backfilled]
    # Filter to REGRESSION and SET_RESOLVED_XX activities to find the bounds of each open period.
    # The only UNRESOLVED activity we would care about is the first UNRESOLVED activity for the
    # group creation, but we don't create an entry for that.

    activities_by_group = defaultdict(list)
    activity_qs = Activity.objects.filter(
        group_id__in=group_ids,
        type__in=[ActivityType.SET_REGRESSION.value, *RESOLVED_ACTIVITY_TYPES],
    ).order_by("datetime")
    for activity in activity_qs:
        activities_by_group[activity.group_id].append(activity)

    open_periods = []
    for group_id, first_seen, status, project_id in group_data:
        # Skip groups that already have open periods.
        if group_id in already_backfilled:
            continue

        open_periods.extend(
            get_open_periods_for_group(
                apps,
                group_id,
                status,
                project_id,
                first_seen,
                activities_by_group[group_id],
                GroupOpenPeriod,
            )
        )

    with transaction.atomic(router.db_for_write(GroupOpenPeriod)):
        try:
            GroupOpenPeriod.objects.bulk_create(open_periods)
        except IntegrityError as e:
            logger.exception(
                "Error creating open period",
                extra={"group_ids": group_ids, "error": e},
            )
154+
155+
156+
def backfill_group_open_periods(apps: StateApps, schema_editor: BaseDatabaseSchemaEditor) -> None:
    """Walk every group in id order and backfill its open periods in batches.

    Progress (the last processed group id) is checkpointed in redis under
    ``backfill_key`` so an interrupted run resumes where it left off.
    """
    Group = apps.get_model("sentry", "Group")

    backfill_key = "backfill_group_open_periods_from_activity_1"
    redis_client = redis.redis_clusters.get(settings.SENTRY_MONITORS_REDIS_CLUSTER)

    # Resume after the last group id recorded by a previous (partial) run.
    last_processed_id = int(redis_client.get(backfill_key) or 0)
    group_stream = RangeQuerySetWrapperWithProgressBarApprox(
        Group.objects.filter(id__gt=last_processed_id).values_list(
            "id", "first_seen", "status", "project_id"
        ),
        result_value_getter=lambda item: item[0],
    )
    for batch in chunked(group_stream, CHUNK_SIZE):
        logger.info(
            "Processing batch for group open period backfill",
            extra={"last_group_id": batch[-1][0]},
        )
        _backfill_group_open_periods(apps, batch)
        # Save progress to redis in case we have to restart
        redis_client.set(backfill_key, batch[-1][0], ex=60 * 60 * 24 * 7)
179+
180+
181+
class Migration(CheckedMigration):
    """Backfills sentry_groupopenperiod from Group/Activity history (data-only, no schema change)."""

    # This flag is used to mark that a migration shouldn't be automatically run in production.
    # This should only be used for operations where it's safe to run the migration after your
    # code has deployed. So this should not be used for most operations that alter the schema
    # of a table.
    # Here are some things that make sense to mark as post deployment:
    # - Large data migrations. Typically we want these to be run manually so that they can be
    #   monitored and not block the deploy for a long period of time while they run.
    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
    #   run this outside deployments so that we don't block them. Note that while adding an index
    #   is a schema change, it's completely safe to run the operation after the code has deployed.
    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
    is_post_deployment = True

    dependencies = [
        ("sentry", "0924_dashboard_add_unique_constraint_for_user_org_position"),
    ]

    operations = [
        # Forward pass runs the backfill; reverse is a no-op since deleting
        # backfilled open periods is never required.
        migrations.RunPython(
            backfill_group_open_periods,
            migrations.RunPython.noop,
            hints={"tables": ["sentry_groupopenperiod"]},
        ),
    ]

0 commit comments

Comments
 (0)