Skip to content

Commit a2ea48a

Browse files
authored
chore(dashboards): Discover split for self hosted dashboard discover widgets (#92135)
The Discover split for dashboards has already been completed for all users except self-hosted users. This migration converts every `Discover` widget type to either `Transaction` or `Error` using the `_get_and_save_split_decision_for_dashboard_widget` function. That function tries to categorize a widget query as `Transaction` or `Error`, and defaults to `Error` if no decision can be made. This is the same logic as the original Discover split job.
1 parent fc2a333 commit a2ea48a

File tree

3 files changed

+339
-1
lines changed

3 files changed

+339
-1
lines changed

migrations_lockfile.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ nodestore: 0001_squashed_0002_nodestore_no_dictfield
2121

2222
replays: 0001_squashed_0005_drop_replay_index
2323

24-
sentry: 0912_make_organizationmemberteam_replica_is_active_true
24+
sentry: 0913_split_discover_dataset_dashboards_self_hosted
2525

2626
social_auth: 0001_squashed_0002_default_auto_field
2727

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
# Generated by Django 5.2.1 on 2025-05-20 17:45
2+
3+
from enum import Enum
4+
5+
from django.db import migrations
6+
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
7+
from django.db.migrations.state import StateApps
8+
from django.db.models import Q
9+
10+
from sentry.discover.dashboard_widget_split import _get_and_save_split_decision_for_dashboard_widget
11+
from sentry.models.dashboard_widget import TypesClass
12+
from sentry.new_migrations.migrations import CheckedMigration
13+
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar
14+
15+
16+
class DashboardWidgetTypes(TypesClass):
    # Widget type ids as declared locally in this migration module.

    # Old way of accessing error events and transaction events simultaneously
    # (deprecated). Use ERROR_EVENTS or TRANSACTION_LIKE instead.
    DISCOVER = 0
    ISSUE = 1
    RELEASE_HEALTH = 2
    METRICS = 3
    # Error side of the split from Discover.
    ERROR_EVENTS = 100
    # Transaction-like data from the split from Discover. It may either use
    # 'Transactions' events or 'PerformanceMetrics' depending on on-demand,
    # MEP metrics, etc.
    TRANSACTION_LIKE = 101
    SPANS = 102
    # The logs trace item type on the EAP dataset.
    LOGS = 103

    # NOTE(review): RELEASE_HEALTH is labelled "metrics" and METRICS has no
    # entry here; kept exactly as written -- confirm it mirrors the model.
    TYPES = [
        (DISCOVER, "discover"),
        (ISSUE, "issue"),
        (RELEASE_HEALTH, "metrics"),
        (ERROR_EVENTS, "error-events"),
        (TRANSACTION_LIKE, "transaction-like"),
        (SPANS, "spans"),
        (LOGS, "logs"),
    ]
    TYPE_NAMES = [label for _, label in TYPES]
51+
52+
53+
class DatasetSourcesTypes(Enum):
    """Describes how the dataset recorded for a widget was determined."""

    # Ambiguous queries that haven't been or couldn't be categorized into a
    # specific dataset.
    UNKNOWN = 0
    # Dataset inferred by either running the query or using heuristics.
    INFERRED = 1
    # Canonical dataset, user explicitly selected it.
    USER = 2
    # Was an ambiguous dataset forced to split (i.e. we picked a default).
    FORCED = 3
    # Dataset inferred by split script, version 1.
    SPLIT_VERSION_1 = 4
    # Dataset inferred by split script, version 2.
    SPLIT_VERSION_2 = 5

    @classmethod
    def as_choices(cls) -> tuple[tuple[int, str], ...]:
        """Return ``(value, lowercased name)`` pairs in declaration order."""
        pairs = [(member.value, member.name.lower()) for member in cls]
        return tuple(pairs)

    @classmethod
    def as_text_choices(cls) -> tuple[tuple[str, int], ...]:
        """Return ``(lowercased name, value)`` pairs in declaration order."""
        pairs = [(member.name.lower(), member.value) for member in cls]
        return tuple(pairs)
88+
89+
90+
class DashboardWidgetDisplayTypes(TypesClass):
    # Display type ids paired with their string labels, as declared locally
    # in this migration module. Note that id 5 is not assigned.
    LINE_CHART = 0
    AREA_CHART = 1
    STACKED_AREA_CHART = 2
    BAR_CHART = 3
    TABLE = 4
    BIG_NUMBER = 6
    TOP_N = 7

    TYPES = [
        (LINE_CHART, "line"),
        (AREA_CHART, "area"),
        (STACKED_AREA_CHART, "stacked_area"),
        (BAR_CHART, "bar"),
        (TABLE, "table"),
        (BIG_NUMBER, "big_number"),
        (TOP_N, "top_n"),
    ]
    TYPE_NAMES = [label for _, label in TYPES]
108+
109+
110+
def split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Assign a split decision to every Discover widget that lacks one.

    Iterates over all ``DashboardWidgetQuery`` rows whose widget is of type
    ``DISCOVER`` and whose ``discover_widget_split`` is not already
    ``ERROR_EVENTS`` or ``TRANSACTION_LIKE``, and delegates the decision to
    ``_get_and_save_split_decision_for_dashboard_widget``. If that call raises,
    the failure is logged and the widget is forced to ``ERROR_EVENTS`` with an
    ``UNKNOWN`` dataset source so the migration can continue.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Schema editor supplied by the migration runner (unused).
    """
    # Imported locally so this function does not depend on module-level imports.
    import logging

    logger = logging.getLogger(__name__)

    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")

    already_split = Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )
    catch_all_unsplit_widgets = (
        Q(widget__widget_type=DashboardWidgetTypes.DISCOVER) & ~already_split
    )

    queryset = DashboardWidgetQuery.objects.filter(
        catch_all_unsplit_widgets,
    ).select_related("widget__dashboard__organization")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        try:
            _get_and_save_split_decision_for_dashboard_widget(widget_query, dry_run=False)
        except Exception:
            widget = widget_query.widget
            # Best-effort fallback: record why the decision failed instead of
            # swallowing it silently, then default the widget to errors.
            logger.warning(
                "Could not determine discover split for dashboard widget %s; "
                "defaulting to error events",
                widget.id,
                exc_info=True,
            )
            widget.discover_widget_split = DashboardWidgetTypes.ERROR_EVENTS
            widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
            widget.save()
134+
135+
136+
def reverse_split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Clear the split decision on every widget that currently has one.

    For each ``DashboardWidgetQuery`` whose widget has ``discover_widget_split``
    set to ``ERROR_EVENTS`` or ``TRANSACTION_LIKE``, resets the widget's split
    to ``None`` and its dataset source to ``UNKNOWN``.

    NOTE(review): the filter does not distinguish widgets split by this
    migration from widgets that were split beforehand, so both are reset.

    Args:
        apps: Historical app registry supplied by the migration runner.
        schema_editor: Schema editor supplied by the migration runner (unused).
    """
    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
    all_split_widgets = Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )

    # Fetch the related widget with each row; the loop below touches
    # ``widget_query.widget`` on every iteration, which would otherwise issue
    # one extra query per row.
    queryset = DashboardWidgetQuery.objects.filter(all_split_widgets).select_related("widget")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        widget = widget_query.widget
        widget.discover_widget_split = None
        widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
        widget.save()
153+
154+
155+
class Migration(CheckedMigration):
    # ``is_post_deployment`` marks a migration that shouldn't be run
    # automatically in production. Only use it for operations that are safe to
    # run after the code has deployed, which rules out most operations that
    # alter the schema of a table.
    #
    # Things that make sense to mark as post deployment:
    # - Large data migrations. Typically we want these to be run manually so
    #   that they can be monitored and do not block the deploy for a long
    #   period of time while they run.
    # - Adding indexes to large tables. This can take a long time, so we'd
    #   generally prefer to run it outside deployments so that we don't block
    #   them. Although adding an index is a schema change, it's completely safe
    #   to run the operation after the code has deployed.
    #
    # Once deployed, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment

    is_post_deployment = True

    dependencies = [
        ("sentry", "0912_make_organizationmemberteam_replica_is_active_true"),
    ]

    operations = [
        migrations.RunPython(
            code=split_discover_dataset_dashboards_self_hosted,
            reverse_code=reverse_split_discover_dataset_dashboards_self_hosted,
            hints={"tables": ["sentry_dashboardwidget"]},
        ),
    ]
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
from sentry.hybridcloud.models.outbox import outbox_context
2+
from sentry.models.dashboard_widget import (
3+
DashboardWidgetDisplayTypes,
4+
DashboardWidgetTypes,
5+
DatasetSourcesTypes,
6+
)
7+
from sentry.models.organization import Organization
8+
from sentry.testutils.cases import SnubaTestCase, TestMigrations
9+
from sentry.testutils.helpers.datetime import before_now
10+
from sentry.utils.samples import load_data
11+
12+
13+
class SplitDiscoverDatasetDashboardsSelfHostedTest(TestMigrations, SnubaTestCase):
    """Verify the split assigned to Discover widgets by migration 0913.

    Four Discover widgets are created before the migration runs: one expected
    to end up as error events, one that already carries a transaction split,
    one that selects ``transaction.duration``, and one that selects the
    ``transaction`` column and has transaction events stored for its
    environment.
    """

    migrate_from = "0912_make_organizationmemberteam_replica_is_active_true"
    migrate_to = "0913_split_discover_dataset_dashboards_self_hosted"

    def setup_before_migration(self, apps):
        """Create the organization, dashboard, widgets, queries and events."""
        User = apps.get_model("sentry", "User")
        Dashboard = apps.get_model("sentry", "Dashboard")
        DashboardWidget = apps.get_model("sentry", "DashboardWidget")
        DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")

        def create_widget_with_query(
            position, widget_title, query_name, fields, conditions, **widget_overrides
        ):
            # Every fixture shares the same widget/query shape; only the
            # title, name, fields, conditions and ordering differ.
            widget = DashboardWidget.objects.create(
                dashboard_id=self.dashboard.id,
                title=widget_title,
                widget_type=DashboardWidgetTypes.DISCOVER,
                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
                interval="1d",
                order=position,
                **widget_overrides,
            )
            query = DashboardWidgetQuery.objects.create(
                widget_id=widget.id,
                name=query_name,
                fields=fields,
                aggregates=["count()"],
                columns=[],
                conditions=conditions,
                orderby=["-count()"],
                order=position,
            )
            return widget, query

        # NOTE(review): the extent of this ``with`` block is assumed to cover
        # the whole setup -- confirm against the original file.
        with outbox_context(flush=False):
            self.organization = Organization.objects.create(name="test", slug="test")
            self.user = User.objects.create(email="test@test.com", is_superuser=False)
            self.project = self.create_project(
                name="test_project", slug="test_project", organization=self.organization
            )
            self.environment = self.create_environment(
                name="test_environment", project=self.project, organization=self.organization
            )

            self.dashboard = Dashboard.objects.create(
                title="test",
                organization_id=self.organization.id,
                created_by_id=self.user.id,
            )

            (
                self.discover_error_widget,
                self.discover_error_widget_query,
            ) = create_widget_with_query(
                0,
                "test discover widget",
                "test discover widget query",
                ["count()"],
                "environment:foo",
            )

            # Already carries a split decision before the migration runs.
            (
                self.migrated_discover_widget,
                self.migrated_discover_widget_query,
            ) = create_widget_with_query(
                1,
                "test migrated discover widget",
                "test migrated discover widget query",
                ["count()"],
                "environment:foo",
                discover_widget_split=DashboardWidgetTypes.TRANSACTION_LIKE,
            )

            (
                self.discover_transaction_widget,
                self.discover_transaction_widget_query,
            ) = create_widget_with_query(
                2,
                "test discover transaction widget",
                "test discover transaction widget query",
                ["count()", "transaction.duration"],
                "environment:foo",
            )

            (
                self.discover_ambiguous_widget,
                self.discover_ambiguous_widget_query,
            ) = create_widget_with_query(
                3,
                "test discover ambiguous widget",
                "test discover ambiguous widget query",
                ["count()", "transaction"],
                "environment:test_environment",
            )

            # Now store test data that should only affect the ambiguous widget
            self.nine_mins_ago = before_now(minutes=9)
            self.ten_mins_ago = before_now(minutes=10)

            for transaction_name, duration in (("/to_other/", 1000), ("/to_other/2", 2000)):
                data = load_data("transaction", timestamp=self.ten_mins_ago)
                data["transaction"] = transaction_name
                data["environment"] = self.environment.name
                data["transaction.duration"] = duration
                self.store_event(data, project_id=self.project.id, assert_no_errors=False)

    def test(self):
        """Each widget carries the expected split after the migration."""
        expected_splits = (
            (self.discover_error_widget, DashboardWidgetTypes.ERROR_EVENTS),
            (self.migrated_discover_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
            (self.discover_transaction_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
            (self.discover_ambiguous_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
        )

        # Reload every widget first so the assertions see post-migration state.
        for widget, _ in expected_splits:
            widget.refresh_from_db()

        for widget, expected_split in expected_splits:
            assert widget.discover_widget_split == expected_split

0 commit comments

Comments
 (0)