getsentry · nikkikapadia · May 21, 2025 · May 22, 2025 · May 22, 2025 · May 22, 2025
diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt
@@ -21,7 +21,7 @@ nodestore: 0001_squashed_0002_nodestore_no_dictfield
 
 replays: 0001_squashed_0005_drop_replay_index
 
-sentry: 0912_make_organizationmemberteam_replica_is_active_true
+sentry: 0913_split_discover_dataset_dashboards_self_hosted
 
 social_auth: 0001_squashed_0002_default_auto_field
 

@@ -0,0 +1,180 @@
+# Generated by Django 5.2.1 on 2025-05-20 17:45
+
+from enum import Enum
+
+from django.db import migrations
+from django.db.backends.base.schema import BaseDatabaseSchemaEditor
+from django.db.migrations.state import StateApps
+from django.db.models import Q
+
+from sentry.discover.dashboard_widget_split import _get_and_save_split_decision_for_dashboard_widget
+from sentry.models.dashboard_widget import TypesClass
+from sentry.new_migrations.migrations import CheckedMigration
+from sentry.utils.query import RangeQuerySetWrapperWithProgressBar
+
+
+class DashboardWidgetTypes(TypesClass):
+    """Integer ids and string names of dashboard widget types."""
+
+    # Deprecated: the old way of accessing error events and transaction events
+    # simultaneously. Use ERROR_EVENTS or TRANSACTION_LIKE instead.
+    DISCOVER = 0
+    ISSUE = 1
+    RELEASE_HEALTH = 2
+    METRICS = 3
+    # Error side of the split from Discover.
+    ERROR_EVENTS = 100
+    # Transaction-like data from the split from Discover. It may either use
+    # 'Transactions' events or 'PerformanceMetrics' depending on on-demand, MEP
+    # metrics, etc.
+    TRANSACTION_LIKE = 101
+    SPANS = 102
+    # The logs trace item type on the EAP dataset.
+    LOGS = 103
+
+    # (id, name) pairs. Note that RELEASE_HEALTH is listed under the name "metrics"
+    # and that METRICS has no entry.
+    TYPES = [
+        (DISCOVER, "discover"),
+        (ISSUE, "issue"),
+        (
+            RELEASE_HEALTH,
+            "metrics",
+        ),
+        (ERROR_EVENTS, "error-events"),
+        (TRANSACTION_LIKE, "transaction-like"),
+        (SPANS, "spans"),
+        (LOGS, "logs"),
+    ]
+    # Just the names, in the same order as TYPES.
+    TYPE_NAMES = [name for _, name in TYPES]
+
+
+class DatasetSourcesTypes(Enum):
+    """How the dataset recorded for a widget was arrived at.
+
+    The migration functions in this file write a member's ``value`` to a widget's
+    ``dataset_source`` field.
+    """
+
+    # Ambiguous queries that haven't been or couldn't be categorized into a
+    # specific dataset.
+    UNKNOWN = 0
+
+    # Dataset inferred by either running the query or using heuristics.
+    INFERRED = 1
+
+    # Canonical dataset, user explicitly selected it.
+    USER = 2
+
+    # Was an ambiguous dataset forced to split (i.e. we picked a default).
+    FORCED = 3
+
+    # Dataset inferred by split script, version 1.
+    SPLIT_VERSION_1 = 4
+
+    # Dataset inferred by split script, version 2.
+    SPLIT_VERSION_2 = 5
+
+    @classmethod
+    def as_choices(cls):
+        """Return one ``(value, lowercased name)`` pair per member, as a tuple."""
+        return tuple((member.value, member.name.lower()) for member in cls)
+
+    @classmethod
+    def as_text_choices(cls):
+        """Return one ``(lowercased name, value)`` pair per member, as a tuple."""
+        return tuple((label, value) for value, label in cls.as_choices())
+
+
+class DashboardWidgetDisplayTypes(TypesClass):  # display kinds: integer ids plus names
+    LINE_CHART = 0
+    AREA_CHART = 1
+    STACKED_AREA_CHART = 2
+    BAR_CHART = 3
+    TABLE = 4
+    BIG_NUMBER = 6  # NOTE(review): no constant here uses 5 - confirm that is intended
+    TOP_N = 7
+    TYPES = [  # (id, name) pairs
+        (LINE_CHART, "line"),
+        (AREA_CHART, "area"),
+        (STACKED_AREA_CHART, "stacked_area"),
+        (BAR_CHART, "bar"),
+        (TABLE, "table"),
+        (BIG_NUMBER, "big_number"),
+        (TOP_N, "top_n"),
+    ]
+    TYPE_NAMES = [label for _, label in TYPES]  # names only, in TYPES order
+
+
+def split_discover_dataset_dashboards_self_hosted(
+    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
+) -> None:
+    """Run the split helper on every discover widget query whose widget is still unsplit.
+
+    Best effort: if the helper raises, log it and store ERROR_EVENTS with an UNKNOWN source.
+    """
+    import logging
+    logger = logging.getLogger(__name__)
+    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
+    split_types = [DashboardWidgetTypes.ERROR_EVENTS, DashboardWidgetTypes.TRANSACTION_LIKE]
+    queryset = DashboardWidgetQuery.objects.filter(
+        Q(widget__widget_type=DashboardWidgetTypes.DISCOVER)
+        & ~Q(widget__discover_widget_split__in=split_types)
+    ).select_related("widget__dashboard__organization")
+
+    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
+        try:
+            _get_and_save_split_decision_for_dashboard_widget(widget_query, dry_run=False)
+        except Exception:
+            logger.exception("discover split failed", extra={"widget_id": widget_query.widget_id})
+            widget_query.widget.discover_widget_split = DashboardWidgetTypes.ERROR_EVENTS
+            widget_query.widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
+            widget_query.widget.save()
+
+
+def reverse_split_discover_dataset_dashboards_self_hosted(
+    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
+) -> None:
+    """Clear the split on widgets marked ERROR_EVENTS or TRANSACTION_LIKE; source -> UNKNOWN.
+
+    Reached via query rows; widgets split before the forward pass ran are reset as well.
+    """
+    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
+    split_types = [DashboardWidgetTypes.ERROR_EVENTS, DashboardWidgetTypes.TRANSACTION_LIKE]
+    # select_related fetches each widget with its query row, not one SELECT per row.
+    queryset = DashboardWidgetQuery.objects.filter(
+        widget__discover_widget_split__in=split_types
+    ).select_related("widget")
+    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
+        widget_query.widget.discover_widget_split = None
+        widget_query.widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
+        widget_query.widget.save()
+
+
+class Migration(CheckedMigration):
+    # Data-only migration: its single operation is a ``RunPython`` over the forward/reverse
+    # split functions defined in this file; it makes no schema change.
+    #
+    # ``is_post_deployment`` marks that a migration shouldn't be automatically run in
+    # production. Only use it for operations where it's safe to run the migration after your
+    # code has deployed, so not for most operations that alter the schema of a table.
+    # Things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables: slow, yet safe to run after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = True
+
+    dependencies = [
+        ("sentry", "0912_make_organizationmemberteam_replica_is_active_true"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            split_discover_dataset_dashboards_self_hosted,
+            reverse_code=reverse_split_discover_dataset_dashboards_self_hosted,
+            hints={"tables": ["sentry_dashboardwidget"]},
+        )
+    ]
diff --git a/tests/sentry/migrations/test_0913_split_discover_dataset_dashboards_self_hosted.py b/tests/sentry/migrations/test_0913_split_discover_dataset_dashboards_self_hosted.py
@@ -0,0 +1,158 @@
+from sentry.hybridcloud.models.outbox import outbox_context
+from sentry.models.dashboard_widget import (
+    DashboardWidgetDisplayTypes,
+    DashboardWidgetTypes,
+    DatasetSourcesTypes,
+)
+from sentry.models.organization import Organization
+from sentry.testutils.cases import SnubaTestCase, TestMigrations
+from sentry.testutils.helpers.datetime import before_now
+from sentry.utils.samples import load_data
+
+
+class SplitDiscoverDatasetDashboardsSelfHostedTest(TestMigrations, SnubaTestCase):
+    """Checks the discover_widget_split left on four widgets by migration 0913."""
+    migrate_from = "0912_make_organizationmemberteam_replica_is_active_true"
+    migrate_to = "0913_split_discover_dataset_dashboards_self_hosted"
+
+    def setup_before_migration(self, apps):
+        """Create an org, dashboard, four discover widgets and two transaction events."""
+        User = apps.get_model("sentry", "User")
+        Dashboard = apps.get_model("sentry", "Dashboard")
+        DashboardWidget = apps.get_model("sentry", "DashboardWidget")
+        DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
+
+        with outbox_context(flush=False):
+            self.organization = Organization.objects.create(name="test", slug="test")
+            self.user = User.objects.create(email="test@test.com", is_superuser=False)
+            self.project = self.create_project(
+                name="test_project", slug="test_project", organization=self.organization
+            )
+            self.environment = self.create_environment(
+                name="test_environment", project=self.project, organization=self.organization
+            )
+
+            self.dashboard = Dashboard.objects.create(
+                title="test",
+                organization_id=self.organization.id,
+                created_by_id=self.user.id,
+            )
+
+            # Unsplit widget with a plain count() query; the test expects ERROR_EVENTS.
+            self.discover_error_widget = DashboardWidget.objects.create(
+                dashboard_id=self.dashboard.id,
+                title="test discover widget",
+                widget_type=DashboardWidgetTypes.DISCOVER,
+                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
+                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
+                interval="1d",
+                order=0,
+            )
+
+            self.discover_error_widget_query = DashboardWidgetQuery.objects.create(
+                widget_id=self.discover_error_widget.id,
+                name="test discover widget query",
+                fields=["count()"],
+                aggregates=["count()"],
+                columns=[],
+                conditions="environment:foo",
+                orderby=["-count()"],
+                order=0,
+            )
+
+            # Already split before the migration; the test expects it to stay TRANSACTION_LIKE.
+            self.migrated_discover_widget = DashboardWidget.objects.create(
+                dashboard_id=self.dashboard.id,
+                title="test migrated discover widget",
+                widget_type=DashboardWidgetTypes.DISCOVER,
+                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
+                discover_widget_split=DashboardWidgetTypes.TRANSACTION_LIKE,
+                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
+                interval="1d",
+                order=1,
+            )
+
+            self.migrated_discover_widget_query = DashboardWidgetQuery.objects.create(
+                widget_id=self.migrated_discover_widget.id,
+                name="test migrated discover widget query",
+                fields=["count()"],
+                aggregates=["count()"],
+                columns=[],
+                conditions="environment:foo",
+                orderby=["-count()"],
+                order=1,
+            )
+
+            # Unsplit widget selecting transaction.duration; the test expects TRANSACTION_LIKE.
+            self.discover_transaction_widget = DashboardWidget.objects.create(
+                dashboard_id=self.dashboard.id,
+                title="test discover transaction widget",
+                widget_type=DashboardWidgetTypes.DISCOVER,
+                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
+                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
+                interval="1d",
+                order=2,
+            )
+
+            self.discover_transaction_widget_query = DashboardWidgetQuery.objects.create(
+                widget_id=self.discover_transaction_widget.id,
+                name="test discover transaction widget query",
+                fields=["count()", "transaction.duration"],
+                aggregates=["count()"],
+                columns=[],
+                conditions="environment:foo",
+                orderby=["-count()"],
+                order=2,
+            )
+
+            # Unsplit widget matching the stored events; the test expects TRANSACTION_LIKE.
+            self.discover_ambiguous_widget = DashboardWidget.objects.create(
+                dashboard_id=self.dashboard.id,
+                title="test discover ambiguous widget",
+                widget_type=DashboardWidgetTypes.DISCOVER,
+                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
+                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
+                interval="1d",
+                order=3,
+            )
+
+            self.discover_ambiguous_widget_query = DashboardWidgetQuery.objects.create(
+                widget_id=self.discover_ambiguous_widget.id,
+                name="test discover ambiguous widget query",
+                fields=["count()", "transaction"],
+                aggregates=["count()"],
+                columns=[],
+                conditions="environment:test_environment",
+                orderby=["-count()"],
+                order=3,
+            )
+
+            # Now store test data that should only affect the ambiguous widget
+            self.ten_mins_ago = before_now(minutes=10)
+            for transaction_name, duration in (("/to_other/", 1000), ("/to_other/2", 2000)):
+                data = load_data("transaction", timestamp=self.ten_mins_ago)
+                data["transaction"] = transaction_name
+                data["environment"] = self.environment.name
+                data["transaction.duration"] = duration
+                self.store_event(data, project_id=self.project.id, assert_no_errors=False)
+
+    def test(self):
+        """Each widget ends up with the expected discover_widget_split value."""
+        self.discover_error_widget.refresh_from_db()
+        self.migrated_discover_widget.refresh_from_db()
+        self.discover_transaction_widget.refresh_from_db()
+        self.discover_ambiguous_widget.refresh_from_db()
+
+        assert self.discover_error_widget.discover_widget_split == DashboardWidgetTypes.ERROR_EVENTS
+        assert (
+            self.migrated_discover_widget.discover_widget_split
+            == DashboardWidgetTypes.TRANSACTION_LIKE
+        )
+        assert (
+            self.discover_transaction_widget.discover_widget_split
+            == DashboardWidgetTypes.TRANSACTION_LIKE
+        )
+        assert (
+            self.discover_ambiguous_widget.discover_widget_split
+            == DashboardWidgetTypes.TRANSACTION_LIKE
+        )