Skip to content

chore(dashboards): Discover split for self hosted dashboard discover widgets #92135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 26, 2025
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ nodestore: 0001_squashed_0002_nodestore_no_dictfield

replays: 0001_squashed_0005_drop_replay_index

sentry: 0912_make_organizationmemberteam_replica_is_active_true
sentry: 0913_split_discover_dataset_dashboards_self_hosted

social_auth: 0001_squashed_0002_default_auto_field

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Generated by Django 5.2.1 on 2025-05-20 17:45

from enum import Enum

from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Q

from sentry.discover.dashboard_widget_split import _get_and_save_split_decision_for_dashboard_widget
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We generally don't import from application code into migrations, as import errors can creep in. I remember this came up in a previous discover dataset split discussion, and there was enough logic in this that cloning it into the migrations wasn't simple. I'm assuming that is still the case, and that we're going to be careful with this function for as long as this migration lives (which could be years).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup same case, we'll be keeping an eye on this function

from sentry.models.dashboard_widget import TypesClass
from sentry.new_migrations.migrations import CheckedMigration
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar


class DashboardWidgetTypes(TypesClass):
DISCOVER = 0
"""
Old way of accessing error events and transaction events simultaneously @deprecated. Use ERROR_EVENTS or TRANSACTION_LIKE instead.
"""
ISSUE = 1
RELEASE_HEALTH = 2
METRICS = 3
ERROR_EVENTS = 100
"""
Error side of the split from Discover.
"""
TRANSACTION_LIKE = 101
"""
This targets transaction-like data from the split from discover. Itt may either use 'Transactions' events or 'PerformanceMetrics' depending on on-demand, MEP metrics, etc.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
This targets transaction-like data from the split from discover. Itt may either use 'Transactions' events or 'PerformanceMetrics' depending on on-demand, MEP metrics, etc.
This targets transaction-like data from the split from discover. It may either use 'Transactions' events or 'PerformanceMetrics' depending on on-demand, MEP metrics, etc.

"""
SPANS = 102
"""
These represent the logs trace item type on the EAP dataset.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this supposed to be for logs or spans?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This description is supposed to be for logs. I'll move it to after the logs definition for consistency 👍

"""
LOGS = 103

TYPES = [
(DISCOVER, "discover"),
(ISSUE, "issue"),
(
RELEASE_HEALTH,
"metrics",
),
(ERROR_EVENTS, "error-events"),
(TRANSACTION_LIKE, "transaction-like"),
(SPANS, "spans"),
(LOGS, "logs"),
]
TYPE_NAMES = [t[1] for t in TYPES]


class DatasetSourcesTypes(Enum):
    """
    Provenance of a dataset decision: each member says how the dataset was chosen.

    The integer values are what this migration writes to ``dataset_source``.
    """

    # Queries that haven't been or couldn't be categorized into a specific
    # dataset.
    UNKNOWN = 0
    # Dataset inferred by either running the query or using heuristics.
    INFERRED = 1
    # Canonical dataset, user explicitly selected it.
    USER = 2
    # Was an ambiguous dataset forced to split (i.e. we picked a default).
    FORCED = 3
    # Dataset inferred by split script, version 1.
    SPLIT_VERSION_1 = 4
    # Dataset inferred by split script, version 2.
    SPLIT_VERSION_2 = 5

    @classmethod
    def as_choices(cls) -> tuple[tuple[int, str], ...]:
        """Return ``(value, lowercase_name)`` pairs in declaration order."""
        return tuple((source.value, source.name.lower()) for source in cls)

    @classmethod
    def as_text_choices(cls) -> tuple[tuple[str, int], ...]:
        """Return ``(lowercase_name, value)`` pairs in declaration order."""
        return tuple((source.name.lower(), source.value) for source in cls)


class DashboardWidgetDisplayTypes(TypesClass):
    """Integer ids for dashboard widget display types, paired with their string names."""

    LINE_CHART = 0
    AREA_CHART = 1
    STACKED_AREA_CHART = 2
    BAR_CHART = 3
    TABLE = 4
    # Note: id 5 is not assigned in this class.
    BIG_NUMBER = 6
    TOP_N = 7

    # (id, name) pairs, in id order.
    TYPES = [
        (LINE_CHART, "line"),
        (AREA_CHART, "area"),
        (STACKED_AREA_CHART, "stacked_area"),
        (BAR_CHART, "bar"),
        (TABLE, "table"),
        (BIG_NUMBER, "big_number"),
        (TOP_N, "top_n"),
    ]
    # Just the names, in the same order as TYPES.
    TYPE_NAMES = [type_name for _, type_name in TYPES]


def split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """
    Forward data migration: assign a split to every not-yet-split Discover widget.

    Selects every ``DashboardWidgetQuery`` whose widget has
    ``widget_type == DISCOVER`` and whose ``discover_widget_split`` is not
    already ERROR_EVENTS or TRANSACTION_LIKE, then hands each one to
    ``_get_and_save_split_decision_for_dashboard_widget``. If that helper
    raises, the widget is defaulted to ERROR_EVENTS with
    ``dataset_source = UNKNOWN`` so the migration keeps going; the failure is
    logged so operators can see which widgets were defaulted.

    :param apps: historical app registry supplied by ``RunPython``.
    :param schema_editor: supplied by ``RunPython``; unused here.
    """
    # Function-scope import keeps this change self-contained within the block.
    import logging

    logger = logging.getLogger(__name__)

    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
    catch_all_unsplit_widgets = Q(
        widget__widget_type=DashboardWidgetTypes.DISCOVER,
    ) & ~Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )

    queryset = DashboardWidgetQuery.objects.filter(
        catch_all_unsplit_widgets,
    ).select_related("widget__dashboard__organization")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        try:
            # NOTE(review): presumably decides and persists the split for this
            # query's widget (per its name); it is application code imported
            # from sentry.discover.dashboard_widget_split.
            _get_and_save_split_decision_for_dashboard_widget(widget_query, dry_run=False)
        except Exception:
            # Best effort: never abort the migration for one bad widget, but
            # leave a trace of what failed before applying the default.
            widget = widget_query.widget
            logger.warning(
                "Could not determine discover split for dashboard widget %s; "
                "defaulting to error-events",
                widget.id,
                exc_info=True,
            )
            widget.discover_widget_split = DashboardWidgetTypes.ERROR_EVENTS
            widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
            widget.save()


def reverse_split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """
    Reverse data migration: clear the split on every split widget.

    For each ``DashboardWidgetQuery`` whose widget has
    ``discover_widget_split`` equal to ERROR_EVENTS or TRANSACTION_LIKE, the
    widget's ``discover_widget_split`` is set to ``None`` and its
    ``dataset_source`` to UNKNOWN. The filter has no other condition, so
    widgets that carried a split before the forward migration ran are reset
    as well.

    :param apps: historical app registry supplied by ``RunPython``.
    :param schema_editor: supplied by ``RunPython``; unused here.
    """
    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
    all_split_widgets = Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )

    # Join the widget up front: the loop touches ``widget_query.widget`` on
    # every row, which would otherwise issue one extra query per row.
    queryset = DashboardWidgetQuery.objects.filter(all_split_widgets).select_related("widget")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        widget = widget_query.widget
        widget.discover_widget_split = None
        widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
        widget.save()


class Migration(CheckedMigration):
    """Runs the Discover widget split as a reversible ``RunPython`` data migration."""

    # ``is_post_deployment`` marks a migration that must not be run
    # automatically in production. Only use it when running the migration
    # after the code has deployed is safe, which rules out most operations
    # that alter a table's schema.
    # Typical post-deployment candidates:
    # - Large data migrations, which we prefer to run by hand so they can be
    #   monitored and do not hold up a deploy while they run.
    # - Adding indexes to large tables: slow, yet safe to apply after the
    #   code has shipped even though it is a schema change.
    # Once deployed, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment
    is_post_deployment = True

    dependencies = [
        ("sentry", "0912_make_organizationmemberteam_replica_is_active_true"),
    ]

    operations = [
        migrations.RunPython(
            code=split_discover_dataset_dashboards_self_hosted,
            reverse_code=reverse_split_discover_dataset_dashboards_self_hosted,
            hints={"tables": ["sentry_dashboardwidget"]},
        )
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from sentry.hybridcloud.models.outbox import outbox_context
from sentry.models.dashboard_widget import (
DashboardWidgetDisplayTypes,
DashboardWidgetTypes,
DatasetSourcesTypes,
)
from sentry.models.organization import Organization
from sentry.testutils.cases import SnubaTestCase, TestMigrations
from sentry.testutils.helpers.datetime import before_now
from sentry.utils.samples import load_data


class SplitDiscoverDatasetDashboardsSelfHostedTest(TestMigrations, SnubaTestCase):
    """
    Exercises migration 0913 against four Discover widgets on one dashboard.

    The fixtures are: a widget with a plain ``count()`` query, a widget that
    already carries a TRANSACTION_LIKE split, a widget whose query selects
    ``transaction.duration``, and a widget whose query selects ``transaction``
    in an environment for which two transaction events are stored.
    """

    migrate_from = "0912_make_organizationmemberteam_replica_is_active_true"
    migrate_to = "0913_split_discover_dataset_dashboards_self_hosted"

    def _make_discover_widget(self, widget_model, title, order, **overrides):
        """Create a DISCOVER line-chart widget on ``self.dashboard``."""
        return widget_model.objects.create(
            dashboard_id=self.dashboard.id,
            title=title,
            widget_type=DashboardWidgetTypes.DISCOVER,
            dataset_source=DatasetSourcesTypes.UNKNOWN.value,
            display_type=DashboardWidgetDisplayTypes.LINE_CHART,
            interval="1d",
            order=order,
            **overrides,
        )

    def _make_widget_query(
        self, query_model, widget, name, order, fields, conditions="environment:foo"
    ):
        """Create a ``count()`` query row attached to ``widget``."""
        return query_model.objects.create(
            widget_id=widget.id,
            name=name,
            fields=fields,
            aggregates=["count()"],
            columns=[],
            conditions=conditions,
            orderby=["-count()"],
            order=order,
        )

    def setup_before_migration(self, apps):
        User = apps.get_model("sentry", "User")
        Dashboard = apps.get_model("sentry", "Dashboard")
        DashboardWidget = apps.get_model("sentry", "DashboardWidget")
        DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")

        # NOTE(review): the flattened source does not show where this `with`
        # block ends; all fixture setup is kept inside it here — confirm
        # against the original file.
        with outbox_context(flush=False):
            self.organization = Organization.objects.create(name="test", slug="test")
            self.user = User.objects.create(email="test@test.com", is_superuser=False)
            self.project = self.create_project(
                name="test_project", slug="test_project", organization=self.organization
            )
            self.environment = self.create_environment(
                name="test_environment", project=self.project, organization=self.organization
            )

            self.dashboard = Dashboard.objects.create(
                title="test",
                organization_id=self.organization.id,
                created_by_id=self.user.id,
            )

            # Plain count() query.
            self.discover_error_widget = self._make_discover_widget(
                DashboardWidget, "test discover widget", 0
            )
            self.discover_error_widget_query = self._make_widget_query(
                DashboardWidgetQuery,
                self.discover_error_widget,
                "test discover widget query",
                0,
                ["count()"],
            )

            # Already split before the migration runs.
            self.migrated_discover_widget = self._make_discover_widget(
                DashboardWidget,
                "test migrated discover widget",
                1,
                discover_widget_split=DashboardWidgetTypes.TRANSACTION_LIKE,
            )
            self.migrated_discover_widget_query = self._make_widget_query(
                DashboardWidgetQuery,
                self.migrated_discover_widget,
                "test migrated discover widget query",
                1,
                ["count()"],
            )

            # Query selects a transaction-only field.
            self.discover_transaction_widget = self._make_discover_widget(
                DashboardWidget, "test discover transaction widget", 2
            )
            self.discover_transaction_widget_query = self._make_widget_query(
                DashboardWidgetQuery,
                self.discover_transaction_widget,
                "test discover transaction widget query",
                2,
                ["count()", "transaction.duration"],
            )

            # Query selects `transaction` and targets the environment that has
            # stored events.
            self.discover_ambiguous_widget = self._make_discover_widget(
                DashboardWidget, "test discover ambiguous widget", 3
            )
            self.discover_ambiguous_widget_query = self._make_widget_query(
                DashboardWidgetQuery,
                self.discover_ambiguous_widget,
                "test discover ambiguous widget query",
                3,
                ["count()", "transaction"],
                conditions="environment:test_environment",
            )

            # Now store test data that should only affect the ambiguous widget
            self.nine_mins_ago = before_now(minutes=9)
            self.ten_mins_ago = before_now(minutes=10)

            for transaction_name, duration in (("/to_other/", 1000), ("/to_other/2", 2000)):
                data = load_data("transaction", timestamp=self.ten_mins_ago)
                data["transaction"] = transaction_name
                data["environment"] = self.environment.name
                data["transaction.duration"] = duration
                self.store_event(data, project_id=self.project.id, assert_no_errors=False)

    def test(self):
        expected_splits = (
            (self.discover_error_widget, DashboardWidgetTypes.ERROR_EVENTS),
            (self.migrated_discover_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
            (self.discover_transaction_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
            (self.discover_ambiguous_widget, DashboardWidgetTypes.TRANSACTION_LIKE),
        )

        # Reload everything first, then check, mirroring the original order.
        for widget, _ in expected_splits:
            widget.refresh_from_db()

        for widget, expected in expected_splits:
            assert widget.discover_widget_split == expected
Loading