Skip to content

chore(dashboards): Discover split for self hosted dashboard discover widgets #92135

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ nodestore: 0001_squashed_0002_nodestore_no_dictfield

replays: 0001_squashed_0005_drop_replay_index

sentry: 0912_make_organizationmemberteam_replica_is_active_true
sentry: 0913_split_discover_dataset_dashboards_self_hosted

social_auth: 0001_squashed_0002_default_auto_field

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
# Generated by Django 5.2.1 on 2025-05-20 17:45

from enum import Enum

from django.db import migrations
from django.db.backends.base.schema import BaseDatabaseSchemaEditor
from django.db.migrations.state import StateApps
from django.db.models import Q

from sentry.discover.dashboard_widget_split import _get_and_save_split_decision_for_dashboard_widget
from sentry.models.dashboard_widget import TypesClass
from sentry.new_migrations.migrations import CheckedMigration
from sentry.utils.query import RangeQuerySetWrapperWithProgressBar


class DashboardWidgetTypes(TypesClass):
    """Integer codes for dashboard widget types, declared locally in this migration.

    NOTE(review): presumably mirrors ``sentry.models.dashboard_widget.DashboardWidgetTypes``
    so the migration does not depend on the live model -- confirm.
    """

    # Deprecated: the old way of accessing error events and transaction events
    # simultaneously. Use ERROR_EVENTS or TRANSACTION_LIKE instead.
    DISCOVER = 0
    ISSUE = 1
    RELEASE_HEALTH = 2
    METRICS = 3
    # Error side of the split from Discover.
    ERROR_EVENTS = 100
    # Transaction-like side of the split from Discover. It may use either
    # 'Transactions' events or 'PerformanceMetrics' depending on on-demand,
    # MEP metrics, etc.
    TRANSACTION_LIKE = 101
    SPANS = 102
    # The logs trace item type on the EAP dataset.
    LOGS = 103

    # (code, name) pairs. RELEASE_HEALTH is labelled "metrics" and METRICS has
    # no entry of its own in this list.
    TYPES = [
        (DISCOVER, "discover"),
        (ISSUE, "issue"),
        (RELEASE_HEALTH, "metrics"),
        (ERROR_EVENTS, "error-events"),
        (TRANSACTION_LIKE, "transaction-like"),
        (SPANS, "spans"),
        (LOGS, "logs"),
    ]
    # Just the names, in the same order as TYPES.
    TYPE_NAMES = [type_name for _code, type_name in TYPES]


class DatasetSourcesTypes(Enum):
    """Records how the dataset of a query/widget was decided."""

    # Ambiguous queries that haven't been or couldn't be categorized into a
    # specific dataset.
    UNKNOWN = 0
    # Dataset inferred by either running the query or using heuristics.
    INFERRED = 1
    # Canonical dataset; the user explicitly selected it.
    USER = 2
    # An ambiguous dataset that was forced to split (i.e. a default was picked).
    FORCED = 3
    # Dataset inferred by the split script, version 1.
    SPLIT_VERSION_1 = 4
    # Dataset inferred by the split script, version 2.
    SPLIT_VERSION_2 = 5

    @classmethod
    def as_choices(cls):
        """Return ``(value, lowercase_name)`` pairs for every member, in definition order."""
        pairs = [(member.value, member.name.lower()) for member in cls]
        return tuple(pairs)

    @classmethod
    def as_text_choices(cls):
        """Return ``(lowercase_name, value)`` pairs for every member, in definition order."""
        pairs = [(member.name.lower(), member.value) for member in cls]
        return tuple(pairs)


class DashboardWidgetDisplayTypes(TypesClass):
    """Integer codes for dashboard widget display types, declared locally in this migration."""

    LINE_CHART = 0
    AREA_CHART = 1
    STACKED_AREA_CHART = 2
    BAR_CHART = 3
    TABLE = 4
    # The value 5 is not assigned to any display type here.
    BIG_NUMBER = 6
    TOP_N = 7

    # (code, name) pairs for every display type above.
    TYPES = [
        (LINE_CHART, "line"),
        (AREA_CHART, "area"),
        (STACKED_AREA_CHART, "stacked_area"),
        (BAR_CHART, "bar"),
        (TABLE, "table"),
        (BIG_NUMBER, "big_number"),
        (TOP_N, "top_n"),
    ]
    # Just the names, in the same order as TYPES.
    TYPE_NAMES = [display_name for _code, display_name in TYPES]


def split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Assign a split dataset to Discover dashboard widgets that do not have one yet.

    Iterates every ``DashboardWidgetQuery`` whose widget has ``widget_type``
    DISCOVER and whose ``discover_widget_split`` is not already ERROR_EVENTS or
    TRANSACTION_LIKE, and asks
    ``_get_and_save_split_decision_for_dashboard_widget`` to decide and persist
    the split (``dry_run=False``).

    If that helper raises, the widget falls back to ERROR_EVENTS with
    ``dataset_source`` UNKNOWN so the migration can keep going; the failure is
    logged instead of being dropped silently.

    Args:
        apps: Historical app registry used to look up the model.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    # Local import keeps this function self-contained within the migration module.
    import logging

    logger = logging.getLogger(__name__)

    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")

    is_discover_widget = Q(widget__widget_type=DashboardWidgetTypes.DISCOVER)
    is_already_split = Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )
    catch_all_unsplit_widgets = is_discover_widget & ~is_already_split

    # Pull the widget, dashboard and organization along with each query row so
    # that accessing them inside the loop does not trigger extra lookups.
    queryset = DashboardWidgetQuery.objects.filter(
        catch_all_unsplit_widgets,
    ).select_related("widget__dashboard__organization")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        try:
            _get_and_save_split_decision_for_dashboard_widget(widget_query, dry_run=False)
        except Exception as exc:
            # Deliberate best-effort: one bad widget must not abort the whole
            # migration. Record which query failed and why, then apply the
            # default split.
            logger.warning(
                "Could not infer split for dashboard widget query %s (%r); "
                "defaulting widget to error-events",
                widget_query.id,
                exc,
            )
            widget = widget_query.widget
            widget.discover_widget_split = DashboardWidgetTypes.ERROR_EVENTS
            widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
            widget.save()


def reverse_split_discover_dataset_dashboards_self_hosted(
    apps: StateApps, schema_editor: BaseDatabaseSchemaEditor
) -> None:
    """Clear the split decision from every widget that currently has one.

    For each ``DashboardWidgetQuery`` whose widget has ``discover_widget_split``
    set to ERROR_EVENTS or TRANSACTION_LIKE, the widget's
    ``discover_widget_split`` is reset to ``None`` and its ``dataset_source``
    to UNKNOWN.

    Note that this is broader than the forward step: it does not filter on
    ``widget_type`` and it also resets widgets that were already split before
    the forward migration ran.

    Args:
        apps: Historical app registry used to look up the model.
        schema_editor: Unused; part of the ``RunPython`` callable signature.
    """
    DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")
    all_split_widgets = Q(
        widget__discover_widget_split__in=[
            DashboardWidgetTypes.ERROR_EVENTS,
            DashboardWidgetTypes.TRANSACTION_LIKE,
        ]
    )

    # Fetch each query's widget in the same SQL query; without this every
    # ``widget_query.widget`` access below would issue its own lookup.
    queryset = DashboardWidgetQuery.objects.filter(all_split_widgets).select_related("widget")

    for widget_query in RangeQuerySetWrapperWithProgressBar(queryset):
        widget = widget_query.widget
        widget.discover_widget_split = None
        widget.dataset_source = DatasetSourcesTypes.UNKNOWN.value
        widget.save()


class Migration(CheckedMigration):
    """Data migration that runs the Discover dataset split over dashboard widgets."""

    # Post-deployment migrations are not run automatically in production. Only
    # use the flag when it is safe for the migration to run after the code has
    # deployed, which rules out most operations that alter a table's schema.
    # Typical candidates:
    # - Large data migrations, which are run manually so they can be monitored
    #   and do not hold up a deploy while they execute.
    # - Adding indexes to large tables: a schema change, but one that is safe
    #   to apply after the code is live and may take a long time.
    # Once deployed, run these manually via:
    # https://develop.sentry.dev/database-migrations/#migration-deployment
    is_post_deployment = True

    dependencies = [("sentry", "0912_make_organizationmemberteam_replica_is_active_true")]

    operations = [
        migrations.RunPython(
            code=split_discover_dataset_dashboards_self_hosted,
            reverse_code=reverse_split_discover_dataset_dashboards_self_hosted,
            hints={"tables": ["sentry_dashboardwidget"]},
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from sentry.hybridcloud.models.outbox import outbox_context
from sentry.models.dashboard_widget import (
DashboardWidgetDisplayTypes,
DashboardWidgetTypes,
DatasetSourcesTypes,
)
from sentry.models.organization import Organization
from sentry.testutils.cases import SnubaTestCase, TestMigrations
from sentry.testutils.helpers.datetime import before_now
from sentry.utils.samples import load_data


class SplitDiscoverDatasetDashboardsSelfHostedTest(TestMigrations, SnubaTestCase):
    """Migration test for 0913: Discover dashboard widgets receive a ``discover_widget_split``.

    ``setup_before_migration`` builds one dashboard with four Discover widgets
    (one query each) and ``test`` checks each widget's split value afterwards.
    """

    migrate_from = "0912_make_organizationmemberteam_replica_is_active_true"
    migrate_to = "0913_split_discover_dataset_dashboards_self_hosted"

    def setup_before_migration(self, apps):
        """Create the organization, dashboard, widgets, queries and stored events."""
        User = apps.get_model("sentry", "User")
        Dashboard = apps.get_model("sentry", "Dashboard")
        DashboardWidget = apps.get_model("sentry", "DashboardWidget")
        DashboardWidgetQuery = apps.get_model("sentry", "DashboardWidgetQuery")

        # NOTE(review): indentation was lost in the reviewed copy of this file;
        # the extent of this ``with`` block is assumed to cover the whole
        # fixture setup -- confirm against the original.
        with outbox_context(flush=False):
            self.organization = Organization.objects.create(name="test", slug="test")
            self.user = User.objects.create(email="test@test.com", is_superuser=False)
            self.project = self.create_project(
                name="test_project", slug="test_project", organization=self.organization
            )
            self.environment = self.create_environment(
                name="test_environment", project=self.project, organization=self.organization
            )

            self.dashboard = Dashboard.objects.create(
                title="test",
                organization_id=self.organization.id,
                created_by_id=self.user.id,
            )

            # Widget 1: unsplit Discover widget with only a count() field.
            # ``test`` expects it to end up as ERROR_EVENTS.
            self.discover_error_widget = DashboardWidget.objects.create(
                dashboard_id=self.dashboard.id,
                title="test discover widget",
                widget_type=DashboardWidgetTypes.DISCOVER,
                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
                interval="1d",
                order=0,
            )

            self.discover_error_widget_query = DashboardWidgetQuery.objects.create(
                widget_id=self.discover_error_widget.id,
                name="test discover widget query",
                fields=["count()"],
                aggregates=["count()"],
                columns=[],
                conditions="environment:foo",
                orderby=["-count()"],
                order=0,
            )

            # Widget 2: Discover widget that already carries a split
            # (TRANSACTION_LIKE). ``test`` expects the value to be unchanged.
            self.migrated_discover_widget = DashboardWidget.objects.create(
                dashboard_id=self.dashboard.id,
                title="test migrated discover widget",
                widget_type=DashboardWidgetTypes.DISCOVER,
                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
                discover_widget_split=DashboardWidgetTypes.TRANSACTION_LIKE,
                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
                interval="1d",
                order=1,
            )

            self.migrated_discover_widget_query = DashboardWidgetQuery.objects.create(
                widget_id=self.migrated_discover_widget.id,
                name="test migrated discover widget query",
                fields=["count()"],
                aggregates=["count()"],
                columns=[],
                conditions="environment:foo",
                orderby=["-count()"],
                order=1,
            )

            # Widget 3: unsplit Discover widget whose query selects
            # ``transaction.duration``. ``test`` expects TRANSACTION_LIKE.
            self.discover_transaction_widget = DashboardWidget.objects.create(
                dashboard_id=self.dashboard.id,
                title="test discover transaction widget",
                widget_type=DashboardWidgetTypes.DISCOVER,
                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
                interval="1d",
                order=2,
            )

            self.discover_transaction_widget_query = DashboardWidgetQuery.objects.create(
                widget_id=self.discover_transaction_widget.id,
                name="test discover transaction widget query",
                fields=["count()", "transaction.duration"],
                aggregates=["count()"],
                columns=[],
                conditions="environment:foo",
                orderby=["-count()"],
                order=2,
            )

            # Widget 4: unsplit Discover widget selecting ``transaction`` and
            # filtered to ``test_environment``, the environment the events
            # stored below belong to. ``test`` expects TRANSACTION_LIKE.
            self.discover_ambiguous_widget = DashboardWidget.objects.create(
                dashboard_id=self.dashboard.id,
                title="test discover ambiguous widget",
                widget_type=DashboardWidgetTypes.DISCOVER,
                dataset_source=DatasetSourcesTypes.UNKNOWN.value,
                display_type=DashboardWidgetDisplayTypes.LINE_CHART,
                interval="1d",
                order=3,
            )

            self.discover_ambiguous_widget_query = DashboardWidgetQuery.objects.create(
                widget_id=self.discover_ambiguous_widget.id,
                name="test discover ambiguous widget query",
                fields=["count()", "transaction"],
                aggregates=["count()"],
                columns=[],
                conditions="environment:test_environment",
                orderby=["-count()"],
                order=3,
            )

            # Now store test data that should only affect the ambiguous widget
            # (nine_mins_ago is assigned but not used anywhere in this class.)
            self.nine_mins_ago = before_now(minutes=9)
            self.ten_mins_ago = before_now(minutes=10)

            # Two transaction events in ``test_environment``, both timestamped
            # ten minutes ago.
            data = load_data("transaction", timestamp=self.ten_mins_ago)
            data["transaction"] = "/to_other/"
            data["environment"] = self.environment.name
            data["transaction.duration"] = 1000
            self.store_event(data, project_id=self.project.id, assert_no_errors=False)

            data = load_data("transaction", timestamp=self.ten_mins_ago)
            data["transaction"] = "/to_other/2"
            data["environment"] = self.environment.name
            data["transaction.duration"] = 2000
            self.store_event(data, project_id=self.project.id, assert_no_errors=False)

    def test(self):
        # Reload every widget so the assertions see what the migration wrote.
        self.discover_error_widget.refresh_from_db()
        self.migrated_discover_widget.refresh_from_db()
        self.discover_transaction_widget.refresh_from_db()
        self.discover_ambiguous_widget.refresh_from_db()

        # NOTE(review): ERROR_EVENTS is also the migration's fallback when the
        # split helper raises, so this assertion alone cannot tell an inferred
        # result from the fallback path.
        assert self.discover_error_widget.discover_widget_split == DashboardWidgetTypes.ERROR_EVENTS
        # The widget that was split before the migration keeps its value.
        assert (
            self.migrated_discover_widget.discover_widget_split
            == DashboardWidgetTypes.TRANSACTION_LIKE
        )
        assert (
            self.discover_transaction_widget.discover_widget_split
            == DashboardWidgetTypes.TRANSACTION_LIKE
        )
        assert (
            self.discover_ambiguous_widget.discover_widget_split
            == DashboardWidgetTypes.TRANSACTION_LIKE
        )
Loading