
Commit bcb51c6

colin-sentry authored and andrewshie-sentry committed
feat(ourlogs): Allow graphing ourlogs with the timeseries APIs (#89306)
We want to graph logs in the logs page (among other places), so this adds a count() aggregate for logs and a shim that makes the ourlogs dataset work with the events-stats and events-timeseries endpoints.
1 parent da2b91e commit bcb51c6
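
As a quick illustration of what this change enables, a minimal sketch of a request against the events-stats endpoint; the host, org slug, token, and project id are placeholder assumptions, while dataset=ourlogs and yAxis=count() come from this commit:

# Hedged sketch: querying events-stats with the new ourlogs dataset.
# Host, org slug, token, and project id are made-up placeholders.
import requests

resp = requests.get(
    "https://sentry.example.com/api/0/organizations/my-org/events-stats/",
    headers={"Authorization": "Bearer <token>"},
    params={
        "dataset": "ourlogs",  # routed through the RPC timeseries path by this commit
        "yAxis": "count()",    # the aggregate this commit adds for logs
        "interval": "1h",
        "project": "1",
    },
)
buckets = resp.json()["data"]  # zerofilled time buckets of log counts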

File tree

7 files changed: +247 −49 lines changed

src/sentry/api/endpoints/organization_events_stats.py

Lines changed: 9 additions & 3 deletions
@@ -280,10 +280,12 @@ def get(self, request: Request, organization: Organization) -> Response:
             return Response({"detail": f"Metric type must be one of: {metric_types}"}, status=400)
 
         force_metrics_layer = request.GET.get("forceMetricsLayer") == "true"
-        use_rpc = request.GET.get("useRpc", "0") == "1" and dataset == spans_eap
+        use_rpc = (
+            request.GET.get("useRpc", "0") == "1" and dataset == spans_eap
+        ) or dataset == ourlogs
         sampling_mode = request.GET.get("sampling")
         transform_alias_to_input_format = (
-            request.GET.get("transformAliasToInputFormat") == "1" or use_rpc or dataset == ourlogs
+            request.GET.get("transformAliasToInputFormat") == "1" or use_rpc
         )
         sentry_sdk.set_tag("performance.use_rpc", use_rpc)
 
@@ -298,6 +300,8 @@ def _get_event_stats(
         ) -> SnubaTSResult | dict[str, SnubaTSResult]:
             if top_events > 0:
                 if use_rpc:
+                    if scoped_dataset == ourlogs:
+                        raise NotImplementedError("You can not use top_events with logs for now.")
                     return spans_rpc.run_top_events_timeseries_query(
                         params=snuba_params,
                         query_string=query,
@@ -338,7 +342,9 @@ def _get_event_stats(
             )
 
             if use_rpc:
-                return spans_rpc.run_timeseries_query(
+                if scoped_dataset == spans_eap:
+                    scoped_dataset = spans_rpc
+                return scoped_dataset.run_timeseries_query(
                     params=snuba_params,
                     query_string=query,
                     y_axes=query_columns,
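
The branch above works because the dataset handles in this endpoint are Python modules that share a run_timeseries_query signature; a standalone sketch of that duck-typed dispatch (the helper name run_timeseries is illustrative, not from the diff):

# Illustrative sketch: the endpoint treats dataset modules as interchangeable
# strategies, so swapping spans_eap for spans_rpc lets one call site serve
# both spans and logs.
from types import ModuleType

def run_timeseries(scoped_dataset: ModuleType, **query_kwargs):
    # ourlogs.run_timeseries_query and spans_rpc.run_timeseries_query take the
    # same keyword arguments, so either module can be passed in here.
    return scoped_dataset.run_timeseries_query(**query_kwargs)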

src/sentry/api/endpoints/organization_events_timeseries.py

Lines changed: 4 additions & 2 deletions
@@ -272,8 +272,10 @@ def get_event_stats(
             ),
         )
 
-        if dataset == spans_eap:
-            return spans_rpc.run_timeseries_query(
+        if dataset == spans_eap or dataset == ourlogs:
+            if dataset == spans_eap:
+                dataset = spans_rpc
+            return dataset.run_timeseries_query(
                 params=snuba_params,
                 query_string=query,
                 y_axes=query_columns,
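
For reference, a hedged sketch of the timeseries response shape this branch produces for ourlogs, pieced together from the test assertions later in this commit (the millisecond timestamps are made-up examples):

# Shape grounded in the test assertions below; timestamp values are illustrative.
expected_shape = {
    "meta": {"dataset": "ourlogs", "start": 1700000000000.0, "end": 1700021600000.0},
    "timeseries": [
        {
            "yaxis": "count()",
            "meta": {"valueType": "integer", "interval": 3_600_000},
            "values": [],  # one zerofilled bucket per interval
        }
    ],
}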

src/sentry/search/eap/ourlogs/definitions.py

Lines changed: 2 additions & 1 deletion
@@ -5,9 +5,10 @@
     OURLOG_ATTRIBUTE_DEFINITIONS,
     OURLOG_VIRTUAL_CONTEXTS,
 )
+from sentry.search.eap.spans.aggregates import LOG_AGGREGATE_DEFINITIONS
 
 OURLOG_DEFINITIONS = ColumnDefinitions(
-    aggregates={},
+    aggregates=LOG_AGGREGATE_DEFINITIONS,
     conditional_aggregates={},
     formulas={},
     columns=OURLOG_ATTRIBUTE_DEFINITIONS,
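
A minimal sketch of the effect of this wiring, assuming the sentry package is importable; the assertion restates the diff rather than a test from this commit:

# Before this change OURLOG_DEFINITIONS.aggregates was {}, so no aggregate
# resolved for logs; now count() resolves through LOG_AGGREGATE_DEFINITIONS.
from sentry.search.eap.ourlogs.definitions import OURLOG_DEFINITIONS

assert "count" in OURLOG_DEFINITIONS.aggregates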

src/sentry/search/eap/spans/aggregates.py

Lines changed: 17 additions & 0 deletions
@@ -476,3 +476,20 @@ def resolve_bounded_sample(args: ResolvedArguments) -> tuple[AttributeKey, Trace
         attribute_resolver=transform_vital_score_to_ratio,
     ),
 }
+
+LOG_AGGREGATE_DEFINITIONS = {
+    "count": AggregateDefinition(
+        internal_function=Function.FUNCTION_COUNT,
+        infer_search_type_from_arguments=False,
+        processor=count_processor,
+        default_search_type="integer",
+        arguments=[
+            AttributeArgumentDefinition(
+                attribute_types={
+                    "string",
+                },
+                default_arg="log.body",
+            )
+        ],
+    ),
+}
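
A hedged reading of what this definition accepts (an interpretation of the fields above, not code from the commit):

# "count()" falls back to default_arg, so it behaves like "count(log.body)";
# only string attributes are valid arguments, and the output search_type is
# fixed to "integer" because infer_search_type_from_arguments=False disables
# inference from the argument's type.
equivalent_fields = {"count()", "count(log.body)"}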

src/sentry/snuba/ourlogs.py

Lines changed: 61 additions & 1 deletion
@@ -1,15 +1,22 @@
 import logging
+from datetime import timedelta
 
+from sentry_sdk import trace
 from snuba_sdk import Column, Condition
 
 from sentry.search.eap.ourlogs.definitions import OURLOG_DEFINITIONS
 from sentry.search.eap.resolver import SearchResolver
 from sentry.search.eap.types import SearchResolverConfig
-from sentry.search.events.types import EventsResponse, SnubaParams
+from sentry.search.eap.utils import handle_downsample_meta
+from sentry.search.events.types import EventsMeta, EventsResponse, SnubaParams
+from sentry.snuba import rpc_dataset_common
 from sentry.snuba.dataset import Dataset
+from sentry.snuba.discover import zerofill
 from sentry.snuba.metrics.extraction import MetricSpecType
 from sentry.snuba.query_sources import QuerySource
 from sentry.snuba.rpc_dataset_common import run_table_query
+from sentry.utils import snuba_rpc
+from sentry.utils.snuba import SnubaTSResult
 
 logger = logging.getLogger("sentry.snuba.ourlogs")
 
@@ -77,3 +84,56 @@ def query(
         ),
         debug=debug,
     )
+
+
+@trace
+def run_timeseries_query(
+    params: SnubaParams,
+    query_string: str,
+    y_axes: list[str],
+    referrer: str,
+    config: SearchResolverConfig,
+    sampling_mode: str | None,
+    comparison_delta: timedelta | None = None,
+) -> SnubaTSResult:
+    rpc_dataset_common.validate_granularity(params)
+    search_resolver = get_resolver(params, config)
+    rpc_request, aggregates, groupbys = rpc_dataset_common.get_timeseries_query(
+        search_resolver, params, query_string, y_axes, [], referrer, sampling_mode=None
+    )
+
+    """Run the query"""
+    rpc_response = snuba_rpc.timeseries_rpc([rpc_request])[0]
+
+    """Process the results"""
+    result = rpc_dataset_common.ProcessedTimeseries()
+    final_meta: EventsMeta = EventsMeta(
+        fields={}, full_scan=handle_downsample_meta(rpc_response.meta.downsampled_storage_meta)
+    )
+    for resolved_field in aggregates + groupbys:
+        final_meta["fields"][resolved_field.public_alias] = resolved_field.search_type
+
+    for timeseries in rpc_response.result_timeseries:
+        processed = rpc_dataset_common.process_timeseries_list([timeseries])
+        if len(result.timeseries) == 0:
+            result = processed
+        else:
+            for attr in ["timeseries", "confidence", "sample_count", "sampling_rate"]:
+                for existing, new in zip(getattr(result, attr), getattr(processed, attr)):
+                    existing.update(new)
+    if len(result.timeseries) == 0:
+        # The rpc only zerofills for us when there are results, if there aren't any we have to do it ourselves
+        result.timeseries = zerofill(
+            [],
+            params.start_date,
+            params.end_date,
+            params.timeseries_granularity_secs,
+            ["time"],
+        )
+
+    return SnubaTSResult(
+        {"data": result.timeseries, "processed_timeseries": result, "meta": final_meta},
+        params.start,
+        params.end,
+        params.granularity_secs,
+    )
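
The zip/update loop above folds per-group series together bucket by bucket; a self-contained sketch of that pattern with fabricated buckets:

# Standalone illustration of the zip/update merge used in run_timeseries_query;
# the bucket contents are made up for the example.
existing_series = [{"time": 1, "count()": 3}, {"time": 2, "count()": 0}]
new_series = [{"time": 1, "other": 1.5}, {"time": 2, "other": 0.0}]
for existing, new in zip(existing_series, new_series):
    existing.update(new)  # buckets share timestamps, so update merges the values
# existing_series now holds both columns per bucket
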
Lines changed: 112 additions & 0 deletions
@@ -0,0 +1,112 @@
+from datetime import timedelta
+
+from django.urls import reverse
+
+from sentry.testutils.helpers.datetime import before_now
+from tests.snuba.api.endpoints.test_organization_events import OrganizationEventsEndpointTestBase
+from tests.snuba.api.endpoints.test_organization_events_timeseries_spans import (
+    AnyConfidence,
+    build_expected_timeseries,
+)
+
+any_confidence = AnyConfidence()
+
+
+class OrganizationEventsStatsOurlogsMetricsEndpointTest(OrganizationEventsEndpointTestBase):
+    endpoint = "sentry-api-0-organization-events-timeseries"
+
+    def setUp(self):
+        super().setUp()
+        self.login_as(user=self.user)
+        self.start = self.day_ago = before_now(days=1).replace(
+            hour=10, minute=0, second=0, microsecond=0
+        )
+        self.end = self.start + timedelta(hours=6)
+        self.two_days_ago = self.day_ago - timedelta(days=1)
+
+        self.url = reverse(
+            self.endpoint,
+            kwargs={"organization_id_or_slug": self.project.organization.slug},
+        )
+
+    def _do_request(self, data, url=None, features=None):
+        if features is None:
+            features = {"organizations:ourlogs": True}
+        features.update(self.features)
+        with self.feature(features):
+            return self.client.get(self.url if url is None else url, data=data, format="json")
+
+    def test_count(self):
+        event_counts = [6, 0, 6, 3, 0, 3]
+        logs = []
+        for hour, count in enumerate(event_counts):
+            logs.extend(
+                [
+                    self.create_ourlog(
+                        {"body": "foo"},
+                        timestamp=self.start + timedelta(hours=hour, minutes=minute),
+                        attributes={"status": {"string_value": "success"}},
+                    )
+                    for minute in range(count)
+                ],
+            )
+        self.store_ourlogs(logs)
+
+        response = self._do_request(
+            data={
+                "start": self.start,
+                "end": self.end,
+                "interval": "1h",
+                "yAxis": "count()",
+                "project": self.project.id,
+                "dataset": "ourlogs",
+            },
+        )
+        assert response.status_code == 200, response.content
+        assert response.data["meta"] == {
+            "dataset": "ourlogs",
+            "start": self.start.timestamp() * 1000,
+            "end": self.end.timestamp() * 1000,
+        }
+        assert len(response.data["timeseries"]) == 1
+        timeseries = response.data["timeseries"][0]
+        assert len(timeseries["values"]) == 6
+        assert timeseries["yaxis"] == "count()"
+        assert timeseries["values"] == build_expected_timeseries(
+            self.start,
+            3_600_000,
+            event_counts,
+            sample_count=event_counts,
+            sample_rate=[1 if val else 0 for val in event_counts],
+            confidence=[any_confidence if val else None for val in event_counts],
+        )
+        assert timeseries["meta"] == {
+            "valueType": "integer",
+            "interval": 3_600_000,
+        }
+
+    def test_zerofill(self):
+        response = self._do_request(
+            data={
+                "start": self.start,
+                "end": self.end,
+                "interval": "1h",
+                "yAxis": "count()",
+                "project": self.project.id,
+                "dataset": "ourlogs",
+            },
+        )
+        assert response.status_code == 200, response.content
+        assert response.data["meta"] == {
+            "dataset": "ourlogs",
+            "start": self.start.timestamp() * 1000,
+            "end": self.end.timestamp() * 1000,
+        }
+        assert len(response.data["timeseries"]) == 1
+        timeseries = response.data["timeseries"][0]
+        assert len(timeseries["values"]) == 7
+        assert timeseries["values"] == build_expected_timeseries(
+            self.start,
+            3_600_000,
+            [0] * 7,
+        )
