
Commit afa5060

feat(timeseries): Query timeseries like discover does (#91978)

- This updates how we query timeseries to be closer to how it was done in discover
- TODO: Verify this works on events-timeseries too

1 parent: 62bf068
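For context, a minimal standalone sketch of the bucket-alignment idea this commit implements (plain datetimes only, no RPC types; `quantize_window` is a hypothetical name, not part of the diff): round the requested window outward to granularity boundaries so buckets land on stable timestamps, then re-apply the original bounds as a data filter.

```python
import math
from datetime import datetime, timezone


def quantize_window(
    start: datetime, end: datetime, granularity_secs: int
) -> tuple[datetime, datetime]:
    """Round start down and end up to the nearest granularity boundary."""
    floored = math.floor(start.timestamp() / granularity_secs) * granularity_secs
    ceiled = math.ceil(end.timestamp() / granularity_secs) * granularity_secs
    return (
        datetime.fromtimestamp(floored, tz=timezone.utc),
        datetime.fromtimestamp(ceiled, tz=timezone.utc),
    )


# A 10:12-22:12 request at 1h granularity widens to 10:00-23:00; the original
# 10:12/22:12 bounds are then re-applied as a timestamp filter so spans outside
# the requested range are not counted.
start = datetime(2025, 1, 2, 10, 12, tzinfo=timezone.utc)
end = datetime(2025, 1, 2, 22, 12, tzinfo=timezone.utc)
print(quantize_window(start, end, 3600))
```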

File tree: 5 files changed, +148 −12 lines


src/sentry/search/eap/types.py (2 additions & 0 deletions)

```diff
@@ -24,6 +24,8 @@ class SearchResolverConfig:
     process_results: bool = True
     # If a field is private, it will only be available if it is in the `fields_acl`
     fields_acl: FieldsACL = field(default_factory=lambda: FieldsACL())
+    # Whether to set the timestamp granularities to stable buckets
+    stable_timestamp_quantization: bool = True
 
 
 CONFIDENCES: dict[Reliability.ValueType, Literal["low", "high"]] = {
```
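As a quick usage note (mirroring the call sites elsewhere in this commit), the flag defaults to `True`, so existing callers keep quantized buckets unless they explicitly opt out:

```python
from sentry.search.eap.types import SearchResolverConfig

config = SearchResolverConfig()  # stable_timestamp_quantization defaults to True
raw_window_config = SearchResolverConfig(stable_timestamp_quantization=False)  # opt out
```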

src/sentry/snuba/entity_subscription.py (3 additions & 1 deletion)

```diff
@@ -273,7 +273,9 @@ def build_rpc_request(
             end=now,
             granularity_secs=self.time_window,
         )
-        search_resolver = spans_rpc.get_resolver(snuba_params, SearchResolverConfig())
+        search_resolver = spans_rpc.get_resolver(
+            snuba_params, SearchResolverConfig(stable_timestamp_quantization=False)
+        )
 
         rpc_request, _, _ = rpc_dataset_common.get_timeseries_query(
             search_resolver=search_resolver,
```

src/sentry/snuba/rpc_dataset_common.py (74 additions & 3 deletions)

```diff
@@ -1,5 +1,7 @@
 import logging
+import math
 from dataclasses import dataclass, field
+from datetime import datetime
 
 import sentry_sdk
 from google.protobuf.json_format import MessageToJson
@@ -14,8 +16,12 @@
     TraceItemTableResponse,
 )
 from sentry_protos.snuba.v1.request_common_pb2 import PageToken
-from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
-from sentry_protos.snuba.v1.trace_item_filter_pb2 import AndFilter, TraceItemFilter
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
+from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
+    AndFilter,
+    ComparisonFilter,
+    TraceItemFilter,
+)
 
 from sentry.exceptions import InvalidSearchQuery
 from sentry.search.eap.columns import (
@@ -24,7 +30,7 @@
     ResolvedConditionalAggregate,
     ResolvedFormula,
 )
-from sentry.search.eap.constants import MAX_ROLLUP_POINTS, VALID_GRANULARITIES
+from sentry.search.eap.constants import DOUBLE, MAX_ROLLUP_POINTS, VALID_GRANULARITIES
 from sentry.search.eap.resolver import SearchResolver
 from sentry.search.eap.types import CONFIDENCES, ConfidenceData, EAPResponse
 from sentry.search.eap.utils import handle_downsample_meta, transform_binary_formula_to_expression
@@ -102,6 +108,64 @@ def categorize_aggregate(
     )
 
 
+def update_timestamps(
+    params: SnubaParams, resolver: SearchResolver
+) -> tuple[TraceItemFilter | None, SnubaParams]:
+    """We need to update the snuba params to query a wider period than requested so that we get
+    aligned granularities while still only returning data for the requested period.
+
+    This is necessary because, to quote the platform team:
+    "the platform will not be changing its behavior to accommodate this request. The endpoint's
+    capabilities are currently flexible enough to allow the client to build either thing. Whether
+    it's rounding time buckets or not, that behavior is up to you. Creating two separate almost
+    identical endpoints to allow for both behaviors is also not going to happen."
+    """
+    if not resolver.config.stable_timestamp_quantization:
+        return None, params
+    elif (
+        params.start is not None and params.end is not None and params.granularity_secs is not None
+    ):
+        # Doing this via timestamps as it's the most direct and matches how it's stored under the hood
+        start = int(params.start.replace(tzinfo=None).timestamp())
+        end = int(params.end.replace(tzinfo=None).timestamp())
+        timeseries_definition, _ = resolver.resolve_attribute("timestamp")
+        # We need the timestamp as a double, even though that's not how the resolver does it, so
+        # we can pass the timestamp in directly
+        timeseries_column = AttributeKey(name=timeseries_definition.internal_name, type=DOUBLE)
+
+        # Create an AND filter with the date range that the user selected
+        ts_filter = TraceItemFilter(
+            and_filter=AndFilter(
+                filters=[
+                    TraceItemFilter(
+                        comparison_filter=ComparisonFilter(
+                            key=timeseries_column,
+                            op=ComparisonFilter.OP_GREATER_THAN_OR_EQUALS,
+                            value=AttributeValue(val_int=start),
+                        )
+                    ),
+                    TraceItemFilter(
+                        comparison_filter=ComparisonFilter(
+                            key=timeseries_column,
+                            op=ComparisonFilter.OP_LESS_THAN,
+                            value=AttributeValue(val_int=end),
+                        )
+                    ),
+                ]
+            )
+        )
+
+        # Round the start & end so that we get buckets that match the granularity
+        params.start = datetime.fromtimestamp(
+            math.floor(params.start.timestamp() / params.granularity_secs) * params.granularity_secs
+        )
+        params.end = datetime.fromtimestamp(
+            math.ceil(params.end.timestamp() / params.granularity_secs) * params.granularity_secs
+        )
+        return ts_filter, params
+    else:
+        raise InvalidSearchQuery("start, end and interval are required")
+
+
 def get_timeseries_query(
     search_resolver: SearchResolver,
     params: SnubaParams,
@@ -116,6 +180,7 @@ def get_timeseries_query(
     list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate],
     list[ResolvedAttribute],
 ]:
+    timeseries_filter, params = update_timestamps(params, search_resolver)
     meta = search_resolver.resolve_meta(referrer=referrer, sampling_mode=sampling_mode)
     query, _, query_contexts = search_resolver.resolve_query(query_string)
     (functions, _) = search_resolver.resolve_functions(y_axes)
@@ -136,6 +201,12 @@ def get_timeseries_query(
     else:
         query = extra_conditions
 
+    if timeseries_filter is not None:
+        if query is not None:
+            query = TraceItemFilter(and_filter=AndFilter(filters=[query, timeseries_filter]))
+        else:
+            query = timeseries_filter
+
     return (
         TimeSeriesRequest(
             meta=meta,
```
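To make the merging branch at the end of `get_timeseries_query` concrete, here is a sketch of the same logic factored into a helper (`combine_filters` is a hypothetical name, not part of the diff; the protobuf types come from the imports shown above):

```python
from sentry_protos.snuba.v1.trace_item_filter_pb2 import AndFilter, TraceItemFilter


def combine_filters(
    query: TraceItemFilter | None, ts_filter: TraceItemFilter | None
) -> TraceItemFilter | None:
    # AND the resolved query with the timestamp range filter when both exist;
    # otherwise fall back to whichever one is present.
    if ts_filter is None:
        return query
    if query is None:
        return ts_filter
    return TraceItemFilter(and_filter=AndFilter(filters=[query, ts_filter]))
```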

tests/snuba/api/endpoints/test_organization_events_stats_span_indexed.py (61 additions & 2 deletions)

```diff
@@ -1140,7 +1140,7 @@ def test_throughput_epm_hour_rollup(self):
         assert len(data) == 6
         assert response.data["meta"]["dataset"] == self.dataset
 
-        rows = data[0:6]
+        rows = data[:6]
         for test in zip(event_counts, rows):
             self.assertAlmostEqual(test[1][1][0]["count"], test[0] / (3600.0 / 60.0))
 
@@ -1206,7 +1206,7 @@ def test_throughput_epm_hour_rollup_offset_of_hour(self):
         assert response.status_code == 200, response.content
         data = response.data["data"]
         meta = response.data["meta"]
-        assert len(data) == 6
+        assert len(data) == 7
         assert meta["dataset"] == self.dataset
 
         rows = data[0:6]
@@ -2133,3 +2133,62 @@ def test_top_n_is_transaction(self):
         )
         assert response.status_code == 200, response.content
         assert set(response.data.keys()) == {"True", "False"}
+
+    def test_datetime_unaligned_with_regular_buckets(self):
+        """When querying from 10:12-22:12 with 1 hour intervals, the returned buckets should be
+        on the hour (i.e. 10:00, 11:00, 12:00), but the data should still be constrained to
+        10:12-22:12"""
+        spans = []
+        # Create a span at 10:05; this should not be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(minutes=5),
+            )
+        )
+        # Create a span at 10:30; this should be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(minutes=30),
+            )
+        )
+        # Create a span at 22:05; this should be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(hours=12, minutes=5),
+            )
+        )
+        self.store_spans(spans, is_eap=self.is_eap)
+
+        # self.day_ago is 10:00 the previous day, so this queries from 10:12 to 22:12
+        query_start = self.day_ago + timedelta(minutes=12)
+        query_end = self.day_ago + timedelta(hours=12, minutes=12)
+        response = self._do_request(
+            data={
+                "start": query_start,
+                "end": query_end,
+                "interval": "1h",
+                "yAxis": "count()",
+                "project": self.project.id,
+                "dataset": self.dataset,
+            },
+        )
+        assert response.status_code == 200, response.content
+        data = response.data["data"]
+        assert len(data) == 13
+        assert response.data["meta"]["dataset"] == self.dataset
+        # The first bucket should be at 10:00 and contain exactly 1 event
+        assert data[0] == (self.day_ago.timestamp(), [{"count": 1}])
+        # The last bucket should be at 22:00 and also contain exactly 1 event
+        assert data[-1] == ((self.day_ago + timedelta(hours=12)).timestamp(), [{"count": 1}])
```
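As a sanity check on the expected count of 13: flooring 10:12 to 10:00 and ceiling 22:12 to 23:00 with an exclusive end yields one bucket per hour at 10:00 through 22:00.

```python
import math

start = 10 * 3600 + 12 * 60  # 10:12, as seconds into the day
end = 22 * 3600 + 12 * 60    # 22:12
granularity = 3600           # "1h" interval

rounded_start = math.floor(start / granularity) * granularity  # 10:00
rounded_end = math.ceil(end / granularity) * granularity       # 23:00
print((rounded_end - rounded_start) // granularity)            # 13 buckets
```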

tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py (8 additions & 6 deletions)

```diff
@@ -192,17 +192,19 @@ def test_handle_nans_from_snuba_top_n(self):
         interval = 24 * 60 * 60 * 1000
         assert response.status_code == 200, response.content
 
+        rounded_start = seven_days_ago.replace(hour=0, minute=0)
+        rounded_end = rounded_start + timedelta(days=7)
         assert response.data["meta"] == {
             "dataset": "spans",
-            "start": seven_days_ago.timestamp() * 1000,
-            "end": self.end.timestamp() * 1000,
+            "start": rounded_start.timestamp() * 1000,
+            "end": rounded_end.timestamp() * 1000,
         }
         assert len(response.data["timeseries"]) == 4
         timeseries = response.data["timeseries"][0]
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "p50(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
         )
         assert timeseries["groupBy"] == [{"key": "span.description", "value": "bar"}]
         assert timeseries["meta"] == {
@@ -217,7 +219,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "avg(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
         )
         assert timeseries["groupBy"] == [{"key": "span.description", "value": "bar"}]
         assert timeseries["meta"] == {
@@ -232,7 +234,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "p50(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
         )
         assert timeseries["groupBy"] is None
         assert timeseries["meta"] == {
@@ -247,7 +249,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "avg(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
         )
         assert timeseries["groupBy"] is None
         assert timeseries["meta"] == {
```
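The meta change above follows the same quantization rule: with a 24h interval the start is floored to midnight, and a window spanning exactly seven whole days still produces seven daily buckets. A small sketch with an assumed (hypothetical) unaligned start time:

```python
from datetime import datetime, timedelta

seven_days_ago = datetime(2025, 1, 1, 9, 30)  # hypothetical unaligned start
rounded_start = seven_days_ago.replace(hour=0, minute=0)  # floored to midnight
rounded_end = rounded_start + timedelta(days=7)
print((rounded_end - rounded_start) // timedelta(days=1))  # 7 daily buckets
```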
