
Commit afa5060

feat(timeseries): Query timeseries like discover does (#91978)

- This updates how we query timeseries to be closer to how it was done in discover
- TODO: Verify this works on events-timeseries too

1 parent: 62bf068
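For context, a minimal standalone sketch of the bucket-alignment idea this commit implements (plain datetimes only, no RPC types; `quantize_window` is a hypothetical name, not part of the diff): round the requested window outward to granularity boundaries so buckets land on stable timestamps, then re-apply the original bounds as a data filter.

```python
import math
from datetime import datetime, timezone


def quantize_window(
    start: datetime, end: datetime, granularity_secs: int
) -> tuple[datetime, datetime]:
    """Round start down and end up to the nearest granularity boundary."""
    floored = math.floor(start.timestamp() / granularity_secs) * granularity_secs
    ceiled = math.ceil(end.timestamp() / granularity_secs) * granularity_secs
    return (
        datetime.fromtimestamp(floored, tz=timezone.utc),
        datetime.fromtimestamp(ceiled, tz=timezone.utc),
    )


# A 10:12-22:12 request at 1h granularity widens to 10:00-23:00; the original
# 10:12/22:12 bounds are then re-applied as a timestamp filter so spans outside
# the requested range are not counted.
start = datetime(2025, 1, 2, 10, 12, tzinfo=timezone.utc)
end = datetime(2025, 1, 2, 22, 12, tzinfo=timezone.utc)
print(quantize_window(start, end, 3600))
```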

File tree: 5 files changed, +148 −12 lines


src/sentry/search/eap/types.py (2 additions & 0 deletions)

```diff
@@ -24,6 +24,8 @@ class SearchResolverConfig:
     process_results: bool = True
     # If a field is private, it will only be available if it is in the `fields_acl`
     fields_acl: FieldsACL = field(default_factory=lambda: FieldsACL())
+    # Whether to set the timestamp granularities to stable buckets
+    stable_timestamp_quantization: bool = True
 
 
 CONFIDENCES: dict[Reliability.ValueType, Literal["low", "high"]] = {
```
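As a quick usage note (mirroring the call sites elsewhere in this commit), the flag defaults to `True`, so existing callers keep quantized buckets unless they explicitly opt out:

```python
from sentry.search.eap.types import SearchResolverConfig

config = SearchResolverConfig()  # stable_timestamp_quantization defaults to True
raw_window_config = SearchResolverConfig(stable_timestamp_quantization=False)  # opt out
```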

src/sentry/snuba/entity_subscription.py (3 additions & 1 deletion)

```diff
@@ -273,7 +273,9 @@ def build_rpc_request(
             end=now,
             granularity_secs=self.time_window,
         )
-        search_resolver = spans_rpc.get_resolver(snuba_params, SearchResolverConfig())
+        search_resolver = spans_rpc.get_resolver(
+            snuba_params, SearchResolverConfig(stable_timestamp_quantization=False)
+        )
 
         rpc_request, _, _ = rpc_dataset_common.get_timeseries_query(
             search_resolver=search_resolver,
```

src/sentry/snuba/rpc_dataset_common.py (74 additions & 3 deletions)

```diff
@@ -1,5 +1,7 @@
 import logging
+import math
 from dataclasses import dataclass, field
+from datetime import datetime
 
 import sentry_sdk
 from google.protobuf.json_format import MessageToJson
@@ -14,8 +16,12 @@
     TraceItemTableResponse,
 )
 from sentry_protos.snuba.v1.request_common_pb2 import PageToken
-from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
-from sentry_protos.snuba.v1.trace_item_filter_pb2 import AndFilter, TraceItemFilter
+from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
+from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
+    AndFilter,
+    ComparisonFilter,
+    TraceItemFilter,
+)
 
 from sentry.exceptions import InvalidSearchQuery
 from sentry.search.eap.columns import (
@@ -24,7 +30,7 @@
     ResolvedConditionalAggregate,
     ResolvedFormula,
 )
-from sentry.search.eap.constants import MAX_ROLLUP_POINTS, VALID_GRANULARITIES
+from sentry.search.eap.constants import DOUBLE, MAX_ROLLUP_POINTS, VALID_GRANULARITIES
 from sentry.search.eap.resolver import SearchResolver
 from sentry.search.eap.types import CONFIDENCES, ConfidenceData, EAPResponse
 from sentry.search.eap.utils import handle_downsample_meta, transform_binary_formula_to_expression
@@ -102,6 +108,64 @@ def categorize_aggregate(
     )
 
 
+def update_timestamps(
+    params: SnubaParams, resolver: SearchResolver
+) -> tuple[TraceItemFilter | None, SnubaParams]:
+    """We need to update the snuba params to query a wider period than requested so that we get
+    aligned granularities while still only returning data for the requested period.
+
+    This is necessary because, to quote the platform team:
+    "the platform will not be changing its behavior to accommodate this request. The endpoint's
+    capabilities are currently flexible enough to allow the client to build either thing. Whether
+    it's rounding time buckets or not, that behavior is up to you. Creating two separate almost
+    identical endpoints to allow for both behaviors is also not going to happen."
+    """
+    if not resolver.config.stable_timestamp_quantization:
+        return None, params
+    elif (
+        params.start is not None and params.end is not None and params.granularity_secs is not None
+    ):
+        # Doing this via timestamps as it's the most direct and matches how it's stored under the hood
+        start = int(params.start.replace(tzinfo=None).timestamp())
+        end = int(params.end.replace(tzinfo=None).timestamp())
+        timeseries_definition, _ = resolver.resolve_attribute("timestamp")
+        # We need the timestamp as a double, even though that's not how the resolver does it, so
+        # we can pass the timestamp in directly
+        timeseries_column = AttributeKey(name=timeseries_definition.internal_name, type=DOUBLE)
+
+        # Create an AND filter with the date range that the user selected
+        ts_filter = TraceItemFilter(
+            and_filter=AndFilter(
+                filters=[
+                    TraceItemFilter(
+                        comparison_filter=ComparisonFilter(
+                            key=timeseries_column,
+                            op=ComparisonFilter.OP_GREATER_THAN_OR_EQUALS,
+                            value=AttributeValue(val_int=start),
+                        )
+                    ),
+                    TraceItemFilter(
+                        comparison_filter=ComparisonFilter(
+                            key=timeseries_column,
+                            op=ComparisonFilter.OP_LESS_THAN,
+                            value=AttributeValue(val_int=end),
+                        )
+                    ),
+                ]
+            )
+        )
+
+        # Round the start & end so that we get buckets that match the granularity
+        params.start = datetime.fromtimestamp(
+            math.floor(params.start.timestamp() / params.granularity_secs) * params.granularity_secs
+        )
+        params.end = datetime.fromtimestamp(
+            math.ceil(params.end.timestamp() / params.granularity_secs) * params.granularity_secs
+        )
+        return ts_filter, params
+    else:
+        raise InvalidSearchQuery("start, end and interval are required")
+
+
 def get_timeseries_query(
     search_resolver: SearchResolver,
     params: SnubaParams,
@@ -116,6 +180,7 @@ def get_timeseries_query(
     list[ResolvedFormula | ResolvedAggregate | ResolvedConditionalAggregate],
     list[ResolvedAttribute],
 ]:
+    timeseries_filter, params = update_timestamps(params, search_resolver)
     meta = search_resolver.resolve_meta(referrer=referrer, sampling_mode=sampling_mode)
     query, _, query_contexts = search_resolver.resolve_query(query_string)
     (functions, _) = search_resolver.resolve_functions(y_axes)
@@ -136,6 +201,12 @@ def get_timeseries_query(
     else:
         query = extra_conditions
 
+    if timeseries_filter is not None:
+        if query is not None:
+            query = TraceItemFilter(and_filter=AndFilter(filters=[query, timeseries_filter]))
+        else:
+            query = timeseries_filter
+
     return (
         TimeSeriesRequest(
             meta=meta,
```
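To make the merging branch at the end of `get_timeseries_query` concrete, here is a sketch of the same logic factored into a helper (`combine_filters` is a hypothetical name, not part of the diff; the protobuf types come from the imports shown above):

```python
from sentry_protos.snuba.v1.trace_item_filter_pb2 import AndFilter, TraceItemFilter


def combine_filters(
    query: TraceItemFilter | None, ts_filter: TraceItemFilter | None
) -> TraceItemFilter | None:
    # AND the resolved query with the timestamp range filter when both exist;
    # otherwise fall back to whichever one is present.
    if ts_filter is None:
        return query
    if query is None:
        return ts_filter
    return TraceItemFilter(and_filter=AndFilter(filters=[query, ts_filter]))
```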

tests/snuba/api/endpoints/test_organization_events_stats_span_indexed.py (61 additions & 2 deletions)

```diff
@@ -1140,7 +1140,7 @@ def test_throughput_epm_hour_rollup(self):
         assert len(data) == 6
         assert response.data["meta"]["dataset"] == self.dataset
 
-        rows = data[0:6]
+        rows = data[:6]
         for test in zip(event_counts, rows):
             self.assertAlmostEqual(test[1][1][0]["count"], test[0] / (3600.0 / 60.0))
 
@@ -1206,7 +1206,7 @@ def test_throughput_epm_hour_rollup_offset_of_hour(self):
         assert response.status_code == 200, response.content
         data = response.data["data"]
         meta = response.data["meta"]
-        assert len(data) == 6
+        assert len(data) == 7
         assert meta["dataset"] == self.dataset
 
         rows = data[0:6]
@@ -2133,3 +2133,62 @@ def test_top_n_is_transaction(self):
         )
         assert response.status_code == 200, response.content
         assert set(response.data.keys()) == {"True", "False"}
+
+    def test_datetime_unaligned_with_regular_buckets(self):
+        """When querying from 10:12-22:12 with 1 hour intervals, the returned buckets should be
+        on the hour (i.e. 10:00, 11:00, 12:00), but the data should still be constrained to
+        10:12-22:12"""
+        spans = []
+        # Create a span at 10:05; this should not be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(minutes=5),
+            )
+        )
+        # Create a span at 10:30; this should be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(minutes=30),
+            )
+        )
+        # Create a span at 22:05; this should be in the result
+        spans.append(
+            self.create_span(
+                {
+                    "description": "foo",
+                    "sentry_tags": {"status": "success"},
+                },
+                start_ts=self.day_ago + timedelta(hours=12, minutes=5),
+            )
+        )
+        self.store_spans(spans, is_eap=self.is_eap)
+
+        # self.day_ago is 10:00 the previous day, so this queries from 10:12 to 22:12
+        query_start = self.day_ago + timedelta(minutes=12)
+        query_end = self.day_ago + timedelta(hours=12, minutes=12)
+        response = self._do_request(
+            data={
+                "start": query_start,
+                "end": query_end,
+                "interval": "1h",
+                "yAxis": "count()",
+                "project": self.project.id,
+                "dataset": self.dataset,
+            },
+        )
+        assert response.status_code == 200, response.content
+        data = response.data["data"]
+        assert len(data) == 13
+        assert response.data["meta"]["dataset"] == self.dataset
+        # The first bucket should be at 10:00 and contain exactly 1 event
+        assert data[0] == (self.day_ago.timestamp(), [{"count": 1}])
+        # The last bucket should be at 22:00 and also contain exactly 1 event
+        assert data[-1] == ((self.day_ago + timedelta(hours=12)).timestamp(), [{"count": 1}])
```
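As a sanity check on the expected count of 13: flooring 10:12 to 10:00 and ceiling 22:12 to 23:00 with an exclusive end yields one bucket per hour at 10:00 through 22:00.

```python
import math

start = 10 * 3600 + 12 * 60  # 10:12, as seconds into the day
end = 22 * 3600 + 12 * 60    # 22:12
granularity = 3600           # "1h" interval

rounded_start = math.floor(start / granularity) * granularity  # 10:00
rounded_end = math.ceil(end / granularity) * granularity       # 23:00
print((rounded_end - rounded_start) // granularity)            # 13 buckets
```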

tests/snuba/api/endpoints/test_organization_events_timeseries_spans.py (8 additions & 6 deletions)

```diff
@@ -192,17 +192,19 @@ def test_handle_nans_from_snuba_top_n(self):
         interval = 24 * 60 * 60 * 1000
         assert response.status_code == 200, response.content
 
+        rounded_start = seven_days_ago.replace(hour=0, minute=0)
+        rounded_end = rounded_start + timedelta(days=7)
         assert response.data["meta"] == {
             "dataset": "spans",
-            "start": seven_days_ago.timestamp() * 1000,
-            "end": self.end.timestamp() * 1000,
+            "start": rounded_start.timestamp() * 1000,
+            "end": rounded_end.timestamp() * 1000,
         }
         assert len(response.data["timeseries"]) == 4
         timeseries = response.data["timeseries"][0]
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "p50(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
         )
         assert timeseries["groupBy"] == [{"key": "span.description", "value": "bar"}]
         assert timeseries["meta"] == {
@@ -217,7 +219,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "avg(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 2], ignore_accuracy=True
         )
         assert timeseries["groupBy"] == [{"key": "span.description", "value": "bar"}]
         assert timeseries["meta"] == {
@@ -232,7 +234,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "p50(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
         )
         assert timeseries["groupBy"] is None
         assert timeseries["meta"] == {
@@ -247,7 +249,7 @@ def test_handle_nans_from_snuba_top_n(self):
         assert len(timeseries["values"]) == 7
         assert timeseries["yAxis"] == "avg(measurements.lcp)"
         assert timeseries["values"] == build_expected_timeseries(
-            seven_days_ago, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
+            rounded_start, interval, [0, 0, 0, 0, 0, 0, 1], ignore_accuracy=True
         )
         assert timeseries["groupBy"] is None
         assert timeseries["meta"] == {
```
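The meta change above follows the same quantization rule: with a 24h interval the start is floored to midnight, and a window spanning exactly seven whole days still produces seven daily buckets. A small sketch with an assumed (hypothetical) unaligned start time:

```python
from datetime import datetime, timedelta

seven_days_ago = datetime(2025, 1, 1, 9, 30)  # hypothetical unaligned start
rounded_start = seven_days_ago.replace(hour=0, minute=0)  # floored to midnight
rounded_end = rounded_start + timedelta(days=7)
print((rounded_end - rounded_start) // timedelta(days=1))  # 7 daily buckets
```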
