From 2f4a4b4bb660a458012299abdca4aa5d4908b239 Mon Sep 17 00:00:00 2001 From: Shruthilaya Jaganathan Date: Tue, 16 Apr 2024 14:08:55 -0400 Subject: [PATCH] feat(spans): Indexed spans for aggregate span waterfall --- .../organization_spans_aggregation.py | 19 +- .../test_organization_spans_aggregation.py | 375 +++++++++++------- 2 files changed, 240 insertions(+), 154 deletions(-) diff --git a/src/sentry/api/endpoints/organization_spans_aggregation.py b/src/sentry/api/endpoints/organization_spans_aggregation.py index a70d15d21bc3ff..68aab2d0d030af 100644 --- a/src/sentry/api/endpoints/organization_spans_aggregation.py +++ b/src/sentry/api/endpoints/organization_spans_aggregation.py @@ -1,7 +1,7 @@ import hashlib from collections import defaultdict, namedtuple from collections.abc import Mapping -from datetime import datetime +from datetime import datetime, timezone from typing import Any, Optional, TypedDict import sentry_sdk @@ -22,6 +22,7 @@ from sentry.utils.snuba import raw_snql_query ALLOWED_BACKENDS = ["indexedSpans", "nodestore"] +CUTOVER_DATE = datetime(2024, 3, 22, tzinfo=timezone.utc) EventSpan = namedtuple( "EventSpan", @@ -336,9 +337,7 @@ class OrganizationSpansAggregationEndpoint(OrganizationEventsEndpointBase): } def get(self, request: Request, organization: Organization) -> Response: - if not features.has( - "organizations:starfish-aggregate-span-waterfall", organization, actor=request.user - ): + if not features.has("organizations:spans-first-ui", organization, actor=request.user): return Response(status=404) try: @@ -346,6 +345,12 @@ def get(self, request: Request, organization: Organization) -> Response: except NoProjects: return Response(status=404) + start = params["start"] + if start and start < CUTOVER_DATE: + backend = "nodestore" + else: + backend = "indexedSpans" + transaction = request.query_params.get("transaction", None) http_method = request.query_params.get("http.method", None) if transaction is None: @@ -353,14 +358,8 @@ def get(self, request: Request, organization: Organization) -> Response: status=status.HTTP_400_BAD_REQUEST, data={"details": "Transaction not provided"} ) - backend = request.query_params.get("backend", "nodestore") sentry_sdk.set_tag("aggregate_spans.backend", backend) - if backend not in ALLOWED_BACKENDS: - return Response( - status=status.HTTP_400_BAD_REQUEST, data={"details": "Backend not supported"} - ) - query = f"transaction:{transaction}" if http_method is not None: query += f" transaction.method:{http_method}" diff --git a/tests/snuba/api/endpoints/test_organization_spans_aggregation.py b/tests/snuba/api/endpoints/test_organization_spans_aggregation.py index 1c0c9256fec4b0..57d04b7dc09bb7 100644 --- a/tests/snuba/api/endpoints/test_organization_spans_aggregation.py +++ b/tests/snuba/api/endpoints/test_organization_spans_aggregation.py @@ -8,7 +8,7 @@ from sentry.api.endpoints.organization_spans_aggregation import NULL_GROUP from sentry.testutils.cases import APITestCase, SnubaTestCase -from sentry.testutils.helpers.datetime import before_now +from sentry.testutils.helpers.datetime import before_now, freeze_time from sentry.utils.samples import load_data MOCK_SNUBA_RESPONSE = { @@ -163,10 +163,154 @@ } -class OrganizationSpansAggregationTest(APITestCase, SnubaTestCase): +@freeze_time("2024-03-26T00:00:00.000Z") +class OrganizationIndexedSpansAggregationTest(APITestCase, SnubaTestCase): url_name = "sentry-api-0-organization-spans-aggregation" FEATURES = [ - "organizations:starfish-aggregate-span-waterfall", + "organizations:spans-first-ui", + "organizations:performance-view", + ] + + def setUp(self): + super().setUp() + self.login_as(user=self.user) + + self.day_ago = before_now(days=1).replace(hour=10, minute=0, second=0, microsecond=0) + + self.url = reverse( + self.url_name, + kwargs={"organization_slug": self.project.organization.slug}, + ) + + @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") + def test_simple(self, mock_query): + mock_query.side_effect = [MOCK_SNUBA_RESPONSE] + with self.feature(self.FEATURES): + response = self.client.get( + self.url, + data={"transaction": "api/0/foo", "statsPeriod": "1d"}, + format="json", + ) + + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["count()"] == 2 + assert data[root_fingerprint]["description"] == "api/0/foo" + assert round(data[root_fingerprint]["avg(duration)"]) == 850 + + assert data[root_fingerprint]["samples"] == { + ("80fe542aea4945ffbe612646987ee449", "root_1"), + ("86b21833d1854d9b811000b91e7fccfa", "root_2"), + } + assert data[root_fingerprint]["sample_spans"] == [ + { + "transaction": "80fe542aea4945ffbe612646987ee449", + "timestamp": 1694625139.1, + "span": "root_1", + "trace": "a" * 32, + }, + { + "transaction": "86b21833d1854d9b811000b91e7fccfa", + "timestamp": 1694625159.1, + "span": "root_2", + "trace": "b" * 32, + }, + ] + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] + assert data[fingerprint]["description"] == "connect" + assert round(data[fingerprint]["avg(duration)"]) == 30 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] + assert data[fingerprint]["description"] == "resolve_orderby" + assert data[fingerprint]["avg(exclusive_time)"] == 15.0 + assert data[fingerprint]["count()"] == 2 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] + assert data[fingerprint]["description"] == "resolve_orderby" + assert data[fingerprint]["avg(exclusive_time)"] == 20.0 + assert data[fingerprint]["count()"] == 1 + + @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") + def test_offset_logic(self, mock_query): + mock_query.side_effect = [MOCK_SNUBA_RESPONSE] + with self.feature(self.FEATURES): + response = self.client.get( + self.url, + data={"transaction": "api/0/foo", "statsPeriod": "1d"}, + format="json", + ) + + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 30.0 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 35.0 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 53.5 + + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 1075.0 + + @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") + def test_null_group_fallback(self, mock_query): + mock_query.side_effect = [MOCK_SNUBA_RESPONSE] + with self.feature(self.FEATURES): + response = self.client.get( + self.url, + data={"transaction": "api/0/foo", "statsPeriod": "1d"}, + format="json", + ) + + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["description"] == "" + assert data[root_fingerprint]["count()"] == 2 + + @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") + def test_http_method_filter(self, mock_query): + with self.feature(self.FEATURES): + self.client.get( + self.url, + data={"transaction": "api/0/foo", "http.method": "GET", "statsPeriod": "1d"}, + format="json", + ) + + assert ( + Condition( + lhs=Function( + function="ifNull", + parameters=[ + Column( + name="sentry_tags[transaction.method]", + ), + "", + ], + alias=None, + ), + op=Op.EQ, + rhs="GET", + ) + in mock_query.mock_calls[0].args[0].query.where + ) + + +@freeze_time("2024-03-20T00:00:00.000Z") +class OrganizationNodestoreSpansAggregationTest(APITestCase, SnubaTestCase): + url_name = "sentry-api-0-organization-spans-aggregation" + FEATURES = [ + "organizations:spans-first-ui", "organizations:performance-view", ] @@ -418,179 +562,124 @@ def setUp(self): kwargs={"organization_slug": self.project.organization.slug}, ) - @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") - def test_simple(self, mock_query): - mock_query.side_effect = [MOCK_SNUBA_RESPONSE] - for backend in ["indexedSpans", "nodestore"]: - with self.feature(self.FEATURES): - response = self.client.get( - self.url, - data={"transaction": "api/0/foo", "backend": backend}, - format="json", - ) - - assert response.data - data = response.data - root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] - assert root_fingerprint in data - assert data[root_fingerprint]["count()"] == 2 - assert data[root_fingerprint]["description"] == "api/0/foo" - assert round(data[root_fingerprint]["avg(duration)"]) == 850 - - if backend == "indexedSpans": - assert data[root_fingerprint]["samples"] == { - ("80fe542aea4945ffbe612646987ee449", "root_1"), - ("86b21833d1854d9b811000b91e7fccfa", "root_2"), - } - assert data[root_fingerprint]["sample_spans"] == [ - { - "transaction": "80fe542aea4945ffbe612646987ee449", - "timestamp": 1694625139.1, - "span": "root_1", - "trace": "a" * 32, - }, - { - "transaction": "86b21833d1854d9b811000b91e7fccfa", - "timestamp": 1694625159.1, - "span": "root_2", - "trace": "b" * 32, - }, - ] - else: - assert data[root_fingerprint]["samples"] == { - ( - self.root_event_1.event_id, - self.span_ids_event_1["A"], - ), - ( - self.root_event_2.event_id, - self.span_ids_event_2["A"], - ), - } - assert data[root_fingerprint]["sample_spans"] == [ - { - "transaction": self.root_event_1.event_id, - "timestamp": self.root_event_1.data["start_timestamp"], - "span": self.span_ids_event_1["A"], - "trace": self.root_event_1.data["contexts"]["trace"]["trace_id"], - }, - { - "transaction": self.root_event_2.event_id, - "timestamp": self.root_event_2.data["start_timestamp"], - "span": self.span_ids_event_2["A"], - "trace": self.root_event_2.data["contexts"]["trace"]["trace_id"], - }, - ] - - fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] - assert data[fingerprint]["description"] == "connect" - assert round(data[fingerprint]["avg(duration)"]) == 30 + def test_simple(self): + with self.feature(self.FEATURES): + response = self.client.get( + self.url, + data={"transaction": "api/0/foo"}, + format="json", + ) - fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] - assert data[fingerprint]["description"] == "resolve_orderby" - assert data[fingerprint]["avg(exclusive_time)"] == 15.0 - assert data[fingerprint]["count()"] == 2 + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["count()"] == 2 + assert data[root_fingerprint]["description"] == "api/0/foo" + assert round(data[root_fingerprint]["avg(duration)"]) == 850 + + assert data[root_fingerprint]["samples"] == { + ( + self.root_event_1.event_id, + self.span_ids_event_1["A"], + ), + ( + self.root_event_2.event_id, + self.span_ids_event_2["A"], + ), + } + assert data[root_fingerprint]["sample_spans"] == [ + { + "transaction": self.root_event_1.event_id, + "timestamp": self.root_event_1.data["start_timestamp"], + "span": self.span_ids_event_1["A"], + "trace": self.root_event_1.data["contexts"]["trace"]["trace_id"], + }, + { + "transaction": self.root_event_2.event_id, + "timestamp": self.root_event_2.data["start_timestamp"], + "span": self.span_ids_event_2["A"], + "trace": self.root_event_2.data["contexts"]["trace"]["trace_id"], + }, + ] - fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] - assert data[fingerprint]["description"] == "resolve_orderby" - assert data[fingerprint]["avg(exclusive_time)"] == 20.0 - assert data[fingerprint]["count()"] == 1 + fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] + assert data[fingerprint]["description"] == "connect" + assert round(data[fingerprint]["avg(duration)"]) == 30 - @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") - def test_offset_logic(self, mock_query): - mock_query.side_effect = [MOCK_SNUBA_RESPONSE] - for backend in ["indexedSpans", "nodestore"]: - with self.feature(self.FEATURES): - response = self.client.get( - self.url, - data={"transaction": "api/0/foo", "backend": backend}, - format="json", - ) + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] + assert data[fingerprint]["description"] == "resolve_orderby" + assert data[fingerprint]["avg(exclusive_time)"] == 15.0 + assert data[fingerprint]["count()"] == 2 - assert response.data - data = response.data - root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] - assert root_fingerprint in data - assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0 + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] + assert data[fingerprint]["description"] == "resolve_orderby" + assert data[fingerprint]["avg(exclusive_time)"] == 20.0 + assert data[fingerprint]["count()"] == 1 - fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] - assert data[fingerprint]["avg(absolute_offset)"] == 30.0 + def test_offset_logic(self): + with self.feature(self.FEATURES): + response = self.client.get( + self.url, + data={"transaction": "api/0/foo"}, + format="json", + ) - fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16] - assert data[fingerprint]["avg(absolute_offset)"] == 35.0 + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["avg(absolute_offset)"] == 0.0 - fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] - assert data[fingerprint]["avg(absolute_offset)"] == 53.5 + fingerprint = hashlib.md5(b"e238e6c2e2466b07-B").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 30.0 - fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] - assert data[fingerprint]["avg(absolute_offset)"] == 1075.0 + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 35.0 - @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") - def test_null_group_fallback(self, mock_query): - mock_query.side_effect = [MOCK_SNUBA_RESPONSE] - for backend in ["indexedSpans", "nodestore"]: - with self.feature(self.FEATURES): - response = self.client.get( - self.url, - data={"transaction": "api/0/foo", "backend": backend}, - format="json", - ) + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 53.5 - assert response.data - data = response.data - root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16] - assert root_fingerprint in data - assert data[root_fingerprint]["description"] == "" - assert data[root_fingerprint]["count()"] == 2 + fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-D2").hexdigest()[:16] + assert data[fingerprint]["avg(absolute_offset)"] == 1075.0 - @mock.patch("sentry.api.endpoints.organization_spans_aggregation.raw_snql_query") - def test_http_method_filter(self, mock_query): + def test_null_group_fallback(self): with self.feature(self.FEATURES): response = self.client.get( self.url, - data={"transaction": "api/0/foo", "backend": "nodestore", "http.method": "GET"}, + data={"transaction": "api/0/foo"}, format="json", ) assert response.data data = response.data - root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07-C-discover.snql").hexdigest()[:16] assert root_fingerprint in data + assert data[root_fingerprint]["description"] == "" assert data[root_fingerprint]["count()"] == 2 + def test_http_method_filter(self): with self.feature(self.FEATURES): response = self.client.get( self.url, - data={"transaction": "api/0/foo", "backend": "nodestore", "http.method": "POST"}, + data={"transaction": "api/0/foo", "http.method": "GET"}, format="json", ) - assert response.data == {} + assert response.data + data = response.data + root_fingerprint = hashlib.md5(b"e238e6c2e2466b07").hexdigest()[:16] + assert root_fingerprint in data + assert data[root_fingerprint]["count()"] == 2 with self.feature(self.FEATURES): - self.client.get( + response = self.client.get( self.url, - data={"transaction": "api/0/foo", "backend": "indexedSpans", "http.method": "GET"}, + data={"transaction": "api/0/foo", "http.method": "POST"}, format="json", ) - assert ( - Condition( - lhs=Function( - function="ifNull", - parameters=[ - Column( - name="sentry_tags[transaction.method]", - ), - "", - ], - alias=None, - ), - op=Op.EQ, - rhs="GET", - ) - in mock_query.mock_calls[0].args[0].query.where - ) + assert response.data == {} def test_environment_filter(self): with self.feature(self.FEATURES): @@ -598,7 +687,6 @@ def test_environment_filter(self): self.url, data={ "transaction": "api/0/foo", - "backend": "nodestore", "environment": "production", }, format="json", @@ -615,7 +703,6 @@ def test_environment_filter(self): self.url, data={ "transaction": "api/0/foo", - "backend": "nodestore", "environment": ["production", "development"], }, format="json",