Skip to content

Commit c5379b8

Browse files
authored
feat(EXP-253): Add attribute comparison endpoint (#92283)
Adds an endpoint that returns a distribution of ranked "suspect" attributes when comparing two cohorts.
1 parent c981487 commit c5379b8

File tree

4 files changed

+262
-0
lines changed

4 files changed

+262
-0
lines changed
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
from collections import defaultdict
2+
from concurrent.futures import ThreadPoolExecutor
3+
from typing import Any
4+
5+
from rest_framework.request import Request
6+
from rest_framework.response import Response
7+
from sentry_protos.snuba.v1.endpoint_trace_item_stats_pb2 import (
8+
AttributeDistributionsRequest,
9+
StatsType,
10+
TraceItemStatsRequest,
11+
)
12+
13+
from sentry import features
14+
from sentry.api.api_owners import ApiOwner
15+
from sentry.api.api_publish_status import ApiPublishStatus
16+
from sentry.api.base import region_silo_endpoint
17+
from sentry.api.bases import NoProjects, OrganizationEventsV2EndpointBase
18+
from sentry.models.organization import Organization
19+
from sentry.search.eap.resolver import SearchResolver
20+
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
21+
from sentry.search.eap.types import SearchResolverConfig, SupportedTraceItemType
22+
from sentry.search.eap.utils import translate_internal_to_public_alias
23+
from sentry.seer.workflows.compare import keyed_rrf_score
24+
from sentry.snuba.referrer import Referrer
25+
from sentry.snuba.spans_rpc import run_table_query
26+
from sentry.utils.snuba_rpc import trace_item_stats_rpc
27+
28+
_query_thread_pool = ThreadPoolExecutor(max_workers=4)
29+
30+
31+
@region_silo_endpoint
32+
class OrganizationTraceItemsAttributesRankedEndpoint(OrganizationEventsV2EndpointBase):
33+
publish_status = {
34+
"GET": ApiPublishStatus.PRIVATE,
35+
}
36+
owner = ApiOwner.PERFORMANCE
37+
38+
def get(self, request: Request, organization: Organization) -> Response:
39+
40+
if not features.has(
41+
"organizations:performance-spans-suspect-attributes", organization, actor=request.user
42+
):
43+
return Response(status=404)
44+
45+
try:
46+
snuba_params = self.get_snuba_params(request, organization)
47+
except NoProjects:
48+
return Response({"rankedAttributes": []})
49+
50+
resolver = SearchResolver(
51+
params=snuba_params, config=SearchResolverConfig(), definitions=SPAN_DEFINITIONS
52+
)
53+
54+
meta = resolver.resolve_meta(referrer=Referrer.API_SPANS_FREQUENCY_STATS_RPC.value)
55+
query_1 = request.GET.get("query_1", "")
56+
query_2 = request.GET.get("query_2", "")
57+
58+
if query_1 == query_2:
59+
return Response({"rankedAttributes": []})
60+
61+
cohort_1, _, _ = resolver.resolve_query(query_1)
62+
cohort_1_request = TraceItemStatsRequest(
63+
filter=cohort_1,
64+
meta=meta,
65+
stats_types=[
66+
StatsType(
67+
attribute_distributions=AttributeDistributionsRequest(
68+
max_buckets=100,
69+
)
70+
)
71+
],
72+
)
73+
74+
cohort_2, _, _ = resolver.resolve_query(query_2)
75+
cohort_2_request = TraceItemStatsRequest(
76+
filter=cohort_2,
77+
meta=meta,
78+
stats_types=[
79+
StatsType(
80+
attribute_distributions=AttributeDistributionsRequest(
81+
max_buckets=100,
82+
)
83+
)
84+
],
85+
)
86+
87+
cohort_1_future = _query_thread_pool.submit(
88+
trace_item_stats_rpc,
89+
cohort_1_request,
90+
)
91+
totals_1_future = _query_thread_pool.submit(
92+
run_table_query,
93+
snuba_params,
94+
query_1,
95+
["count(span.duration)"],
96+
None,
97+
config=SearchResolverConfig(use_aggregate_conditions=False),
98+
offset=0,
99+
limit=1,
100+
sampling_mode=snuba_params.sampling_mode,
101+
referrer=Referrer.API_SPAN_SAMPLE_GET_SPAN_DATA.value,
102+
)
103+
104+
cohort_2_future = _query_thread_pool.submit(
105+
trace_item_stats_rpc,
106+
cohort_2_request,
107+
)
108+
109+
totals_2_future = _query_thread_pool.submit(
110+
run_table_query,
111+
snuba_params,
112+
query_2,
113+
["count(span.duration)"],
114+
None,
115+
config=SearchResolverConfig(use_aggregate_conditions=False),
116+
offset=0,
117+
limit=1,
118+
sampling_mode=snuba_params.sampling_mode,
119+
referrer=Referrer.API_SPAN_SAMPLE_GET_SPAN_DATA.value,
120+
)
121+
122+
cohort_1_data = cohort_1_future.result()
123+
cohort_2_data = cohort_2_future.result()
124+
totals_1_result = totals_1_future.result()
125+
totals_2_result = totals_2_future.result()
126+
127+
cohort_1_distribution = []
128+
cohort_1_distribution_map = defaultdict(list)
129+
for attribute in cohort_1_data.results[0].attribute_distributions.attributes:
130+
for bucket in attribute.buckets:
131+
cohort_1_distribution.append((attribute.attribute_name, bucket.label, bucket.value))
132+
cohort_1_distribution_map[attribute.attribute_name].append(
133+
{"label": bucket.label, "value": bucket.value}
134+
)
135+
136+
cohort_2_distribution = []
137+
cohort_2_distribution_map = defaultdict(list)
138+
for attribute in cohort_2_data.results[0].attribute_distributions.attributes:
139+
for bucket in attribute.buckets:
140+
cohort_2_distribution.append((attribute.attribute_name, bucket.label, bucket.value))
141+
cohort_2_distribution_map[attribute.attribute_name].append(
142+
{"label": bucket.label, "value": bucket.value}
143+
)
144+
145+
scored_attrs = keyed_rrf_score(
146+
cohort_1_distribution,
147+
cohort_2_distribution,
148+
totals_1_result["data"][0]["count(span.duration)"],
149+
totals_2_result["data"][0]["count(span.duration)"],
150+
)
151+
152+
ranked_distribution: dict[str, list[dict[str, Any]]] = {"rankedAttributes": []}
153+
for attr, _ in scored_attrs:
154+
distribution = {
155+
"attributeName": translate_internal_to_public_alias(
156+
attr, "string", SupportedTraceItemType.SPANS
157+
)
158+
or attr,
159+
"cohort1": cohort_1_distribution_map.get(attr),
160+
"cohort2": cohort_2_distribution_map.get(attr),
161+
}
162+
ranked_distribution["rankedAttributes"].append(distribution)
163+
164+
return Response(ranked_distribution)

src/sentry/api/urls.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@
3838
OrganizationTraceItemAttributesEndpoint,
3939
OrganizationTraceItemAttributeValuesEndpoint,
4040
)
41+
from sentry.api.endpoints.organization_trace_item_attributes_ranked import (
42+
OrganizationTraceItemsAttributesRankedEndpoint,
43+
)
4144
from sentry.api.endpoints.organization_trace_summary import OrganizationTraceSummaryEndpoint
4245
from sentry.api.endpoints.organization_unsubscribe import (
4346
OrganizationUnsubscribeIssue,
@@ -1527,6 +1530,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
15271530
OrganizationTraceItemAttributeValuesEndpoint.as_view(),
15281531
name="sentry-api-0-organization-trace-item-attribute-values",
15291532
),
1533+
re_path(
1534+
r"^(?P<organization_id_or_slug>[^\/]+)/trace-items/attributes/ranked/$",
1535+
OrganizationTraceItemsAttributesRankedEndpoint.as_view(),
1536+
name="sentry-api-0-organization-trace-item-attributes-ranked",
1537+
),
15301538
re_path(
15311539
r"^(?P<organization_id_or_slug>[^\/]+)/spans/fields/$",
15321540
OrganizationSpansFieldsEndpoint.as_view(),

src/sentry/features/temporary.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,8 @@ def register_temporary_features(manager: FeatureManager):
266266
manager.add("organizations:performance-vitals-standalone-cls-lcp", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
267267
# Enable default explore queries in the new side nav
268268
manager.add("organizations:performance-default-explore-queries", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
269+
# Enable suspect attributes feature
270+
manager.add("organizations:performance-spans-suspect-attributes", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True)
269271
# Enables the playstation ingestion in relay
270272
manager.add("organizations:relay-playstation-ingestion", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
271273
# Enable profiling
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
from django.urls import reverse
2+
3+
from sentry.testutils.cases import APITransactionTestCase, SnubaTestCase, SpanTestCase
4+
from sentry.testutils.helpers.datetime import before_now
5+
6+
7+
class OrganizationTraceItemsAttributesRankedEndpointTest(
8+
APITransactionTestCase,
9+
SnubaTestCase,
10+
SpanTestCase,
11+
):
12+
view = "sentry-api-0-organization-trace-item-attributes-ranked"
13+
14+
def setUp(self):
15+
super().setUp()
16+
self.login_as(user=self.user)
17+
self.features = {
18+
"organizations:performance-spans-suspect-attributes": True,
19+
}
20+
self.ten_mins_ago = before_now(minutes=10)
21+
self.ten_mins_ago_iso = self.ten_mins_ago.replace(microsecond=0).isoformat()
22+
23+
def do_request(self, query=None, features=None, **kwargs):
24+
if features is None:
25+
features = ["organizations:performance-spans-suspect-attributes"]
26+
27+
if query and "type" not in query.keys():
28+
query["type"] = "string"
29+
30+
with self.feature(features):
31+
response = self.client.get(
32+
reverse(self.view, kwargs={"organization_id_or_slug": self.organization.slug}),
33+
query,
34+
format="json",
35+
**kwargs,
36+
)
37+
38+
return response
39+
40+
def _store_span(self, description=None, tags=None, duration=None):
41+
if tags is None:
42+
tags = {"foo": "bar"}
43+
44+
self.store_span(
45+
self.create_span(
46+
{"description": description or "foo", "sentry_tags": tags},
47+
start_ts=self.ten_mins_ago,
48+
duration=duration or 1000,
49+
),
50+
is_eap=True,
51+
)
52+
53+
def test_no_project(self):
54+
response = self.do_request()
55+
assert response.status_code == 200, response.data
56+
assert response.data == {"rankedAttributes": []}
57+
58+
def test_no_feature(self):
59+
response = self.do_request(features=[])
60+
assert response.status_code == 404, response.data
61+
62+
def test_distribution_values(self):
63+
tags = [
64+
({"browser": "chrome", "device": "desktop"}, 500),
65+
({"browser": "chrome", "device": "mobile"}, 100),
66+
({"browser": "chrome", "device": "mobile"}, 100),
67+
({"browser": "chrome", "device": "desktop"}, 100),
68+
({"browser": "safari", "device": "mobile"}, 100),
69+
({"browser": "chrome", "device": "desktop"}, 500),
70+
({"browser": "edge", "device": "desktop"}, 500),
71+
]
72+
73+
for tag, duration in tags:
74+
self._store_span(tags=tag, duration=duration)
75+
76+
response = self.do_request(
77+
query={"query_1": "span.duration:<=100", "query_2": "span.duration:>100"}
78+
)
79+
assert response.status_code == 200, response.data
80+
distributions = response.data["rankedAttributes"]
81+
assert distributions[0]["attributeName"] == "sentry.device"
82+
assert distributions[0]["cohort1"] == [
83+
{"label": "mobile", "value": 3.0},
84+
{"label": "desktop", "value": 1.0},
85+
]
86+
assert distributions[0]["cohort2"] == [{"label": "desktop", "value": 3.0}]
87+
88+
assert distributions[1]["attributeName"] == "browser"

0 commit comments

Comments
 (0)