Skip to content

Commit a9d6793

Browse files
authored
feat(spans): Compute breakdowns on segment spans (#92185)
Computes breakdowns as measurements that ultimately get converted to span attributes. Once we have a specification for span breakdowns in span data, this implementation can be updated.
1 parent 878e312 commit a9d6793

File tree

7 files changed

+161
-13
lines changed

7 files changed

+161
-13
lines changed

requirements-base.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ rfc3339-validator>=0.1.2
6565
rfc3986-validator>=0.1.1
6666
# [end] jsonschema format validators
6767
sentry-arroyo>=2.21.0
68-
sentry-kafka-schemas>=1.3.2
68+
sentry-kafka-schemas>=1.3.6
6969
sentry-ophio>=1.1.3
7070
sentry-protos==0.2.0
7171
sentry-redis-tools>=0.5.0

requirements-dev-frozen.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ sentry-devenv==1.21.0
185185
sentry-forked-django-stubs==5.2.0.post3
186186
sentry-forked-djangorestframework-stubs==3.16.0.post1
187187
sentry-forked-email-reply-parser==0.5.12.post1
188-
sentry-kafka-schemas==1.3.2
188+
sentry-kafka-schemas==1.3.6
189189
sentry-ophio==1.1.3
190190
sentry-protos==0.2.0
191191
sentry-redis-tools==0.5.0

requirements-frozen.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ rsa==4.8
123123
s3transfer==0.10.0
124124
sentry-arroyo==2.21.0
125125
sentry-forked-email-reply-parser==0.5.12.post1
126-
sentry-kafka-schemas==1.3.2
126+
sentry-kafka-schemas==1.3.6
127127
sentry-ophio==1.1.3
128128
sentry-protos==0.2.0
129129
sentry-redis-tools==0.5.0

src/sentry/spans/consumers/process_segments/enrichment.py

Lines changed: 73 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
1+
from collections import defaultdict
12
from typing import Any, cast
23

3-
from sentry.spans.consumers.process_segments.types import Span
4+
from sentry.models.project import Project
5+
from sentry.spans.consumers.process_segments.types import MeasurementValue, Span
46

57
# Keys in `sentry_tags` that are shared across all spans in a segment. This list
68
# is taken from `extract_shared_tags` in Relay.
@@ -127,7 +129,7 @@ def set_exclusive_time(spans: list[Span]) -> None:
127129
span_map: dict[str, list[tuple[int, int]]] = {}
128130
for span in spans:
129131
if parent_span_id := span.get("parent_span_id"):
130-
interval = (_us(span["start_timestamp_precise"]), _us(span["end_timestamp_precise"]))
132+
interval = _span_interval(span)
131133
span_map.setdefault(parent_span_id, []).append(interval)
132134

133135
for span in spans:
@@ -136,7 +138,7 @@ def set_exclusive_time(spans: list[Span]) -> None:
136138
intervals.sort(key=lambda x: (x[0], -x[1]))
137139

138140
exclusive_time_us: int = 0 # microseconds to prevent rounding issues
139-
start, end = _us(span["start_timestamp_precise"]), _us(span["end_timestamp_precise"])
141+
start, end = _span_interval(span)
140142

141143
# Progressively add time gaps before the next span and then skip to its end.
142144
for child_start, child_end in intervals:
@@ -155,7 +157,75 @@ def set_exclusive_time(spans: list[Span]) -> None:
155157
span["exclusive_time_ms"] = exclusive_time_us / 1_000
156158

157159

160+
def _span_interval(span: Span) -> tuple[int, int]:
    """Return the span's precise ``(start, end)`` timestamps as integer
    microseconds."""
    start_us = _us(span["start_timestamp_precise"])
    end_us = _us(span["end_timestamp_precise"])
    return start_us, end_us
163+
164+
158165
def _us(timestamp: float) -> int:
159166
"""Convert the floating point duration or timestamp to integer microsecond
160167
precision."""
161168
return int(timestamp * 1_000_000)
169+
170+
171+
def compute_breakdowns(segment: Span, spans: list[Span], project: Project) -> None:
    """
    Computes breakdowns from all spans and writes them to the segment span.

    Breakdowns are measurements that are derived from the spans in the segment.
    By convention, their unit is in milliseconds. In the end, these measurements
    are converted into attributes on the span trace item.

    The breakdown configuration is read from the ``sentry:breakdowns`` project
    option. Each entry maps a breakdown name to a config dict with a ``type``
    key; only ``"spanOperations"`` is handled, every other entry is skipped.
    Results are stored in ``segment["measurements"]`` under the key
    ``"<breakdown name>.<measurement key>"``. The segment is mutated in place
    and nothing is returned.
    """

    # A missing option must not abort segment processing; treat it as an
    # empty configuration.
    config = project.get_option("sentry:breakdowns") or {}

    for breakdown_name, breakdown_config in config.items():
        # Skip malformed entries instead of raising on `.get` below.
        if not isinstance(breakdown_config, dict):
            continue

        ty = breakdown_config.get("type")

        if ty == "spanOperations":
            breakdowns = _compute_span_ops(spans, breakdown_config)
        else:
            continue

        # Created lazily so that segments without any supported breakdown
        # config do not gain an empty `measurements` entry.
        measurements = segment.setdefault("measurements", {})
        for key, value in breakdowns.items():
            measurements[f"{breakdown_name}.{key}"] = value
193+
194+
195+
def _compute_span_ops(spans: list[Span], config: Any) -> dict[str, MeasurementValue]:
196+
matches = config.get("matches")
197+
if not matches:
198+
return {}
199+
200+
intervals_by_op = defaultdict(list)
201+
for span in spans:
202+
op = span.get("sentry_tags", {}).get("op", "")
203+
if operation_name := next(filter(lambda m: op.startswith(m), matches), None):
204+
intervals_by_op[operation_name].append(_span_interval(span))
205+
206+
measurements: dict[str, MeasurementValue] = {}
207+
for operation_name, intervals in intervals_by_op.items():
208+
duration = _get_duration_us(intervals)
209+
measurements[f"ops.{operation_name}"] = {"value": duration / 1000, "unit": "millisecond"}
210+
return measurements
211+
212+
213+
def _get_duration_us(intervals: list[tuple[int, int]]) -> int:
214+
"""
215+
Get the wall clock time duration covered by the intervals in microseconds.
216+
217+
Overlapping intervals are merged so that they are not counted twice. For
218+
example, the intervals [(1, 3), (2, 4)] would yield a duration of 3, not 4.
219+
"""
220+
221+
duration = 0
222+
last_end = 0
223+
224+
intervals.sort(key=lambda x: (x[0], -x[1]))
225+
for start, end in intervals:
226+
# Ensure the current interval doesn't overlap with the last one
227+
start = max(start, last_end)
228+
duration += max(end - start, 0)
229+
last_end = end
230+
231+
return duration

src/sentry/spans/consumers/process_segments/message.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
record_release_received,
2525
)
2626
from sentry.spans.consumers.process_segments.enrichment import (
27+
compute_breakdowns,
2728
match_schemas,
2829
set_exclusive_time,
2930
set_shared_tags,
@@ -50,6 +51,7 @@ def process_segment(unprocessed_spans: list[UnprocessedSpan]) -> list[Span]:
5051
# If the project does not exist then it might have been deleted during ingestion.
5152
return []
5253

54+
compute_breakdowns(segment_span, spans, project)
5355
_create_models(segment_span, project)
5456
_detect_performance_problems(segment_span, spans, project)
5557
_record_signals(segment_span, spans, project)

src/sentry/spans/consumers/process_segments/types.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
from typing import Any, NotRequired
1+
from typing import NotRequired
22

3+
from sentry_kafka_schemas.schema_types.buffered_segments_v1 import MeasurementValue
34
from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan as UnprocessedSpan
45

56
__all__ = (
7+
"MeasurementValue",
68
"Span",
79
"UnprocessedSpan",
810
)
@@ -14,11 +16,6 @@ class Span(UnprocessedSpan, total=True):
1416
extracted.
1517
"""
1618

17-
# Missing in schema
18-
start_timestamp_precise: float
19-
end_timestamp_precise: float
20-
data: NotRequired[dict[str, Any]] # currently unused
21-
2219
# Added in enrichment
2320
exclusive_time: float
2421
exclusive_time_ms: float

tests/sentry/spans/consumers/process_segments/test_enrichment.py

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
from sentry.spans.consumers.process_segments.enrichment import set_exclusive_time
1+
from sentry.spans.consumers.process_segments.enrichment import (
2+
compute_breakdowns,
3+
set_exclusive_time,
4+
)
5+
from sentry.testutils.pytest.fixtures import django_db_all
26
from tests.sentry.spans.consumers.process import build_mock_span
37

48
# Tests ported from Relay
@@ -303,3 +307,78 @@ def test_only_immediate_child_spans_affect_calculation():
303307
"cccccccccccccccc": 400.0,
304308
"dddddddddddddddd": 400.0,
305309
}
310+
311+
312+
@django_db_all
def test_emit_ops_breakdown(default_project):
    """Span operation breakdowns are written to the segment span's measurements."""
    segment_span = build_mock_span(
        project_id=1,
        is_segment=True,
        start_timestamp_precise=1577836800.0,
        end_timestamp_precise=1577858400.01,
        span_id="ffffffffffffffff",
    )

    # (start, end, span_id, op) of every child span of the segment.
    child_specs = [
        # 2020-01-01 00:00:00 - 01:00:00
        (1577836800.0, 1577840400.0, "fa90fdead5f74052", "http"),
        # 2020-01-01 02:00:00 - 03:00:00
        (1577844000.0, 1577847600.0, "bbbbbbbbbbbbbbbb", "db"),
        # 2020-01-01 02:30:00 - 03:30:00
        (1577845800.0, 1577849400.0, "cccccccccccccccc", "db.postgres"),
        # 2020-01-01 04:00:00 - 04:30:00
        (1577851200.0, 1577853000.0, "dddddddddddddddd", "db.mongo"),
        # 2020-01-01 05:00:00 - 06:00:00.01
        (1577854800.0, 1577858400.01, "eeeeeeeeeeeeeeee", "browser"),
    ]

    spans = [
        build_mock_span(
            project_id=1,
            start_timestamp_precise=start,
            end_timestamp_precise=end,
            span_id=span_id,
            parent_span_id=segment_span["span_id"],
            span_op=op,
        )
        for start, end, span_id, op in child_specs
    ]
    spans.append(segment_span)

    breakdowns_config = {
        "span_ops": {"type": "spanOperations", "matches": ["http", "db"]},
        "span_ops_2": {"type": "spanOperations", "matches": ["http", "db"]},
    }
    default_project.update_option("sentry:breakdowns", breakdowns_config)

    # Compute breakdowns for the segment span
    compute_breakdowns(segment_span, spans, default_project)
    measurements = segment_span["measurements"]

    # Both configured breakdowns use the same matches and must agree.
    for breakdown_name in ("span_ops", "span_ops_2"):
        assert measurements[f"{breakdown_name}.ops.http"]["value"] == 3600000.0
        assert measurements[f"{breakdown_name}.ops.db"]["value"] == 7200000.0

    # NOTE: Relay used to extract a total.time breakdown, which is no longer
    # included in span breakdowns.
    # assert measurements["span_ops.total.time"]["value"] == 14400000.01
    # assert measurements["span_ops_2.total.time"]["value"] == 14400000.01

0 commit comments

Comments
 (0)