Skip to content

Commit c90b387

Browse files
Optionally translate OTel histograms to NHCB (prometheus#15850)
* Optionally translate OTEL histograms to NHCB Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Add tests for explicit histogram to NHCB translation Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Linting Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Add test for translation with flag Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Refactor to re-use bucket conversion function Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Update prompb Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Add comment explaining explicit bounds and use defined CustomBucketSchema Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Add TODO for limiting max bucket count Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Fix imports and remove extra blank line Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Update configuration docs and CHANGELOG Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Update CHANGELOG Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> * Use otlptranslator package Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com> --------- Signed-off-by: Carrie Edwards <edwrdscarrie@gmail.com>
1 parent b396727 commit c90b387

File tree

13 files changed

+814
-127
lines changed

13 files changed

+814
-127
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## unreleased
44

55
* [CHANGE] Make setting out-of-order native histograms feature (`--enable-feature=ooo-native-histograms`) a no-op. Out-of-order native histograms are now always enabled when `out_of_order_time_window` is greater than zero and `--enable-feature=native-histograms` is set. #16207
6+
* [FEATURE] OTLP translate: Add feature flag for optionally translating OTel explicit bucket histograms into native histograms with custom buckets. #15850
67
* [ENHANCEMENT] TSDB: add `prometheus_tsdb_wal_replay_unknown_refs_total` and `prometheus_tsdb_wbl_replay_unknown_refs_total` metrics to track unknown series references during WAL/WBL replay. #16166
78
* [BUGFIX] TSDB: fix unknown series errors and possible lost data during WAL replay when series are removed from the head due to inactivity and reappear before the next WAL checkpoint. #16060
89

config/config.go

+1
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,7 @@ type OTLPConfig struct {
14431443
PromoteResourceAttributes []string `yaml:"promote_resource_attributes,omitempty"`
14441444
TranslationStrategy translationStrategyOption `yaml:"translation_strategy,omitempty"`
14451445
KeepIdentifyingResourceAttributes bool `yaml:"keep_identifying_resource_attributes,omitempty"`
1446+
ConvertHistogramsToNHCB bool `yaml:"convert_histograms_to_nhcb,omitempty"`
14461447
}
14471448

14481449
// UnmarshalYAML implements the yaml.Unmarshaler interface.

config/config_test.go

+14
Original file line numberDiff line numberDiff line change
@@ -1563,6 +1563,20 @@ func TestOTLPAllowServiceNameInTargetInfo(t *testing.T) {
15631563
})
15641564
}
15651565

1566+
func TestOTLPConvertHistogramsToNHCB(t *testing.T) {
1567+
t.Run("good config", func(t *testing.T) {
1568+
want, err := LoadFile(filepath.Join("testdata", "otlp_convert_histograms_to_nhcb.good.yml"), false, promslog.NewNopLogger())
1569+
require.NoError(t, err)
1570+
1571+
out, err := yaml.Marshal(want)
1572+
require.NoError(t, err)
1573+
var got Config
1574+
require.NoError(t, yaml.UnmarshalStrict(out, &got))
1575+
1576+
require.True(t, got.OTLPConfig.ConvertHistogramsToNHCB)
1577+
})
1578+
}
1579+
15661580
func TestOTLPAllowUTF8(t *testing.T) {
15671581
t.Run("good config", func(t *testing.T) {
15681582
fpath := filepath.Join("testdata", "otlp_allow_utf8.good.yml")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
otlp:
2+
convert_histograms_to_nhcb: true

docs/configuration/configuration.md

+2
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ otlp:
187187
# resource attributes to the "target_info" metric, on top of converting
188188
# them into the "instance" and "job" labels.
189189
[ keep_identifying_resource_attributes: <boolean> | default = false]
190+
# Configures optional translation of OTLP explicit bucket histograms into native histograms with custom buckets.
191+
[ convert_histograms_to_nhcb: <boolean> | default = false]
190192

191193
# Settings related to the remote read feature.
192194
remote_read:

prompb/codec.go

+2
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ func (h Histogram) ToIntHistogram() *histogram.Histogram {
9090
PositiveBuckets: h.GetPositiveDeltas(),
9191
NegativeSpans: spansProtoToSpans(h.GetNegativeSpans()),
9292
NegativeBuckets: h.GetNegativeDeltas(),
93+
CustomValues: h.CustomValues,
9394
}
9495
}
9596

@@ -109,6 +110,7 @@ func (h Histogram) ToFloatHistogram() *histogram.FloatHistogram {
109110
PositiveBuckets: h.GetPositiveCounts(),
110111
NegativeSpans: spansProtoToSpans(h.GetNegativeSpans()),
111112
NegativeBuckets: h.GetNegativeCounts(),
113+
CustomValues: h.CustomValues,
112114
}
113115
}
114116
// Conversion from integer histogram.

prompb/types.pb.go

+182-102
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

prompb/types.proto

+4
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ message Histogram {
107107
// timestamp is in ms format, see model/timestamp/timestamp.go for
108108
// conversion from time.Time to Prometheus timestamp.
109109
int64 timestamp = 15;
110+
111+
// custom_values are not part of the specification, DO NOT use in remote write clients.
112+
// Used only for converting from OpenTelemetry to Prometheus internally.
113+
repeated double custom_values = 16;
110114
}
111115

112116
// A BucketSpan defines a number of consecutive buckets with their

storage/remote/otlptranslator/prometheusremotewrite/histograms.go

+128-19
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package prometheusremotewrite
1919
import (
2020
"context"
2121
"fmt"
22+
"github.com/prometheus/prometheus/model/histogram"
2223
"math"
2324

2425
"github.com/prometheus/common/model"
@@ -89,8 +90,8 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
8990
scale = 8
9091
}
9192

92-
pSpans, pDeltas := convertBucketsLayout(p.Positive(), scaleDown)
93-
nSpans, nDeltas := convertBucketsLayout(p.Negative(), scaleDown)
93+
pSpans, pDeltas := convertBucketsLayout(p.Positive().BucketCounts().AsRaw(), p.Positive().Offset(), scaleDown, true)
94+
nSpans, nDeltas := convertBucketsLayout(p.Negative().BucketCounts().AsRaw(), p.Negative().Offset(), scaleDown, true)
9495

9596
h := prompb.Histogram{
9697
// The counter reset detection must be compatible with Prometheus to
@@ -133,19 +134,25 @@ func exponentialToNativeHistogram(p pmetric.ExponentialHistogramDataPoint) (prom
133134
return h, annots, nil
134135
}
135136

136-
// convertBucketsLayout translates OTel Exponential Histogram dense buckets
137-
// representation to Prometheus Native Histogram sparse bucket representation.
137+
// convertBucketsLayout translates OTel Explicit or Exponential Histogram dense buckets
138+
// representation to Prometheus Native Histogram sparse bucket representation. This is used
139+
// for translating Exponential Histograms into Native Histograms, and Explicit Histograms
140+
// into Native Histograms with Custom Buckets.
138141
//
139142
// The translation logic is taken from the client_golang `histogram.go#makeBuckets`
140143
// function, see `makeBuckets` https://github.com/prometheus/client_golang/blob/main/prometheus/histogram.go
141-
// The bucket indexes conversion was adjusted, since OTel exp. histogram bucket
144+
//
145+
// scaleDown is the factor by which the buckets are scaled down. In other words 2^scaleDown buckets will be merged into one.
146+
//
147+
// When converting from OTel Exponential Histograms to Native Histograms, the
148+
// bucket indexes conversion is adjusted, since OTel exp. histogram bucket
142149
// index 0 corresponds to the range (1, base] while Prometheus bucket index 0
143150
// to the range (base 1].
144151
//
145-
// scaleDown is the factor by which the buckets are scaled down. In other words 2^scaleDown buckets will be merged into one.
146-
func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets, scaleDown int32) ([]prompb.BucketSpan, []int64) {
147-
bucketCounts := buckets.BucketCounts()
148-
if bucketCounts.Len() == 0 {
152+
// When converting from OTel Explicit Histograms to Native Histograms with Custom Buckets,
153+
// the bucket indexes are not scaled, and the indices are not adjusted by 1.
154+
func convertBucketsLayout(bucketCounts []uint64, offset int32, scaleDown int32, adjustOffset bool) ([]prompb.BucketSpan, []int64) {
155+
if len(bucketCounts) == 0 {
149156
return nil, nil
150157
}
151158

@@ -164,24 +171,28 @@ func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets,
164171

165172
// Let the compiler figure out that this is const during this function by
166173
// moving it into a local variable.
167-
numBuckets := bucketCounts.Len()
174+
numBuckets := len(bucketCounts)
175+
176+
bucketIdx := offset>>scaleDown + 1
177+
178+
initialOffset := offset
179+
if adjustOffset {
180+
initialOffset = initialOffset>>scaleDown + 1
181+
}
168182

169-
// The offset is scaled and adjusted by 1 as described above.
170-
bucketIdx := buckets.Offset()>>scaleDown + 1
171183
spans = append(spans, prompb.BucketSpan{
172-
Offset: bucketIdx,
184+
Offset: initialOffset,
173185
Length: 0,
174186
})
175187

176188
for i := 0; i < numBuckets; i++ {
177-
// The offset is scaled and adjusted by 1 as described above.
178-
nextBucketIdx := (int32(i)+buckets.Offset())>>scaleDown + 1
189+
nextBucketIdx := (int32(i)+offset)>>scaleDown + 1
179190
if bucketIdx == nextBucketIdx { // We have not collected enough buckets to merge yet.
180-
count += int64(bucketCounts.At(i))
191+
count += int64(bucketCounts[i])
181192
continue
182193
}
183194
if count == 0 {
184-
count = int64(bucketCounts.At(i))
195+
count = int64(bucketCounts[i])
185196
continue
186197
}
187198

@@ -202,11 +213,12 @@ func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets,
202213
}
203214
}
204215
appendDelta(count)
205-
count = int64(bucketCounts.At(i))
216+
count = int64(bucketCounts[i])
206217
bucketIdx = nextBucketIdx
207218
}
219+
208220
// Need to use the last item's index. The offset is scaled and adjusted by 1 as described above.
209-
gap := (int32(numBuckets)+buckets.Offset()-1)>>scaleDown + 1 - bucketIdx
221+
gap := (int32(numBuckets)+offset-1)>>scaleDown + 1 - bucketIdx
210222
if gap > 2 {
211223
// We have to create a new span, because we have found a gap
212224
// of more than two buckets. The constant 2 is copied from the logic in
@@ -226,3 +238,100 @@ func convertBucketsLayout(buckets pmetric.ExponentialHistogramDataPointBuckets,
226238

227239
return spans, deltas
228240
}
241+
242+
func (c *PrometheusConverter) addCustomBucketsHistogramDataPoints(ctx context.Context, dataPoints pmetric.HistogramDataPointSlice,
243+
resource pcommon.Resource, settings Settings, promName string) (annotations.Annotations, error) {
244+
var annots annotations.Annotations
245+
246+
for x := 0; x < dataPoints.Len(); x++ {
247+
if err := c.everyN.checkContext(ctx); err != nil {
248+
return annots, err
249+
}
250+
251+
pt := dataPoints.At(x)
252+
253+
histogram, ws, err := explicitHistogramToCustomBucketsHistogram(pt)
254+
annots.Merge(ws)
255+
if err != nil {
256+
return annots, err
257+
}
258+
259+
lbls := createAttributes(
260+
resource,
261+
pt.Attributes(),
262+
settings,
263+
nil,
264+
true,
265+
model.MetricNameLabel,
266+
promName,
267+
)
268+
269+
ts, _ := c.getOrCreateTimeSeries(lbls)
270+
ts.Histograms = append(ts.Histograms, histogram)
271+
272+
exemplars, err := getPromExemplars[pmetric.HistogramDataPoint](ctx, &c.everyN, pt)
273+
if err != nil {
274+
return annots, err
275+
}
276+
ts.Exemplars = append(ts.Exemplars, exemplars...)
277+
}
278+
279+
return annots, nil
280+
}
281+
282+
func explicitHistogramToCustomBucketsHistogram(p pmetric.HistogramDataPoint) (prompb.Histogram, annotations.Annotations, error) {
283+
var annots annotations.Annotations
284+
285+
buckets := p.BucketCounts().AsRaw()
286+
offset := getBucketOffset(buckets)
287+
bucketCounts := buckets[offset:]
288+
positiveSpans, positiveDeltas := convertBucketsLayout(bucketCounts, int32(offset), 0, false)
289+
290+
// TODO(carrieedwards): Add setting to limit maximum bucket count
291+
h := prompb.Histogram{
292+
// The counter reset detection must be compatible with Prometheus to
293+
// safely set ResetHint to NO. This is not ensured currently.
294+
// Sending a sample that triggers counter reset but with ResetHint==NO
295+
// would lead to Prometheus panic as it does not double check the hint.
296+
// Thus we're explicitly saying UNKNOWN here, which is always safe.
297+
// TODO: using created time stamp should be accurate, but we
298+
// need to know here if it was used for the detection.
299+
// Ref: https://github.com/open-telemetry/opentelemetry-collector-contrib/pull/28663#issuecomment-1810577303
300+
// Counter reset detection in Prometheus: https://github.com/prometheus/prometheus/blob/f997c72f294c0f18ca13fa06d51889af04135195/tsdb/chunkenc/histogram.go#L232
301+
ResetHint: prompb.Histogram_UNKNOWN,
302+
Schema: histogram.CustomBucketsSchema,
303+
304+
PositiveSpans: positiveSpans,
305+
PositiveDeltas: positiveDeltas,
306+
// Note: OTel explicit histograms have an implicit +Inf bucket, which has a lower bound
307+
// of the last element in the explicit_bounds array.
308+
// This is similar to the custom_values array in native histograms with custom buckets.
309+
// Because of this shared property, the OTel explicit histogram's explicit_bounds array
310+
// can be mapped directly to the custom_values array.
311+
// See: https://github.com/open-telemetry/opentelemetry-proto/blob/d7770822d70c7bd47a6891fc9faacc66fc4af3d3/opentelemetry/proto/metrics/v1/metrics.proto#L469
312+
CustomValues: p.ExplicitBounds().AsRaw(),
313+
314+
Timestamp: convertTimeStamp(p.Timestamp()),
315+
}
316+
317+
if p.Flags().NoRecordedValue() {
318+
h.Sum = math.Float64frombits(value.StaleNaN)
319+
h.Count = &prompb.Histogram_CountInt{CountInt: value.StaleNaN}
320+
} else {
321+
if p.HasSum() {
322+
h.Sum = p.Sum()
323+
}
324+
h.Count = &prompb.Histogram_CountInt{CountInt: p.Count()}
325+
if p.Count() == 0 && h.Sum != 0 {
326+
annots.Add(fmt.Errorf("histogram data point has zero count, but non-zero sum: %f", h.Sum))
327+
}
328+
}
329+
return h, annots, nil
330+
}
331+
332+
func getBucketOffset(buckets []uint64) (offset int) {
333+
for offset < len(buckets) && buckets[offset] == 0 {
334+
offset++
335+
}
336+
return offset
337+
}

0 commit comments

Comments
 (0)