Skip to content

Commit

Permalink
feat: add OpAMPRecordUsage option (#1500)
Browse files Browse the repository at this point in the history
## Which problem is this PR solving?

To make sure all traffic is counted only once, Refinery will only report
usage for traffic it receives directly from clients.

## Short description of the changes

- add OpAMPRecordUsage config option and default to true
- fix a metrics name bug
- add unit test
- generate config docs
  • Loading branch information
VinozzZ authored Mar 6, 2025
1 parent e329415 commit ea8eed9
Show file tree
Hide file tree
Showing 14 changed files with 225 additions and 40 deletions.
4 changes: 2 additions & 2 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,11 +209,11 @@ func (agent *Agent) healthCheck() {
}
}

traceUsage, ok := agent.metrics.Get("bytes_received_trace")
traceUsage, ok := agent.metrics.Get("bytes_received_traces")
if !ok {
agent.logger.Errorf(context.Background(), "unexpected missing trace usage metric")
}
logUsage, ok := agent.metrics.Get("bytes_received_log")
logUsage, ok := agent.metrics.Get("bytes_received_logs")
if !ok {
agent.logger.Errorf(context.Background(), "unexpected missing log usage metric")
}
Expand Down
12 changes: 11 additions & 1 deletion config.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Honeycomb Refinery Configuration Documentation

This is the documentation for the configuration file for Honeycomb's Refinery.
It was automatically generated on 2025-02-14 at 21:35:19 UTC.
It was automatically generated on 2025-03-06 at 18:09:16 UTC.

## The Config file

Expand Down Expand Up @@ -159,6 +159,16 @@ OpAMP support is experimental in Refinery.
- Not eligible for live reload.
- Type: `bool`

### `OpAMPRecordUsage`

OpAMPRecordUsage controls whether to record usage metrics.

Usage metrics are only recorded when OpAMP is enabled and `OpAMPRecordUsage` is set to true.

- Eligible for live reload.
- Type: `bool`
- Default: `true`

### `OpAMPEndpoint`

OpAMPEndpoint is the URL of the OpAMP server for this client.
Expand Down
13 changes: 7 additions & 6 deletions config/config_serializer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,13 @@ func populateConfigContents(cfg Config) configContents {
return configContents{
General: cfg.GetGeneralConfig(),
Network: NetworkConfig{
ListenAddr: cfg.GetListenAddr(),
PeerListenAddr: cfg.GetPeerListenAddr(),
HoneycombAPI: cfg.GetHoneycombAPI(),
HTTPIdleTimeout: Duration(cfg.GetHTTPIdleTimeout()),
OpAMPEndpoint: opAMPConfig.Endpoint,
OpAMPEnabled: opAMPConfig.Enabled,
ListenAddr: cfg.GetListenAddr(),
PeerListenAddr: cfg.GetPeerListenAddr(),
HoneycombAPI: cfg.GetHoneycombAPI(),
HTTPIdleTimeout: Duration(cfg.GetHTTPIdleTimeout()),
OpAMPEndpoint: opAMPConfig.Endpoint,
OpAMPEnabled: opAMPConfig.Enabled,
OpAMPRecordUsage: getDefaultTrueValue(cfg.GetOpAMPConfig().RecordUsage.Get()),
},
AccessKeys: cfg.GetAccessKeyConfig(),
Telemetry: getRefineryTelemetryConfig(cfg),
Expand Down
23 changes: 13 additions & 10 deletions config/file_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,19 @@ type GeneralConfig struct {

// TODO: Implement opamp config in its own config section once we are ready to release the feature
type OpAMPConfig struct {
Endpoint string `yaml:"Endpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"`
Enabled bool `yaml:"Enabled" default:"false"`
Endpoint string `yaml:"Endpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"`
Enabled bool `yaml:"Enabled" default:"false"`
RecordUsage *DefaultTrue `yaml:"SendUsageReport" default:"true"`
}

type NetworkConfig struct {
ListenAddr string `yaml:"ListenAddr" default:"0.0.0.0:8080" cmdenv:"HTTPListenAddr"`
PeerListenAddr string `yaml:"PeerListenAddr" default:"0.0.0.0:8081" cmdenv:"PeerListenAddr"`
HoneycombAPI string `yaml:"HoneycombAPI" default:"https://api.honeycomb.io" cmdenv:"HoneycombAPI"`
HTTPIdleTimeout Duration `yaml:"HTTPIdleTimeout"`
OpAMPEndpoint string `yaml:"OpAMPEndpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"`
OpAMPEnabled bool `yaml:"OpAMPEnabled" default:"false"`
ListenAddr string `yaml:"ListenAddr" default:"0.0.0.0:8080" cmdenv:"HTTPListenAddr"`
PeerListenAddr string `yaml:"PeerListenAddr" default:"0.0.0.0:8081" cmdenv:"PeerListenAddr"`
HoneycombAPI string `yaml:"HoneycombAPI" default:"https://api.honeycomb.io" cmdenv:"HoneycombAPI"`
HTTPIdleTimeout Duration `yaml:"HTTPIdleTimeout"`
OpAMPEndpoint string `yaml:"OpAMPEndpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"`
OpAMPEnabled bool `yaml:"OpAMPEnabled" default:"false"`
OpAMPRecordUsage *DefaultTrue `yaml:"OpAMPRecordUsage" default:"true"`
}

type AccessKeyConfig struct {
Expand Down Expand Up @@ -959,8 +961,9 @@ func (f *fileConfig) GetOpAMPConfig() OpAMPConfig {
defer f.mux.RUnlock()

return OpAMPConfig{
Enabled: f.mainConfig.Network.OpAMPEnabled,
Endpoint: f.mainConfig.Network.OpAMPEndpoint,
Enabled: f.mainConfig.Network.OpAMPEnabled,
Endpoint: f.mainConfig.Network.OpAMPEndpoint,
RecordUsage: f.mainConfig.Network.OpAMPRecordUsage,
}
}

Expand Down
9 changes: 9 additions & 0 deletions config/metadata/configMeta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,15 @@ groups:
summary: controls whether to enable OpAMP support.
description: >
OpAMP support is experimental in Refinery.
- name: OpAMPRecordUsage
type: defaulttrue
valuetype: nondefault
default: true
reload: true
firstversion: v2.9.4
summary: controls whether to record usage metrics.
description: >
Usage metrics are only recorded when OpAMP is enabled and OpAMPRecordUsage is set to true.
- name: OpAMPEndpoint
type: string
valuetype: assigndefault
Expand Down
15 changes: 12 additions & 3 deletions config_complete.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
## Honeycomb Refinery Configuration ##
######################################
#
# created on 2025-02-14 at 21:35:18 UTC from ../../config.yaml using a template generated on 2025-02-14 at 21:35:13 UTC
# created on 2025-03-06 at 18:09:15 UTC from ../../config.yaml using a template generated on 2025-03-06 at 18:09:09 UTC

# This file contains a configuration for the Honeycomb Refinery. It is in YAML
# format, organized into named groups, each of which contains a set of
Expand Down Expand Up @@ -133,15 +133,24 @@ Network:
## OpAMP support is experimental in Refinery.
##
## Not eligible for live reload.
OpAMPEnabled: true
# OpAMPEnabled: false

## OpAMPRecordUsage controls whether to record usage metrics.
##
## Usage metrics are only recorded when OpAMP is enabled and
## OpAMPRecordUsage is set to true.
##
## default: true
## Eligible for live reload.
# OpAMPRecordUsage: true

## OpAMPEndpoint is the URL of the OpAMP server for this client.
##
## This setting is the URL of the OpAMP server for this client.
##
## default: wss://127.0.0.1:4320/v1/opamp
## Not eligible for live reload.
OpAMPEndpoint: ws://127.0.0.1:4320/v1/opamp
OpAMPEndpoint: wss://127.0.0.1:4320/v1/opamp

##############################
## Access Key Configuration ##
Expand Down
8 changes: 5 additions & 3 deletions metrics.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Honeycomb Refinery Metrics Documentation

This document contains the description of various metrics used in Refinery.
It was automatically generated on 2025-02-14 at 21:35:17 UTC.
It was automatically generated on 2025-03-06 at 18:09:14 UTC.

Note: This document does not include metrics defined in the dynsampler-go dependency, as those metrics are generated dynamically at runtime. As a result, certain metrics may be missing or incomplete in this document, but they will still be available during execution with their full names.

Expand All @@ -10,6 +10,8 @@ This table includes metrics with fully defined names.

| Name | Type | Unit | Description |
|------|------|------|-------------|
| is_ready | Gauge | Dimensionless | Whether the system is ready to receive traffic |
| is_alive | Gauge | Dimensionless | Whether the system is alive and reporting in |
| collect_cache_entries | Histogram | Dimensionless | The number of traces currently stored in the cache |
| cuckoo_current_capacity | Gauge | Dimensionless | current capacity of the cuckoo filter |
| cuckoo_future_load_factor | Gauge | Percent | the fraction of slots occupied in the future cuckoo filter |
Expand All @@ -18,8 +20,6 @@ This table includes metrics with fully defined names.
| cuckoo_addqueue_locktime_uS | Histogram | Microseconds | the time spent holding the add queue lock |
| cache_recent_dropped_traces | Gauge | Dimensionless | the current size of the most recent dropped trace cache |
| collect_sent_reasons_cache_entries | Histogram | Dimensionless | Number of entries in the sent reasons cache |
| is_ready | Gauge | Dimensionless | Whether the system is ready to receive traffic |
| is_alive | Gauge | Dimensionless | Whether the system is alive and reporting in |
| redis_pubsub_published | Counter | Dimensionless | Number of messages published to Redis PubSub |
| redis_pubsub_received | Counter | Dimensionless | Number of messages received from Redis PubSub |
| local_pubsub_published | Counter | Dimensionless | The total number of messages sent via the local pubsub implementation |
Expand Down Expand Up @@ -103,6 +103,8 @@ Metrics in this table don't contain their expected prefixes. This is because the
| _router_peer | Counter | Dimensionless | the number of spans proxied to a peer |
| _router_batch | Counter | Dimensionless | the number of batches of events received |
| _router_otlp | Counter | Dimensionless | the number of batches of otlp requests received |
| bytes_received_traces | Counter | Bytes | the number of bytes received in trace events |
| bytes_received_logs | Counter | Bytes | the number of bytes received in log events |
| queue_length | Gauge | Dimensionless | number of events waiting to be sent to destination |
| queue_overflow | Counter | Dimensionless | number of events dropped due to queue overflow |
| send_errors | Counter | Dimensionless | number of errors encountered while sending events to destination |
Expand Down
10 changes: 10 additions & 0 deletions refinery_config.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,16 @@ OpAMP support is experimental in Refinery.
- Not eligible for live reload.
- Type: `bool`

### `OpAMPRecordUsage`

`OpAMPRecordUsage` controls whether to record usage metrics.

Usage metrics are only recorded when OpAMP is enabled and `OpAMPRecordUsage` is set to true.

- Eligible for live reload.
- Type: `defaulttrue`
- Default: `true`

### `OpAMPEndpoint`

`OpAMPEndpoint` is the URL of the OpAMP server for this client.
Expand Down
6 changes: 2 additions & 4 deletions route/route.go
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,6 @@ func (r *Router) event(w http.ResponseWriter, req *http.Request) {
return
}

r.Metrics.Count("bytes_received_trace", len(reqBod))

ev, err := r.requestToEvent(ctx, req, reqBod)
if err != nil {
r.handlerReturnWithError(w, ErrReqToEvent, err)
Expand Down Expand Up @@ -608,8 +606,8 @@ func (r *Router) processEvent(ev *types.Event, reqID interface{}) error {
IsRoot: isRootSpan(ev, r.Config),
}

// only record bytes received for incoming traffic when opamp is enabled
if r.incomingOrPeer == "incoming" && r.Config.GetOpAMPConfig().Enabled {
// only record bytes received for incoming traffic when opamp is enabled and record usage is set to true
if r.incomingOrPeer == "incoming" && r.Config.GetOpAMPConfig().Enabled && r.Config.GetOpAMPConfig().RecordUsage.Get() {
if span.Data["meta.signal_type"] == "log" {
r.Metrics.Count("bytes_received_logs", span.GetDataSize())
} else {
Expand Down
123 changes: 123 additions & 0 deletions route/route_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -800,3 +800,126 @@ func TestAddIncomingUserAgent(t *testing.T) {
require.Equal(t, "test-agent", event.Data["meta.refinery.incoming_user_agent"])
})
}

// TestProcessEventMetrics verifies that processEvent counts received bytes
// under the usage metrics ("bytes_received_logs" / "bytes_received_traces")
// only when the router handles incoming traffic, OpAMP is enabled, and
// RecordUsage is true. In every other combination the usage counter for the
// event's signal type must stay at zero.
func TestProcessEventMetrics(t *testing.T) {
	tests := []struct {
		name           string
		incomingOrPeer string
		opampEnabled   bool
		recordUsage    config.DefaultTrue
		signalType     string
		expectedCount  int
		// metricName is the counter inspected after processEvent returns. It
		// must be set in negative cases too; an empty name would look up a
		// counter that is never written and make the assertion vacuous.
		metricName string
	}{
		{
			name:           "log event with opamp enabled and record usage",
			incomingOrPeer: "incoming",
			opampEnabled:   true,
			recordUsage:    config.DefaultTrue(true),
			signalType:     "log",
			expectedCount:  30,
			metricName:     "bytes_received_logs",
		},
		{
			name:           "trace event with opamp enabled and record usage",
			incomingOrPeer: "incoming",
			opampEnabled:   true,
			recordUsage:    config.DefaultTrue(true),
			signalType:     "trace",
			expectedCount:  32,
			metricName:     "bytes_received_traces",
		},
		{
			name:           "log event with opamp disabled",
			incomingOrPeer: "incoming",
			opampEnabled:   false,
			recordUsage:    config.DefaultTrue(true),
			signalType:     "log",
			expectedCount:  0,
			metricName:     "bytes_received_logs",
		},
		{
			name:           "log event with record usage disabled",
			incomingOrPeer: "incoming",
			opampEnabled:   true,
			recordUsage:    config.DefaultTrue(false),
			signalType:     "log",
			expectedCount:  0,
			metricName:     "bytes_received_logs",
		},
		{
			name:           "log event from peer",
			incomingOrPeer: "peer",
			opampEnabled:   true,
			recordUsage:    config.DefaultTrue(true),
			signalType:     "log",
			expectedCount:  0,
			metricName:     "bytes_received_logs",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			mockMetrics := &metrics.MockMetrics{}
			mockMetrics.Start()

			// Take the address of a per-subtest copy so the config never
			// aliases the loop variable.
			recordUsage := tt.recordUsage
			mockConfig := &config.MockConfig{
				GetOpAmpConfigVal: config.OpAMPConfig{
					Enabled:     tt.opampEnabled,
					RecordUsage: &recordUsage,
				},
				TraceIdFieldNames: []string{"trace.trace_id"},
			}

			// Setup mock transmissions
			mockUpstream := &transmit.MockTransmission{}
			mockUpstream.Start()
			mockPeer := &transmit.MockTransmission{}
			mockPeer.Start()

			mockSharder := &sharder.MockSharder{
				Self: &sharder.TestShard{
					Addr: "http://localhost:12345",
				},
			}

			router := &Router{
				Config:               mockConfig,
				Logger:               &logger.NullLogger{},
				Metrics:              mockMetrics,
				UpstreamTransmission: mockUpstream,
				PeerTransmission:     mockPeer,
				Collector:            collect.NewMockCollector(),
				Sharder:              mockSharder,
				incomingOrPeer:       tt.incomingOrPeer,
				iopLogger:            iopLogger{Logger: &logger.NullLogger{}, incomingOrPeer: tt.incomingOrPeer},
			}

			// Create test event with traceID and signal type
			event := &types.Event{
				Context:   context.Background(),
				APIHost:   "test.honeycomb.io",
				Dataset:   "test-dataset",
				Timestamp: time.Now(),
				Data: map[string]interface{}{
					"trace.trace_id":    "trace-123",
					"meta.signal_type":  tt.signalType,
					"test_attribute":    "test_value",
					"another_attribute": 123,
				},
			}

			// Sanity-check the fixture: in positive cases the expected count
			// is the data size of a span built from the same event.
			span := &types.Span{
				Event:   *event,
				TraceID: "trace-123",
				IsRoot:  true,
			}
			size := span.GetDataSize()
			if tt.expectedCount > 0 {
				assert.Equal(t, tt.expectedCount, size)
			}

			// Call processEvent
			err := router.processEvent(event, "request-123")
			assert.NoError(t, err)
			assert.Equal(t, tt.expectedCount, mockMetrics.CounterIncrements[tt.metricName])
		})
	}
}
4 changes: 3 additions & 1 deletion tools/convert/configDataNames.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Names of groups and fields in the new config file format.
# Automatically generated on 2025-02-14 at 21:35:14 UTC.
# Automatically generated on 2025-03-06 at 18:09:11 UTC.

General:
- ConfigurationVersion
Expand All @@ -22,6 +22,8 @@ Network:

- OpAMPEnabled

- OpAMPRecordUsage

- OpAMPEndpoint


Expand Down
Loading

0 comments on commit ea8eed9

Please sign in to comment.