From ea8eed9cd2838a7428dbea5075f9d9c55fdd6705 Mon Sep 17 00:00:00 2001 From: Yingrong Zhao <22300958+VinozzZ@users.noreply.github.com> Date: Thu, 6 Mar 2025 15:32:37 -0500 Subject: [PATCH] feat: add OpAMPRecordUsage option (#1500) ## Which problem is this PR solving? To make sure all traffic is only counted once, Refinery will only report usage if it is receiving traffic from client directly. ## Short description of the changes - add OpAMPRecordUsage config option and default to true - fix a metrics name bug - add unit test - generate config docs --- agent/agent.go | 4 +- config.md | 12 ++- config/config_serializer.go | 13 +-- config/file_config.go | 23 ++--- config/metadata/configMeta.yaml | 9 ++ config_complete.yaml | 15 +++- metrics.md | 8 +- refinery_config.md | 10 +++ route/route.go | 6 +- route/route_test.go | 123 ++++++++++++++++++++++++++ tools/convert/configDataNames.txt | 4 +- tools/convert/metricsMeta.yaml | 24 +++-- tools/convert/minimal_config.yaml | 3 +- tools/convert/templates/configV2.tmpl | 11 ++- 14 files changed, 225 insertions(+), 40 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 6cc0f5ca9f..3e06f306a1 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -209,11 +209,11 @@ func (agent *Agent) healthCheck() { } } - traceUsage, ok := agent.metrics.Get("bytes_received_trace") + traceUsage, ok := agent.metrics.Get("bytes_received_traces") if !ok { agent.logger.Errorf(context.Background(), "unexpected missing trace usage metric") } - logUsage, ok := agent.metrics.Get("bytes_received_log") + logUsage, ok := agent.metrics.Get("bytes_received_logs") if !ok { agent.logger.Errorf(context.Background(), "unexpected missing log usage metric") } diff --git a/config.md b/config.md index 308f59a2c5..17cb6cb05c 100644 --- a/config.md +++ b/config.md @@ -1,7 +1,7 @@ # Honeycomb Refinery Configuration Documentation This is the documentation for the configuration file for Honeycomb's Refinery. -It was automatically generated on 2025-02-14 at 21:35:19 UTC. +It was automatically generated on 2025-03-06 at 18:09:16 UTC. ## The Config file @@ -159,6 +159,16 @@ OpAMP support is experimental in Refinery. - Not eligible for live reload. - Type: `bool` +### `OpAMPRecordUsage` + +OpAMPRecordUsage controls whether to record usage metrics. + +This setting is only enabled if both OpAMP is enabled and RecordUsage is set to true. + +- Eligible for live reload. +- Type: `bool` +- Default: `true` + ### `OpAMPEndpoint` OpAMPEndpoint is the URL of the OpAMP server for this client. diff --git a/config/config_serializer.go b/config/config_serializer.go index 9947b727d0..0e3c7bdabf 100644 --- a/config/config_serializer.go +++ b/config/config_serializer.go @@ -25,12 +25,13 @@ func populateConfigContents(cfg Config) configContents { return configContents{ General: cfg.GetGeneralConfig(), Network: NetworkConfig{ - ListenAddr: cfg.GetListenAddr(), - PeerListenAddr: cfg.GetPeerListenAddr(), - HoneycombAPI: cfg.GetHoneycombAPI(), - HTTPIdleTimeout: Duration(cfg.GetHTTPIdleTimeout()), - OpAMPEndpoint: opAMPConfig.Endpoint, - OpAMPEnabled: opAMPConfig.Enabled, + ListenAddr: cfg.GetListenAddr(), + PeerListenAddr: cfg.GetPeerListenAddr(), + HoneycombAPI: cfg.GetHoneycombAPI(), + HTTPIdleTimeout: Duration(cfg.GetHTTPIdleTimeout()), + OpAMPEndpoint: opAMPConfig.Endpoint, + OpAMPEnabled: opAMPConfig.Enabled, + OpAMPRecordUsage: getDefaultTrueValue(cfg.GetOpAMPConfig().RecordUsage.Get()), }, AccessKeys: cfg.GetAccessKeyConfig(), Telemetry: getRefineryTelemetryConfig(cfg), diff --git a/config/file_config.go b/config/file_config.go index 05cd732f33..7a9a0e4c19 100644 --- a/config/file_config.go +++ b/config/file_config.go @@ -78,17 +78,19 @@ type GeneralConfig struct { // TODO: Implement opamp config in its own config section once we are ready to release the feature type OpAMPConfig struct { - Endpoint string `yaml:"Endpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"` - Enabled bool `yaml:"Enabled" default:"false"` + Endpoint string `yaml:"Endpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"` + Enabled bool `yaml:"Enabled" default:"false"` + RecordUsage *DefaultTrue `yaml:"SendUsageReport" default:"true"` } type NetworkConfig struct { - ListenAddr string `yaml:"ListenAddr" default:"0.0.0.0:8080" cmdenv:"HTTPListenAddr"` - PeerListenAddr string `yaml:"PeerListenAddr" default:"0.0.0.0:8081" cmdenv:"PeerListenAddr"` - HoneycombAPI string `yaml:"HoneycombAPI" default:"https://api.honeycomb.io" cmdenv:"HoneycombAPI"` - HTTPIdleTimeout Duration `yaml:"HTTPIdleTimeout"` - OpAMPEndpoint string `yaml:"OpAMPEndpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"` - OpAMPEnabled bool `yaml:"OpAMPEnabled" default:"false"` + ListenAddr string `yaml:"ListenAddr" default:"0.0.0.0:8080" cmdenv:"HTTPListenAddr"` + PeerListenAddr string `yaml:"PeerListenAddr" default:"0.0.0.0:8081" cmdenv:"PeerListenAddr"` + HoneycombAPI string `yaml:"HoneycombAPI" default:"https://api.honeycomb.io" cmdenv:"HoneycombAPI"` + HTTPIdleTimeout Duration `yaml:"HTTPIdleTimeout"` + OpAMPEndpoint string `yaml:"OpAMPEndpoint" cmdenv:"OpAMPEndpoint" default:"wss://127.0.0.1:4320/v1/opamp"` + OpAMPEnabled bool `yaml:"OpAMPEnabled" default:"false"` + OpAMPRecordUsage *DefaultTrue `yaml:"OpAMPRecordUsage" default:"true"` } type AccessKeyConfig struct { @@ -959,8 +961,9 @@ func (f *fileConfig) GetOpAMPConfig() OpAMPConfig { defer f.mux.RUnlock() return OpAMPConfig{ - Enabled: f.mainConfig.Network.OpAMPEnabled, - Endpoint: f.mainConfig.Network.OpAMPEndpoint, + Enabled: f.mainConfig.Network.OpAMPEnabled, + Endpoint: f.mainConfig.Network.OpAMPEndpoint, + RecordUsage: f.mainConfig.Network.OpAMPRecordUsage, } } diff --git a/config/metadata/configMeta.yaml b/config/metadata/configMeta.yaml index 3768ebf652..910457b5d7 100644 --- a/config/metadata/configMeta.yaml +++ b/config/metadata/configMeta.yaml @@ -153,6 +153,15 @@ groups: summary: controls whether to enable OpAMP support. description: > OpAMP support is experimental in Refinery. + - name: OpAMPRecordUsage + type: defaulttrue + valuetype: nondefault + default: true + reload: true + firstversion: v2.9.4 + summary: controls whether to record usage metrics. + description: > + This setting is only enabled if both OpAMP is enabled and RecordUsage is set to true. - name: OpAMPEndpoint type: string valuetype: assigndefault diff --git a/config_complete.yaml b/config_complete.yaml index db18704872..0016d8d2fc 100644 --- a/config_complete.yaml +++ b/config_complete.yaml @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created on 2025-02-14 at 21:35:18 UTC from ../../config.yaml using a template generated on 2025-02-14 at 21:35:13 UTC +# created on 2025-03-06 at 18:09:15 UTC from ../../config.yaml using a template generated on 2025-03-06 at 18:09:09 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -133,7 +133,16 @@ Network: ## OpAMP support is experimental in Refinery. ## ## Not eligible for live reload. - OpAMPEnabled: true + # OpAMPEnabled: false + + ## OpAMPRecordUsage controls whether to record usage metrics. + ## + ## This setting is only enabled if both OpAMP is enabled and RecordUsage + ## is set to true. + ## + ## default: true + ## Eligible for live reload. + # OpAMPRecordUsage: true ## OpAMPEndpoint is the URL of the OpAMP server for this client. ## @@ -141,7 +150,7 @@ Network: ## ## default: wss://127.0.0.1:4320/v1/opamp ## Not eligible for live reload. - OpAMPEndpoint: ws://127.0.0.1:4320/v1/opamp + OpAMPEndpoint: wss://127.0.0.1:4320/v1/opamp ############################## ## Access Key Configuration ## diff --git a/metrics.md b/metrics.md index 4c4bfe668d..9cf363ac6a 100644 --- a/metrics.md +++ b/metrics.md @@ -1,7 +1,7 @@ # Honeycomb Refinery Metrics Documentation This document contains the description of various metrics used in Refinery. -It was automatically generated on 2025-02-14 at 21:35:17 UTC. +It was automatically generated on 2025-03-06 at 18:09:14 UTC. Note: This document does not include metrics defined in the dynsampler-go dependency, as those metrics are generated dynamically at runtime. As a result, certain metrics may be missing or incomplete in this document, but they will still be available during execution with their full names. @@ -10,6 +10,8 @@ This table includes metrics with fully defined names. | Name | Type | Unit | Description | |------|------|------|-------------| +| is_ready | Gauge | Dimensionless | Whether the system is ready to receive traffic | +| is_alive | Gauge | Dimensionless | Whether the system is alive and reporting in | | collect_cache_entries | Histogram | Dimensionless | The number of traces currently stored in the cache | | cuckoo_current_capacity | Gauge | Dimensionless | current capacity of the cuckoo filter | | cuckoo_future_load_factor | Gauge | Percent | the fraction of slots occupied in the future cuckoo filter | @@ -18,8 +20,6 @@ This table includes metrics with fully defined names. | cuckoo_addqueue_locktime_uS | Histogram | Microseconds | the time spent holding the add queue lock | | cache_recent_dropped_traces | Gauge | Dimensionless | the current size of the most recent dropped trace cache | | collect_sent_reasons_cache_entries | Histogram | Dimensionless | Number of entries in the sent reasons cache | -| is_ready | Gauge | Dimensionless | Whether the system is ready to receive traffic | -| is_alive | Gauge | Dimensionless | Whether the system is alive and reporting in | | redis_pubsub_published | Counter | Dimensionless | Number of messages published to Redis PubSub | | redis_pubsub_received | Counter | Dimensionless | Number of messages received from Redis PubSub | | local_pubsub_published | Counter | Dimensionless | The total number of messages sent via the local pubsub implementation | @@ -103,6 +103,8 @@ Metrics in this table don't contain their expected prefixes. This is because the | _router_peer | Counter | Dimensionless | the number of spans proxied to a peer | | _router_batch | Counter | Dimensionless | the number of batches of events received | | _router_otlp | Counter | Dimensionless | the number of batches of otlp requests received | +| bytes_received_traces | Counter | Bytes | the number of bytes received in trace events | +| bytes_received_logs | Counter | Bytes | the number of bytes received in log events | | queue_length | Gauge | Dimensionless | number of events waiting to be sent to destination | | queue_overflow | Counter | Dimensionless | number of events dropped due to queue overflow | | send_errors | Counter | Dimensionless | number of errors encountered while sending events to destination | diff --git a/refinery_config.md b/refinery_config.md index 745e3b6ecf..45c5e083e2 100644 --- a/refinery_config.md +++ b/refinery_config.md @@ -135,6 +135,16 @@ OpAMP support is experimental in Refinery. - Not eligible for live reload. - Type: `bool` +### `OpAMPRecordUsage` + +`OpAMPRecordUsage` controls whether to record usage metrics. + +This setting is only enabled if both OpAMP is enabled and RecordUsage is set to true. + +- Eligible for live reload. +- Type: `defaulttrue` +- Default: `true` + ### `OpAMPEndpoint` `OpAMPEndpoint` is the URL of the OpAMP server for this client. diff --git a/route/route.go b/route/route.go index 5aa0c50bfa..6df20c98fc 100644 --- a/route/route.go +++ b/route/route.go @@ -396,8 +396,6 @@ func (r *Router) event(w http.ResponseWriter, req *http.Request) { return } - r.Metrics.Count("bytes_received_trace", len(reqBod)) - ev, err := r.requestToEvent(ctx, req, reqBod) if err != nil { r.handlerReturnWithError(w, ErrReqToEvent, err) @@ -608,8 +606,8 @@ func (r *Router) processEvent(ev *types.Event, reqID interface{}) error { IsRoot: isRootSpan(ev, r.Config), } - // only record bytes received for incoming traffic when opamp is enabled - if r.incomingOrPeer == "incoming" && r.Config.GetOpAMPConfig().Enabled { + // only record bytes received for incoming traffic when opamp is enabled and record usage is set to true + if r.incomingOrPeer == "incoming" && r.Config.GetOpAMPConfig().Enabled && r.Config.GetOpAMPConfig().RecordUsage.Get() { if span.Data["meta.signal_type"] == "log" { r.Metrics.Count("bytes_received_logs", span.GetDataSize()) } else { diff --git a/route/route_test.go b/route/route_test.go index 8ab45cf01d..5007a55238 100644 --- a/route/route_test.go +++ b/route/route_test.go @@ -800,3 +800,126 @@ func TestAddIncomingUserAgent(t *testing.T) { require.Equal(t, "test-agent", event.Data["meta.refinery.incoming_user_agent"]) }) } + +func TestProcessEventMetrics(t *testing.T) { + + tests := []struct { + name string + incomingOrPeer string + opampEnabled bool + recordUsage config.DefaultTrue + signalType string + expectedCount int + metricName string + }{ + { + name: "log event with opamp enabled and record usage", + incomingOrPeer: "incoming", + opampEnabled: true, + recordUsage: config.DefaultTrue(true), + signalType: "log", + expectedCount: 30, + metricName: "bytes_received_logs", + }, + { + name: "trace event with opamp enabled and record usage", + incomingOrPeer: "incoming", + opampEnabled: true, + recordUsage: config.DefaultTrue(true), + signalType: "trace", + expectedCount: 32, + metricName: "bytes_received_traces", + }, + { + name: "log event with opamp disabled", + incomingOrPeer: "incoming", + opampEnabled: false, + recordUsage: config.DefaultTrue(true), + signalType: "log", + expectedCount: 0, + }, + { + name: "log event with record usage disabled", + incomingOrPeer: "incoming", + opampEnabled: true, + recordUsage: config.DefaultTrue(false), + signalType: "log", + expectedCount: 0, + }, + { + name: "log event from peer", + incomingOrPeer: "peer", + opampEnabled: true, + recordUsage: config.DefaultTrue(true), + signalType: "log", + expectedCount: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + mockMetrics := &metrics.MockMetrics{} + mockMetrics.Start() + + mockConfig := &config.MockConfig{ + GetOpAmpConfigVal: config.OpAMPConfig{ + Enabled: tt.opampEnabled, + RecordUsage: &tt.recordUsage, + }, + TraceIdFieldNames: []string{"trace.trace_id"}, + } + + // Setup mock transmissions + mockUpstream := &transmit.MockTransmission{} + mockUpstream.Start() + mockPeer := &transmit.MockTransmission{} + mockPeer.Start() + + mockSharder := &sharder.MockSharder{ + Self: &sharder.TestShard{ + Addr: "http://localhost:12345", + }, + } + + router := &Router{ + Config: mockConfig, + Logger: &logger.NullLogger{}, + Metrics: mockMetrics, + UpstreamTransmission: mockUpstream, + PeerTransmission: mockPeer, + Collector: collect.NewMockCollector(), + Sharder: mockSharder, + incomingOrPeer: tt.incomingOrPeer, + iopLogger: iopLogger{Logger: &logger.NullLogger{}, incomingOrPeer: tt.incomingOrPeer}, + } + + // Create test event with traceID and signal type + event := &types.Event{ + Context: context.Background(), + APIHost: "test.honeycomb.io", + Dataset: "test-dataset", + Timestamp: time.Now(), + Data: map[string]interface{}{ + "trace.trace_id": "trace-123", + "meta.signal_type": tt.signalType, + "test_attribute": "test_value", + "another_attribute": 123, + }, + } + span := &types.Span{ + Event: *event, + TraceID: "trace-123", + IsRoot: true, + } + size := span.GetDataSize() + if tt.expectedCount > 0 { + assert.Equal(t, tt.expectedCount, size) + } + + // Call processEvent + err := router.processEvent(event, "request-123") + assert.NoError(t, err) + assert.Equal(t, tt.expectedCount, mockMetrics.CounterIncrements[tt.metricName]) + }) + } +} diff --git a/tools/convert/configDataNames.txt b/tools/convert/configDataNames.txt index 1323540f56..828617b219 100644 --- a/tools/convert/configDataNames.txt +++ b/tools/convert/configDataNames.txt @@ -1,5 +1,5 @@ # Names of groups and fields in the new config file format. -# Automatically generated on 2025-02-14 at 21:35:14 UTC. +# Automatically generated on 2025-03-06 at 18:09:11 UTC. General: - ConfigurationVersion @@ -22,6 +22,8 @@ Network: - OpAMPEnabled + - OpAMPRecordUsage + - OpAMPEndpoint diff --git a/tools/convert/metricsMeta.yaml b/tools/convert/metricsMeta.yaml index 31267e5a6e..f8550c0bb9 100644 --- a/tools/convert/metricsMeta.yaml +++ b/tools/convert/metricsMeta.yaml @@ -1,4 +1,12 @@ complete: + - name: is_ready + type: Gauge + unit: Dimensionless + description: Whether the system is ready to receive traffic + - name: is_alive + type: Gauge + unit: Dimensionless + description: Whether the system is alive and reporting in - name: collect_cache_entries type: Histogram unit: Dimensionless @@ -31,14 +39,6 @@ complete: type: Histogram unit: Dimensionless description: Number of entries in the sent reasons cache - - name: is_ready - type: Gauge - unit: Dimensionless - description: Whether the system is ready to receive traffic - - name: is_alive - type: Gauge - unit: Dimensionless - description: Whether the system is alive and reporting in - name: redis_pubsub_published type: Counter unit: Dimensionless @@ -340,6 +340,14 @@ hasprefix: type: Counter unit: Dimensionless description: the number of batches of otlp requests received + - name: bytes_received_traces + type: Counter + unit: Bytes + description: the number of bytes received in trace events + - name: bytes_received_logs + type: Counter + unit: Bytes + description: the number of bytes received in log events - name: queue_length type: Gauge unit: Dimensionless diff --git a/tools/convert/minimal_config.yaml b/tools/convert/minimal_config.yaml index b30a263c78..efb3b78791 100644 --- a/tools/convert/minimal_config.yaml +++ b/tools/convert/minimal_config.yaml @@ -1,5 +1,5 @@ # sample uncommented config file containing all possible fields -# automatically generated on 2025-02-14 at 21:35:15 UTC +# automatically generated on 2025-03-06 at 18:09:11 UTC General: ConfigurationVersion: 2 MinRefineryVersion: "v2.0" @@ -11,6 +11,7 @@ Network: HTTPIdleTimeout: 0s HoneycombAPI: "https://api.honeycomb.io" OpAMPEnabled: false + OpAMPRecordUsage: true OpAMPEndpoint: "wss://127.0.0.1:4320/v1/opamp" AccessKeys: ReceiveKeys: diff --git a/tools/convert/templates/configV2.tmpl b/tools/convert/templates/configV2.tmpl index b53347bdbb..a5967eb0ee 100644 --- a/tools/convert/templates/configV2.tmpl +++ b/tools/convert/templates/configV2.tmpl @@ -2,7 +2,7 @@ ## Honeycomb Refinery Configuration ## ###################################### # -# created {{ now }} from {{ .Input }} using a template generated on 2025-02-14 at 21:35:13 UTC +# created {{ now }} from {{ .Input }} using a template generated on 2025-03-06 at 18:09:09 UTC # This file contains a configuration for the Honeycomb Refinery. It is in YAML # format, organized into named groups, each of which contains a set of @@ -135,6 +135,15 @@ Network: ## Not eligible for live reload. {{ nonDefaultOnly .Data "OpAMPEnabled" "OpAMPEnabled" false }} + ## OpAMPRecordUsage controls whether to record usage metrics. + ## + ## This setting is only enabled if both OpAMP is enabled and RecordUsage + ## is set to true. + ## + ## default: true + ## Eligible for live reload. + {{ nonDefaultOnly .Data "OpAMPRecordUsage" "OpAMPRecordUsage" true }} + ## OpAMPEndpoint is the URL of the OpAMP server for this client. ## ## This setting is the URL of the OpAMP server for this client.