Skip to content

Commit 67939bd

Browse files
committed
Handler metrics
1 parent f6ebbd1 commit 67939bd

File tree

6 files changed

+121
-82
lines changed

6 files changed

+121
-82
lines changed

cmd/gcp/main.go

Lines changed: 0 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ import (
2828
"syscall"
2929
"time"
3030

31-
"github.com/prometheus/client_golang/prometheus/promhttp"
3231
sctfe "github.com/transparency-dev/static-ct"
3332
"github.com/transparency-dev/static-ct/storage"
3433
gcpSCTFE "github.com/transparency-dev/static-ct/storage/gcp"
@@ -49,7 +48,6 @@ var (
4948
notAfterLimit timestampFlag
5049

5150
httpEndpoint = flag.String("http_endpoint", "localhost:6962", "Endpoint for HTTP (host:port).")
52-
metricsEndpoint = flag.String("metrics_endpoint", "", "Endpoint for serving metrics; if left empty, metrics will be visible on --http_endpoint.")
5351
httpDeadline = flag.Duration("http_deadline", time.Second*10, "Deadline for HTTP requests.")
5452
maskInternalErrors = flag.Bool("mask_internal_errors", false, "Don't return error strings with Internal Server Error HTTP responses.")
5553
origin = flag.String("origin", "", "Origin of the log, for checkpoints and the monitoring prefix.")
@@ -99,25 +97,6 @@ func main() {
9997
klog.Info("**** CT HTTP Server Starting ****")
10098
http.Handle("/", logHandler)
10199

102-
metricsAt := *metricsEndpoint
103-
if metricsAt == "" {
104-
metricsAt = *httpEndpoint
105-
}
106-
107-
if metricsAt != *httpEndpoint {
108-
// Run a separate handler for metrics.
109-
go func() {
110-
mux := http.NewServeMux()
111-
mux.Handle("/metrics", promhttp.Handler())
112-
metricsServer := http.Server{Addr: metricsAt, Handler: mux}
113-
err := metricsServer.ListenAndServe()
114-
klog.Warningf("Metrics server exited: %v", err)
115-
}()
116-
} else {
117-
// Handle metrics on the DefaultServeMux.
118-
http.Handle("/metrics", promhttp.Handler())
119-
}
120-
121100
// Bring up the HTTP server and serve until we get a signal not to.
122101
srv := http.Server{Addr: *httpEndpoint}
123102
shutdownWG := new(sync.WaitGroup)

ctlog.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,7 @@ func NewLogHandler(ctx context.Context, origin string, signer crypto.Signer, cfg
138138
TimeSource: sysTimeSource,
139139
}
140140

141-
handlers := scti.NewPathHandlers(opts, log)
141+
handlers := scti.NewPathHandlers(ctx, opts, log)
142142
mux := http.NewServeMux()
143143
// Register handlers for all the configured logs.
144144
for path, handler := range handlers {

internal/otel/cast.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Copyright 2025 The Tessera authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package otel
16+
17+
import "math"
18+
19+
var (
20+
// LatencyHistogramBuckets is a range of millisecond scale bucket boundaries which remain useful at around 1-2 seconds timescale in addition to smaller latencies.
21+
LatencyHistogramBuckets = []float64{0, 10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, 4000, 5000, 6000, 8000, 10000}
22+
)
23+
24+
// Clamp64 casts a uint64 to an int64, clamping it at math.MaxInt64 if the
// value is too large to be represented as an int64.
//
// Values in the range [0, math.MaxInt64] are returned unchanged; anything
// larger is returned as math.MaxInt64, so the result is never negative.
//
// Intended only for converting Tessera uint64 internal values to int64 for use with
// open telemetry metrics.
func Clamp64(u uint64) int64 {
	if u > math.MaxInt64 {
		return math.MaxInt64
	}
	return int64(u)
}

internal/scti/handlers.go

Lines changed: 41 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -24,19 +24,18 @@ import (
2424
"fmt"
2525
"io"
2626
"net/http"
27-
"strconv"
2827
"strings"
2928
"sync"
3029
"time"
3130

32-
"github.com/prometheus/client_golang/prometheus"
33-
"github.com/prometheus/client_golang/prometheus/promauto"
31+
"github.com/transparency-dev/static-ct/internal/otel"
3432
"github.com/transparency-dev/static-ct/internal/types/rfc6962"
3533
"github.com/transparency-dev/static-ct/internal/types/tls"
3634
"github.com/transparency-dev/static-ct/internal/x509util"
3735
"github.com/transparency-dev/static-ct/modules/dedup"
3836
tessera "github.com/transparency-dev/trillian-tessera"
3937
"github.com/transparency-dev/trillian-tessera/ctonly"
38+
"go.opentelemetry.io/otel/metric"
4039
"k8s.io/klog/v2"
4140
)
4241

@@ -50,7 +49,7 @@ const (
5049
)
5150

5251
// entrypointName identifies a CT entrypoint as defined in section 4 of RFC 6962.
53-
type entrypointName string
52+
type entrypointName = string
5453

5554
// Constants for entrypoint names, as exposed in statistics/logging.
5655
const (
@@ -63,53 +62,38 @@ var (
6362
// Metrics are all per-log (attribute "tesseract.origin"), but may also be
6463
// per-entrypoint (attribute "tesseract.operation") or per-return-code (attribute "tesseract.code").
6564
once sync.Once
66-
knownLogs *prometheus.GaugeVec // origin => value (always 1.0)
67-
lastSCTIndex *prometheus.GaugeVec // origin => value
68-
lastSCTTimestamp *prometheus.GaugeVec // origin => value
69-
reqsCounter *prometheus.CounterVec // origin, op => value
70-
rspsCounter *prometheus.CounterVec // origin, op, code => value
71-
rspLatency *prometheus.HistogramVec // origin, op, code => value
65+
knownLogs metric.Int64Gauge // origin => value (always 1.0)
66+
lastSCTIndex metric.Int64Gauge // origin => value
67+
lastSCTTimestamp metric.Int64Gauge // origin => value
68+
reqsCounter metric.Int64Counter // origin, op => value
69+
rspsCounter metric.Int64Counter // origin, op, code => value
70+
rspLatency metric.Float64Histogram // origin, op, code => value
7271
)
7372

7473
// setupMetrics initializes all the exported metrics.
7574
func setupMetrics() {
7675
// TODO(phboneff): add metrics for deduplication and chain storage.
77-
knownLogs = promauto.NewGaugeVec(
78-
prometheus.GaugeOpts{
79-
Name: "known_logs",
80-
Help: "Set to 1 for known logs",
81-
},
82-
[]string{"origin"})
83-
lastSCTTimestamp = promauto.NewGaugeVec(
84-
prometheus.GaugeOpts{
85-
Name: "last_sct_timestamp",
86-
Help: "Time of last SCT in ms since epoch",
87-
},
88-
[]string{"origin"})
89-
lastSCTIndex = promauto.NewGaugeVec(
90-
prometheus.GaugeOpts{
91-
Name: "last_sct_index",
92-
Help: "Index of last SCT",
93-
},
94-
[]string{"origin"})
95-
reqsCounter = promauto.NewCounterVec(
96-
prometheus.CounterOpts{
97-
Name: "http_reqs",
98-
Help: "Number of requests",
99-
},
100-
[]string{"origin", "ep"})
101-
rspsCounter = promauto.NewCounterVec(
102-
prometheus.CounterOpts{
103-
Name: "http_rsps",
104-
Help: "Number of responses",
105-
},
106-
[]string{"origin", "op", "code"})
107-
rspLatency = promauto.NewHistogramVec(
108-
prometheus.HistogramOpts{
109-
Name: "http_latency",
110-
Help: "Latency of responses in seconds",
111-
},
112-
[]string{"origin", "op", "code"})
76+
knownLogs = mustCreate(meter.Int64Gauge("tesseract.known_logs",
77+
metric.WithDescription("Set to 1 for known logs")))
78+
79+
lastSCTTimestamp = mustCreate(meter.Int64Gauge("tesseract.last_sct.timestamp",
80+
metric.WithDescription("Time of last SCT since epoch"),
81+
metric.WithUnit("ms")))
82+
83+
lastSCTIndex = mustCreate(meter.Int64Gauge("tesseract.last_sct.index",
84+
metric.WithDescription("Index of last SCT"),
85+
metric.WithUnit("{entry}")))
86+
87+
reqsCounter = mustCreate(meter.Int64Counter("tesseract.http_request.count",
88+
metric.WithDescription("CT HTTP requests")))
89+
90+
rspsCounter = mustCreate(meter.Int64Counter("tesseract.http_response.count",
91+
metric.WithDescription("CT HTTP responses")))
92+
93+
rspLatency = mustCreate(meter.Float64Histogram("tesseract.http_response.duration",
94+
metric.WithDescription("CT HTTP response duration"),
95+
metric.WithExplicitBucketBoundaries(otel.LatencyHistogramBuckets...),
96+
metric.WithUnit("ms")))
11397
}
11498

11599
// entrypoints is a list of entrypoint names as exposed in statistics/logging.
@@ -132,16 +116,18 @@ type appHandler struct {
132116
// does additional common error and stats processing.
133117
func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
134118
var statusCode int
135-
label0 := a.log.origin
136-
label1 := string(a.name)
137-
reqsCounter.WithLabelValues(label0, label1).Inc()
119+
120+
originAttr := originKey.String(a.log.origin)
121+
operationAttr := operationKey.String(a.name)
122+
reqsCounter.Add(r.Context(), 1, metric.WithAttributes(originAttr, operationAttr))
138123
startTime := a.opts.TimeSource.Now()
139124
logCtx := a.opts.RequestLog.start(r.Context())
140125
a.opts.RequestLog.origin(logCtx, a.log.origin)
141126
defer func() {
142127
// rspLatency is declared with unit "ms" and millisecond bucket boundaries, so the
// elapsed time must be recorded in (fractional) milliseconds rather than seconds.
latency := float64(a.opts.TimeSource.Now().Sub(startTime)) / float64(time.Millisecond)
143-
rspLatency.WithLabelValues(label0, label1, strconv.Itoa(statusCode)).Observe(latency)
128+
rspLatency.Record(r.Context(), latency, metric.WithAttributes(originAttr, operationAttr, codeKey.Int(statusCode)))
144129
}()
130+
145131
klog.V(2).Infof("%s: request %v %q => %s", a.log.origin, r.Method, r.URL, a.name)
146132
// TODO(phboneff): add a.Method directly on the handler path and remove this test.
147133
if r.Method != a.method {
@@ -169,7 +155,7 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
169155
statusCode, err = a.handler(ctx, a.opts, a.log, w, r)
170156
a.opts.RequestLog.status(ctx, statusCode)
171157
klog.V(2).Infof("%s: %s <= st=%d", a.log.origin, a.name, statusCode)
172-
rspsCounter.WithLabelValues(label0, label1, strconv.Itoa(statusCode)).Inc()
158+
rspsCounter.Add(r.Context(), 1, metric.WithAttributes(originAttr, operationAttr, codeKey.Int(statusCode)))
173159
if err != nil {
174160
klog.Warningf("%s: %s handler error: %v", a.log.origin, a.name, err)
175161
a.opts.sendHTTPError(w, statusCode, err)
@@ -197,9 +183,9 @@ type HandlerOptions struct {
197183
TimeSource TimeSource
198184
}
199185

200-
func NewPathHandlers(opts *HandlerOptions, log *log) pathHandlers {
186+
func NewPathHandlers(ctx context.Context, opts *HandlerOptions, log *log) pathHandlers {
201187
once.Do(func() { setupMetrics() })
202-
knownLogs.WithLabelValues(log.origin).Set(1.0)
188+
knownLogs.Record(ctx, 1, metric.WithAttributes(originKey.String(log.origin)))
203189

204190
prefix := strings.TrimRight(log.origin, "/")
205191
if !strings.HasPrefix(prefix, "/") {
@@ -351,8 +337,8 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt
351337
}
352338
klog.V(3).Infof("%s: %s <= SCT", log.origin, method)
353339
if sct.Timestamp == timeMillis {
354-
lastSCTTimestamp.WithLabelValues(log.origin).Set(float64(sct.Timestamp))
355-
lastSCTIndex.WithLabelValues(log.origin).Set(float64(idx))
340+
lastSCTTimestamp.Record(ctx, otel.Clamp64(sct.Timestamp), metric.WithAttributes(originKey.String(log.origin)))
341+
lastSCTIndex.Record(ctx, otel.Clamp64(idx), metric.WithAttributes(originKey.String(log.origin)))
356342
}
357343

358344
return http.StatusOK, nil

internal/scti/handlers_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ func setupTestLog(t *testing.T) (*log, string) {
123123
func setupTestServer(t *testing.T, log *log, path string) *httptest.Server {
124124
t.Helper()
125125

126-
handlers := NewPathHandlers(&hOpts, log)
126+
handlers := NewPathHandlers(t.Context(), &hOpts, log)
127127
handler, ok := handlers[path]
128128
if !ok {
129129
t.Fatalf("Handler not found: %s", path)
@@ -208,7 +208,7 @@ func postHandlers(t *testing.T, handlers pathHandlers) pathHandlers {
208208

209209
func TestPostHandlersRejectGet(t *testing.T) {
210210
log, _ := setupTestLog(t)
211-
handlers := NewPathHandlers(&hOpts, log)
211+
handlers := NewPathHandlers(t.Context(), &hOpts, log)
212212

213213
// Anything in the post handler list should reject GET
214214
for path, handler := range postHandlers(t, handlers) {
@@ -229,7 +229,7 @@ func TestPostHandlersRejectGet(t *testing.T) {
229229

230230
func TestGetHandlersRejectPost(t *testing.T) {
231231
log, _ := setupTestLog(t)
232-
handlers := NewPathHandlers(&hOpts, log)
232+
handlers := NewPathHandlers(t.Context(), &hOpts, log)
233233

234234
// Anything in the get handler list should reject POST.
235235
for path, handler := range getHandlers(t, handlers) {
@@ -262,7 +262,7 @@ func TestPostHandlersFailure(t *testing.T) {
262262
}
263263

264264
log, _ := setupTestLog(t)
265-
handlers := NewPathHandlers(&hOpts, log)
265+
handlers := NewPathHandlers(t.Context(), &hOpts, log)
266266

267267
for path, handler := range postHandlers(t, handlers) {
268268
t.Run(path, func(t *testing.T) {
@@ -285,7 +285,7 @@ func TestPostHandlersFailure(t *testing.T) {
285285
func TestNewPathHandlers(t *testing.T) {
286286
log, _ := setupTestLog(t)
287287
t.Run("Handlers", func(t *testing.T) {
288-
handlers := NewPathHandlers(&HandlerOptions{}, log)
288+
handlers := NewPathHandlers(t.Context(), &HandlerOptions{}, log)
289289
// Check each entrypoint has a handler
290290
if got, want := len(handlers), len(entrypoints); got != want {
291291
t.Fatalf("len(info.handler)=%d; want %d", got, want)

internal/scti/otel.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// Copyright 2025 The Tessera authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package scti
16+
17+
import (
18+
"go.opentelemetry.io/otel"
19+
"go.opentelemetry.io/otel/attribute"
20+
"k8s.io/klog/v2"
21+
)
22+
23+
const name = "github.com/transparency-dev/static-ct/internal/scti"
24+
25+
var (
26+
meter = otel.Meter(name)
27+
tracer = otel.Tracer(name)
28+
)
29+
30+
var (
31+
codeKey = attribute.Key("tesseract.code")
32+
operationKey = attribute.Key("tesseract.operation")
33+
originKey = attribute.Key("tesseract.origin")
34+
)
35+
36+
func mustCreate[T any](t T, err error) T {
37+
if err != nil {
38+
klog.Exit(err.Error())
39+
}
40+
return t
41+
}

0 commit comments

Comments
 (0)