Skip to content

Drop dependency on trillian/monitoring #117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 53 additions & 20 deletions internal/scti/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ import (

"github.com/google/certificate-transparency-go/tls"
"github.com/google/certificate-transparency-go/x509"
"github.com/google/trillian/monitoring"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/transparency-dev/static-ct/modules/dedup"
tessera "github.com/transparency-dev/trillian-tessera"
"github.com/transparency-dev/trillian-tessera/ctonly"
Expand Down Expand Up @@ -62,20 +63,53 @@ var (
// Metrics are all per-log (label "origin"); some are additionally broken
// down by entrypoint and by HTTP status code (see setupMetrics for the
// exact label names of each metric).
once sync.Once
knownLogs monitoring.Gauge // origin => value (always 1.0)
lastSCTTimestamp monitoring.Gauge // origin => value
reqsCounter monitoring.Counter // origin, ep => value
rspsCounter monitoring.Counter // origin, ep, rc => value
rspLatency monitoring.Histogram // origin, ep, rc => value
knownLogs *prometheus.GaugeVec // origin => value (always 1.0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could just remove this one, I suspect?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've left it in on purpose: it makes it possible to build a global dashboard listing all the log names, for instance when multiple stacks exist. It's a convenient way to extract the origin, and it protects us from bugs such as NaN values on some metrics when no requests are in flight.

lastSCTIndex *prometheus.GaugeVec // origin => value
lastSCTTimestamp *prometheus.GaugeVec // origin => value
reqsCounter *prometheus.CounterVec // origin, op => value
rspsCounter *prometheus.CounterVec // origin, op, code => value
rspLatency *prometheus.HistogramVec // origin, op, code => value
)

// setupMetrics initializes all the exported metrics.
func setupMetrics(mf monitoring.MetricFactory) {
knownLogs = mf.NewGauge("known_logs", "Set to 1 for known logs", "logid")
lastSCTTimestamp = mf.NewGauge("last_sct_timestamp", "Time of last SCT in ms since epoch", "logid")
reqsCounter = mf.NewCounter("http_reqs", "Number of requests", "logid", "ep")
rspsCounter = mf.NewCounter("http_rsps", "Number of responses", "logid", "ep", "rc")
rspLatency = mf.NewHistogram("http_latency", "Latency of responses in seconds", "logid", "ep", "rc")
func setupMetrics() {
	// Each vector is created through promauto, which registers it with the
	// default Prometheus registry as a side effect. Registering the same
	// metric name twice panics, so callers must invoke this at most once per
	// process.
	//
	// Label values are supplied positionally through WithLabelValues, so the
	// order of the label names declared here must match the argument order
	// used at the call sites.
	//
	// TODO(phboneff): add metrics for deduplication and chain storage.

	// Per-log gauges.
	knownLogs = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "known_logs",
			Help: "Set to 1 for known logs",
		},
		[]string{"origin"})
	lastSCTTimestamp = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "last_sct_timestamp",
			Help: "Time of last SCT in ms since epoch",
		},
		[]string{"origin"})
	lastSCTIndex = promauto.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "last_sct_index",
			Help: "Index of last SCT",
		},
		[]string{"origin"})

	// Per-log, per-operation request counter. It uses the same "op" label
	// name as http_rsps and http_latency so that the three series can be
	// joined or aggregated on a common (origin, op) label set.
	reqsCounter = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "http_reqs",
			Help: "Number of requests",
		},
		[]string{"origin", "op"})

	// Per-log, per-operation, per-status-code response metrics.
	rspsCounter = promauto.NewCounterVec(
		prometheus.CounterOpts{
			Name: "http_rsps",
			Help: "Number of responses",
		},
		[]string{"origin", "op", "code"})
	rspLatency = promauto.NewHistogramVec(
		prometheus.HistogramOpts{
			Name: "http_latency",
			Help: "Latency of responses in seconds",
		},
		[]string{"origin", "op", "code"})
}

// entrypoints is a list of entrypoint names as exposed in statistics/logging.
Expand All @@ -100,13 +134,13 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var statusCode int
label0 := a.log.origin
label1 := string(a.name)
reqsCounter.Inc(label0, label1)
reqsCounter.WithLabelValues(label0, label1).Inc()
startTime := a.opts.TimeSource.Now()
logCtx := a.opts.RequestLog.start(r.Context())
a.opts.RequestLog.origin(logCtx, a.log.origin)
defer func() {
latency := a.opts.TimeSource.Now().Sub(startTime).Seconds()
rspLatency.Observe(latency, label0, label1, strconv.Itoa(statusCode))
rspLatency.WithLabelValues(label0, label1, strconv.Itoa(statusCode)).Observe(latency)
}()
klog.V(2).Infof("%s: request %v %q => %s", a.log.origin, r.Method, r.URL, a.name)
// TODO(phboneff): add a.Method directly on the handler path and remove this test.
Expand Down Expand Up @@ -135,7 +169,7 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
statusCode, err = a.handler(ctx, a.opts, a.log, w, r)
a.opts.RequestLog.status(ctx, statusCode)
klog.V(2).Infof("%s: %s <= st=%d", a.log.origin, a.name, statusCode)
rspsCounter.Inc(label0, label1, strconv.Itoa(statusCode))
rspsCounter.WithLabelValues(label0, label1, strconv.Itoa(statusCode)).Inc()
if err != nil {
klog.Warningf("%s: %s handler error: %v", a.log.origin, a.name, err)
a.opts.sendHTTPError(w, statusCode, err)
Expand All @@ -154,8 +188,6 @@ func (a appHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
type HandlerOptions struct {
// Deadline is a timeout for HTTP requests.
Deadline time.Duration
// MetricFactory allows creating metrics.
MetricFactory monitoring.MetricFactory
// RequestLog provides structured logging of CTFE requests.
RequestLog requestLog
// MaskInternalErrors indicates if internal server errors should be masked
Expand All @@ -166,8 +198,8 @@ type HandlerOptions struct {
}

func NewPathHandlers(opts *HandlerOptions, log *log) pathHandlers {
once.Do(func() { setupMetrics(opts.MetricFactory) })
knownLogs.Set(1.0, log.origin)
once.Do(func() { setupMetrics() })
knownLogs.WithLabelValues(log.origin).Set(1.0)

prefix := strings.TrimRight(log.origin, "/")

Expand Down Expand Up @@ -314,7 +346,8 @@ func addChainInternal(ctx context.Context, opts *HandlerOptions, log *log, w htt
}
klog.V(3).Infof("%s: %s <= SCT", log.origin, method)
if sct.Timestamp == timeMillis {
lastSCTTimestamp.Set(float64(sct.Timestamp), log.origin)
lastSCTTimestamp.WithLabelValues(log.origin).Set(float64(sct.Timestamp))
lastSCTIndex.WithLabelValues(log.origin).Set(float64(idx))
}

return http.StatusOK, nil
Expand Down
8 changes: 3 additions & 5 deletions internal/scti/handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ import (
"github.com/google/certificate-transparency-go/x509util"
"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/google/trillian/monitoring"
"github.com/transparency-dev/static-ct/internal/testdata"
"github.com/transparency-dev/static-ct/mockstorage"
"github.com/transparency-dev/static-ct/modules/dedup"
Expand Down Expand Up @@ -96,10 +95,9 @@ func setupTest(t *testing.T, pemRoots []string, signer crypto.Signer) handlerTes
}

hOpts := HandlerOptions{
Deadline: time.Millisecond * 500,
MetricFactory: monitoring.InertMetricFactory{},
RequestLog: new(DefaultRequestLog),
TimeSource: fakeTimeSource,
Deadline: time.Millisecond * 500,
RequestLog: new(DefaultRequestLog),
TimeSource: fakeTimeSource,
}
signSCT := func(leaf *ct.MerkleTreeLeaf) (*ct.SignedCertificateTimestamp, error) {
return buildV1SCT(signer, leaf)
Expand Down
Loading