From 559d8f47143a3d3890fd85210a101bf4dee32a1b Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:14:45 +0000 Subject: [PATCH 01/10] Change route / to only accept GET and HEAD method --- changelogs/9.0.asciidoc | 1 + internal/beater/api/mux_root_test.go | 57 +++++++++++++++++++++++++--- internal/beater/api/root/handler.go | 12 +++++- 3 files changed, 62 insertions(+), 8 deletions(-) diff --git a/changelogs/9.0.asciidoc b/changelogs/9.0.asciidoc index d890048ab25..e0e75ebb4e3 100644 --- a/changelogs/9.0.asciidoc +++ b/changelogs/9.0.asciidoc @@ -16,6 +16,7 @@ https://github.com/elastic/apm-server/compare/v\...v9.0.0[View commits] [float] ==== Breaking Changes - Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] {pull}15524[15524] +- Change route `/` to only accept GET and HEAD requests, and return 405 Method Not Allowed otherwise. [float] ==== Deprecations diff --git a/internal/beater/api/mux_root_test.go b/internal/beater/api/mux_root_test.go index 2b2af7dcc47..2d2d4956c56 100644 --- a/internal/beater/api/mux_root_test.go +++ b/internal/beater/api/mux_root_test.go @@ -20,6 +20,7 @@ package api import ( "encoding/json" "net/http" + "net/http/httptest" "testing" "github.com/stretchr/testify/assert" @@ -27,6 +28,7 @@ import ( "github.com/elastic/apm-server/internal/beater/config" "github.com/elastic/apm-server/internal/beater/headers" + "github.com/elastic/apm-server/internal/beater/monitoringtest" "github.com/elastic/apm-server/internal/beater/request" "github.com/elastic/apm-server/internal/version" ) @@ -63,10 +65,53 @@ func TestRootHandler_PanicMiddleware(t *testing.T) { } func TestRootHandler_MonitoringMiddleware(t *testing.T) { - testMonitoringMiddleware(t, "/", map[string]any{ - "http.server." + string(request.IDRequestCount): 1, - "http.server." + string(request.IDResponseCount): 1, - "http.server." + string(request.IDResponseValidCount): 1, - "http.server." + string(request.IDResponseValidOK): 1, - }) + validMethodMetrics := map[string]any{ + "http.server." + string(request.IDRequestCount): 1, + "http.server." + string(request.IDResponseCount): 1, + "http.server." + string(request.IDResponseValidCount): 1, + "http.server." + string(request.IDResponseValidOK): 1, + "apm-server.root." + string(request.IDRequestCount): 1, + "apm-server.root." + string(request.IDResponseCount): 1, + "apm-server.root." + string(request.IDResponseValidCount): 1, + "apm-server.root." + string(request.IDResponseValidOK): 1, + } + invalidMethodMetrics := map[string]any{ + "http.server." + string(request.IDRequestCount): 1, + "http.server." + string(request.IDResponseCount): 1, + "http.server." + string(request.IDResponseErrorsCount): 1, + "http.server." + string(request.IDResponseErrorsMethodNotAllowed): 1, + "apm-server.root." + string(request.IDRequestCount): 1, + "apm-server.root." + string(request.IDResponseCount): 1, + "apm-server.root." + string(request.IDResponseErrorsCount): 1, + "apm-server.root." + string(request.IDResponseErrorsMethodNotAllowed): 1, + } + for _, tc := range []struct { + method string + wantMetrics map[string]any + }{ + { + method: http.MethodGet, + wantMetrics: validMethodMetrics, + }, + { + method: http.MethodHead, + wantMetrics: validMethodMetrics, + }, + { + method: http.MethodPut, + wantMetrics: invalidMethodMetrics, + }, + { + method: http.MethodPost, + wantMetrics: invalidMethodMetrics, + }, + } { + t.Run(tc.method, func(t *testing.T) { + h, reader := newTestMux(t, config.DefaultConfig()) + req := httptest.NewRequest(tc.method, "/", nil) + h.ServeHTTP(httptest.NewRecorder(), req) + + monitoringtest.ExpectContainOtelMetrics(t, reader, tc.wantMetrics) + }) + } } diff --git a/internal/beater/api/root/handler.go b/internal/beater/api/root/handler.go index 2ef21a67bc4..1a4bc1b386b 100644 --- a/internal/beater/api/root/handler.go +++ b/internal/beater/api/root/handler.go @@ -18,6 +18,7 @@ package root import ( + "net/http" "time" "github.com/elastic/beats/v7/libbeat/version" @@ -38,10 +39,17 @@ type HandlerConfig struct { // Handler returns error if route does not exist, // otherwise returns information about the server. The detail level differs for authenticated and anonymous requests. -// TODO: only allow GET, HEAD requests (breaking change) func Handler(cfg HandlerConfig) request.Handler { - return func(c *request.Context) { + // Only allow GET, HEAD requests + switch c.Request.Method { + case http.MethodGet, http.MethodHead: + default: + c.Result.SetDefault(request.IDResponseErrorsMethodNotAllowed) + c.WriteResult() + return + } + serverInfo := mapstr.M{ "build_date": version.BuildTime().Format(time.RFC3339), "build_sha": version.Commit(), From 16cab2a7062813de2751bd4125da73a49162c159 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:26:20 +0000 Subject: [PATCH 02/10] Update changelog --- changelogs/9.0.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/9.0.asciidoc b/changelogs/9.0.asciidoc index e0e75ebb4e3..7b68ba9d13e 100644 --- a/changelogs/9.0.asciidoc +++ b/changelogs/9.0.asciidoc @@ -16,7 +16,7 @@ https://github.com/elastic/apm-server/compare/v\...v9.0.0[View commits] [float] ==== Breaking Changes - Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] {pull}15524[15524] -- Change route `/` to only accept GET and HEAD requests, and return 405 Method Not Allowed otherwise. +- Change health check endpoint `/` to only accept GET and HEAD requests, otherwise return HTTP 405 Method Not Allowed. [float] ==== Deprecations From f13131fee98d6bc3bbd6a33265f2428b4bf6523d Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:35:02 +0000 Subject: [PATCH 03/10] Verbose error message --- internal/beater/api/root/handler.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/internal/beater/api/root/handler.go b/internal/beater/api/root/handler.go index 1a4bc1b386b..f2de1fb6b96 100644 --- a/internal/beater/api/root/handler.go +++ b/internal/beater/api/root/handler.go @@ -18,6 +18,7 @@ package root import ( + "errors" "net/http" "time" @@ -45,7 +46,11 @@ func Handler(cfg HandlerConfig) request.Handler { switch c.Request.Method { case http.MethodGet, http.MethodHead: default: - c.Result.SetDefault(request.IDResponseErrorsMethodNotAllowed) + c.Result.SetWithError( + request.IDResponseErrorsMethodNotAllowed, + // include a verbose error message to alert users about a common misconfiguration + errors.New("this is the health check endpoint; did you mean to send data to another endpoint instead?"), + ) c.WriteResult() return } From 9d4122f8bee1ae8af381ff5b0e55832eecc9a9d1 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:40:39 +0000 Subject: [PATCH 04/10] Add to all breaking changes --- changelogs/all-breaking-changes.asciidoc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/changelogs/all-breaking-changes.asciidoc b/changelogs/all-breaking-changes.asciidoc index dbbce5cbc46..f11f30fdc90 100644 --- a/changelogs/all-breaking-changes.asciidoc +++ b/changelogs/all-breaking-changes.asciidoc @@ -24,6 +24,10 @@ which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467] and https://github.com/elastic/apm-server/pull/15524[PR #15524] +- Change health check endpoint `/` to only accept GET and HEAD requests. +Any other methods will return HTTP 405 Method Not Allowed. +This will surface any misconfiguration causing data to be sent to `/` instead of the correct endpoint, +e.g. `/v1/traces` for OTLP/HTTP. [PR #15976] // end::90-bc[] // tag::811-bc[] From bc091f7eedad2fabedc3d2128ad4eedb0aaa25bd Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:42:42 +0000 Subject: [PATCH 05/10] Naming --- changelogs/9.0.asciidoc | 2 +- changelogs/all-breaking-changes.asciidoc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/changelogs/9.0.asciidoc b/changelogs/9.0.asciidoc index 7b68ba9d13e..d93a20da36a 100644 --- a/changelogs/9.0.asciidoc +++ b/changelogs/9.0.asciidoc @@ -16,7 +16,7 @@ https://github.com/elastic/apm-server/compare/v\...v9.0.0[View commits] [float] ==== Breaking Changes - Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] {pull}15524[15524] -- Change health check endpoint `/` to only accept GET and HEAD requests, otherwise return HTTP 405 Method Not Allowed. +- Change server information endpoint `/` to only accept GET and HEAD requests, otherwise return HTTP 405 Method Not Allowed. [float] ==== Deprecations diff --git a/changelogs/all-breaking-changes.asciidoc b/changelogs/all-breaking-changes.asciidoc index f11f30fdc90..1d80381fa53 100644 --- a/changelogs/all-breaking-changes.asciidoc +++ b/changelogs/all-breaking-changes.asciidoc @@ -24,7 +24,7 @@ which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467] and https://github.com/elastic/apm-server/pull/15524[PR #15524] -- Change health check endpoint `/` to only accept GET and HEAD requests. +- Change server information endpoint `/` to only accept GET and HEAD requests. Any other methods will return HTTP 405 Method Not Allowed. This will surface any misconfiguration causing data to be sent to `/` instead of the correct endpoint, e.g. `/v1/traces` for OTLP/HTTP. [PR #15976] From 3ee70b718a0152abec4131db1cabe007d6b33856 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:48:11 +0000 Subject: [PATCH 06/10] Remove from openapi spec --- docs/spec/openapi/paths/server_info.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/docs/spec/openapi/paths/server_info.yaml b/docs/spec/openapi/paths/server_info.yaml index a70149acfce..9a4a5365422 100644 --- a/docs/spec/openapi/paths/server_info.yaml +++ b/docs/spec/openapi/paths/server_info.yaml @@ -15,11 +15,3 @@ get: responses: '200': $ref: '../components/responses/200_server_info.yaml' -post: - summary: Get general server information - operationId: postServerHealth - tags: - - server info - responses: - '200': - $ref: '../components/responses/200_server_info.yaml' From 4e13d0e6cc084b93b1b9d80383d2a57c01cfee73 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 3 Mar 2025 16:54:57 +0000 Subject: [PATCH 07/10] Improve docs --- changelogs/all-breaking-changes.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/changelogs/all-breaking-changes.asciidoc b/changelogs/all-breaking-changes.asciidoc index 1d80381fa53..bfc207a1172 100644 --- a/changelogs/all-breaking-changes.asciidoc +++ b/changelogs/all-breaking-changes.asciidoc @@ -26,7 +26,7 @@ For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467 https://github.com/elastic/apm-server/pull/15524[PR #15524] - Change server information endpoint `/` to only accept GET and HEAD requests. Any other methods will return HTTP 405 Method Not Allowed. -This will surface any misconfiguration causing data to be sent to `/` instead of the correct endpoint, +This will surface any agent misconfiguration causing data to be sent to `/` instead of the correct endpoint, e.g. `/v1/traces` for OTLP/HTTP. [PR #15976] // end::90-bc[] From 9a50ac8ad4600601441d50d2fb6e3b6d3aed47e0 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Mar 2025 12:52:33 +0000 Subject: [PATCH 08/10] Remove changelog to asciidoc --- changelogs/9.0.asciidoc | 1 - changelogs/all-breaking-changes.asciidoc | 4 ---- 2 files changed, 5 deletions(-) diff --git a/changelogs/9.0.asciidoc b/changelogs/9.0.asciidoc index d93a20da36a..d890048ab25 100644 --- a/changelogs/9.0.asciidoc +++ b/changelogs/9.0.asciidoc @@ -16,7 +16,6 @@ https://github.com/elastic/apm-server/compare/v\...v9.0.0[View commits] [float] ==== Breaking Changes - Change `sampling.tail.storage_limit` default to `0`. While `0` means unlimited local tail-sampling database size, it now enforces a max 80% disk usage on the disk where the data directory is located. Any tail sampling writes after this threshold will be rejected, similar to what happens when tail-sampling database size exceeds a non-0 storage limit. Setting `sampling.tail.storage_limit` to non-0 maintains the existing behavior which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. {pull}15467[15467] {pull}15524[15524] -- Change server information endpoint `/` to only accept GET and HEAD requests, otherwise return HTTP 405 Method Not Allowed. [float] ==== Deprecations diff --git a/changelogs/all-breaking-changes.asciidoc b/changelogs/all-breaking-changes.asciidoc index bfc207a1172..dbbce5cbc46 100644 --- a/changelogs/all-breaking-changes.asciidoc +++ b/changelogs/all-breaking-changes.asciidoc @@ -24,10 +24,6 @@ which limits the tail-sampling database size to `sampling.tail.storage_limit` and does not have the new disk usage threshold check. For more details, see https://github.com/elastic/apm-server/pull/15467[PR #15467] and https://github.com/elastic/apm-server/pull/15524[PR #15524] -- Change server information endpoint `/` to only accept GET and HEAD requests. -Any other methods will return HTTP 405 Method Not Allowed. -This will surface any agent misconfiguration causing data to be sent to `/` instead of the correct endpoint, -e.g. `/v1/traces` for OTLP/HTTP. [PR #15976] // end::90-bc[] // tag::811-bc[] From c3781f239f2d99fd66c926beea226871cc722742 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Tue, 4 Mar 2025 12:59:06 +0000 Subject: [PATCH 09/10] Fix test --- internal/beater/api/mux_root_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/beater/api/mux_root_test.go b/internal/beater/api/mux_root_test.go index 2d2d4956c56..5c814786abd 100644 --- a/internal/beater/api/mux_root_test.go +++ b/internal/beater/api/mux_root_test.go @@ -38,7 +38,7 @@ func TestRootHandler_AuthorizationMiddleware(t *testing.T) { cfg.AgentAuth.SecretToken = "1234" t.Run("No auth", func(t *testing.T) { - rec, err := requestToMuxerWithPattern(cfg, RootPath) + rec, err := requestToMuxerWithHeader(cfg, RootPath, http.MethodGet, nil) require.NoError(t, err) assert.Equal(t, http.StatusOK, rec.Code) assert.Empty(t, rec.Body.String()) From 9a55507a78fa92b9dd85349df066c26825bd4749 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Wed, 5 Mar 2025 13:46:56 +0000 Subject: [PATCH 10/10] Naming --- internal/beater/api/root/handler.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/beater/api/root/handler.go b/internal/beater/api/root/handler.go index f2de1fb6b96..adce51c3421 100644 --- a/internal/beater/api/root/handler.go +++ b/internal/beater/api/root/handler.go @@ -49,7 +49,7 @@ func Handler(cfg HandlerConfig) request.Handler { c.Result.SetWithError( request.IDResponseErrorsMethodNotAllowed, // include a verbose error message to alert users about a common misconfiguration - errors.New("this is the health check endpoint; did you mean to send data to another endpoint instead?"), + errors.New("this is the server information endpoint; did you mean to send data to another endpoint instead?"), ) c.WriteResult() return