From c8934b46470dbeb3488b4ade5f49314c0cab5cb3 Mon Sep 17 00:00:00 2001 From: Christiano Haesbaert Date: Mon, 10 Feb 2025 20:49:49 +0100 Subject: [PATCH] Fix TCP close events for "idle" connections. Fixes #217 (#220) Change the condition to supress a close, before it would supress any sockets that had no traffic. Now we keep state of sockets we sent an attempt/accept and supress the ones we didn't track _AND_ had no traffic. The map is limited, once we have 128k active sockets we can't track them, so fallback to looking at bytes_received and bytes_sent. We don't really look at tgid, but we will on upcoming probes. Maps are not small, most of the allocation size goes to the hash bucket. The empty map wires 2MB of memory. 6306: hash name sk_to_tgid flags 0x1 key 8B value 4B max_entries 131072 memlock 2098760B btf_id 5117 pids EventsTrace(2868637) --- GPL/Events/Network/Probe.bpf.c | 34 +++++++---- testing/test_bins/tcpv4_connect.c | 5 +- testing/test_bins/tcpv6_connect.c | 5 +- testing/testrunner/ebpf_test.go | 94 +++++++++++++++++-------------- 4 files changed, 81 insertions(+), 57 deletions(-) diff --git a/GPL/Events/Network/Probe.bpf.c b/GPL/Events/Network/Probe.bpf.c index c46401bb..8099f487 100644 --- a/GPL/Events/Network/Probe.bpf.c +++ b/GPL/Events/Network/Probe.bpf.c @@ -21,6 +21,14 @@ DECL_FUNC_RET(inet_csk_accept); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 131072); + __type(key, struct sock *); + __type(value, u32); + __uint(map_flags, BPF_F_NO_PREALLOC); +} sk_to_tgid SEC(".maps"); + static int inet_csk_accept__exit(struct sock *sk) { if (!sk) @@ -36,6 +44,9 @@ static int inet_csk_accept__exit(struct sock *sk) bpf_ringbuf_discard(event, 0); goto out; } + // Record this socket so we can emit a close + u32 tgid = event->pids.tgid; + (void)bpf_map_update_elem(&sk_to_tgid, &sk, &tgid, BPF_ANY); event->hdr.type = EBPF_EVENT_NETWORK_CONNECTION_ACCEPTED; bpf_ringbuf_submit(event, 0); @@ -233,6 +244,10 @@ static int tcp_connect(struct sock *sk, int ret) goto out; } + // Record this socket so we can emit a close + u32 tgid = event->pids.tgid; + (void)bpf_map_update_elem(&sk_to_tgid, &sk, &tgid, BPF_ANY); + event->hdr.type = EBPF_EVENT_NETWORK_CONNECTION_ATTEMPTED; bpf_ringbuf_submit(event, 0); @@ -303,6 +318,15 @@ static int tcp_close__enter(struct sock *sk) if (ebpf_events_is_trusted_pid()) goto out; + struct tcp_sock *tp = (struct tcp_sock *)sk; + u64 bytes_sent = BPF_CORE_READ(tp, bytes_sent); + u64 bytes_received = BPF_CORE_READ(tp, bytes_received); + + // Only process sockets we added, but since storage is limited, fall back to + // looking at bytes if we're full + if (bpf_map_delete_elem(&sk_to_tgid, &sk) != 0 && bytes_sent == 0 && bytes_received == 0) + goto out; + struct ebpf_net_event *event = bpf_ringbuf_reserve(&ringbuf, sizeof(*event), 0); if (!event) goto out; @@ -312,16 +336,6 @@ static int tcp_close__enter(struct sock *sk) goto out; } - struct tcp_sock *tp = (struct tcp_sock *)sk; - u64 bytes_sent = BPF_CORE_READ(tp, bytes_sent); - u64 bytes_received = BPF_CORE_READ(tp, bytes_received); - - if (!bytes_sent && !bytes_received) { - // Uninteresting event, most likely unbound or unconnected socket. - bpf_ringbuf_discard(event, 0); - goto out; - } - event->net.tcp.close.bytes_sent = bytes_sent; event->net.tcp.close.bytes_received = bytes_received; diff --git a/testing/test_bins/tcpv4_connect.c b/testing/test_bins/tcpv4_connect.c index 935dff34..99598bb6 100644 --- a/testing/test_bins/tcpv4_connect.c +++ b/testing/test_bins/tcpv4_connect.c @@ -73,10 +73,9 @@ int main() dump_info(ntohs(acceptaddr.sin_port), BOUND_PORT); - // The order of these two closes is important, see - // comments in Go test code - close(acceptfd); + // The order of these two closes is important, must match the order in ebpf_test.go close(connectfd); + close(acceptfd); close(listenfd); diff --git a/testing/test_bins/tcpv6_connect.c b/testing/test_bins/tcpv6_connect.c index c29b2c96..db6d3db8 100644 --- a/testing/test_bins/tcpv6_connect.c +++ b/testing/test_bins/tcpv6_connect.c @@ -73,10 +73,9 @@ int main() dump_info(ntohs(acceptaddr.sin6_port), BOUND_PORT); - // The order of these two closes is important, see - // comments in Go test code - close(acceptfd); + // The order of these two closes is important, must match the order in ebpf_test.go close(connectfd); + close(acceptfd); close(listenfd); diff --git a/testing/testrunner/ebpf_test.go b/testing/testrunner/ebpf_test.go index a91fa883..e75eaf29 100644 --- a/testing/testrunner/ebpf_test.go +++ b/testing/testrunner/ebpf_test.go @@ -563,43 +563,39 @@ func Tcpv4ConnectionClose(t *testing.T, et *Runner) { binOutput := NetBinOut{} runTestUnmarshalOutput(t, "tcpv4_connect", &binOutput) - var ev NetConnCloseEvent + var evs []NetConnCloseEvent for { + var ev NetConnCloseEvent et.UnmarshalNextEvent(&ev, "NETWORK_CONNECTION_CLOSED") - if ev.Pids.Tgid == binOutput.PidInfo.Tgid { + if ev.Pids.Tgid != binOutput.PidInfo.Tgid { + continue + } + evs = append(evs, ev) + if len(evs) == 2 { break } } - // NETWORK_CONNECTION_CLOSED is an interesting case. - // - // While NETWORK_CONNECTION_ATTEMPTED is generated exclusively on the - // client-side via a connect(...) and NETWORK_CONNECTION_ACCEPTED is - // generated exclusively on the server side via an accept(...) - // NETWORK_CONNECTION_CLOSED may be generated on either side upon a - // close(...) of a socket fd. This means that the source and desination - // ports might be "flipped" depending on what side the connection is on - // (server/client) for a close event. - // - // Our tcpv4_connect binary creates a server and client socket on the same - // machine, so what port is reported as the source and destination port - // will vary depending on which socket is closed first (client / server). - // - // The test binary closes the server socket first, which counterintuitively - // results in the _client_ socket being torn down first in the kernel. - // Thus, our BPF probes report the source/dest ports from the client - // socket's point of view for the close event. The SourcePort and DestPort - // assertions below verify this is correct. + TestPidEqual(t, binOutput.PidInfo, evs[0].Pids) + require.Equal(t, evs[0].Net.Transport, "TCP") + require.Equal(t, evs[0].Net.Family, "AF_INET") + require.Equal(t, evs[0].Net.SourceAddr, "127.0.0.1") + require.Equal(t, evs[0].Net.SourcePort, binOutput.ClientPort) + require.Equal(t, evs[0].Net.DestAddr, "127.0.0.1") + require.Equal(t, evs[0].Net.DestPort, binOutput.ServerPort) + require.Equal(t, evs[0].Net.NetNs, binOutput.NetNs) + require.Equal(t, evs[0].Comm, "tcpv4_connect") + + TestPidEqual(t, binOutput.PidInfo, evs[1].Pids) + require.Equal(t, evs[1].Net.Transport, "TCP") + require.Equal(t, evs[1].Net.Family, "AF_INET") + require.Equal(t, evs[1].Net.SourceAddr, "127.0.0.1") + require.Equal(t, evs[1].Net.SourcePort, binOutput.ServerPort) + require.Equal(t, evs[1].Net.DestAddr, "127.0.0.1") + require.Equal(t, evs[1].Net.DestPort, binOutput.ClientPort) + require.Equal(t, evs[1].Net.NetNs, binOutput.NetNs) + require.Equal(t, evs[1].Comm, "tcpv4_connect") - TestPidEqual(t, binOutput.PidInfo, ev.Pids) - require.Equal(t, ev.Net.Transport, "TCP") - require.Equal(t, ev.Net.Family, "AF_INET") - require.Equal(t, ev.Net.SourceAddr, "127.0.0.1") - require.Equal(t, ev.Net.SourcePort, binOutput.ClientPort) - require.Equal(t, ev.Net.DestAddr, "127.0.0.1") - require.Equal(t, ev.Net.DestPort, binOutput.ServerPort) - require.Equal(t, ev.Net.NetNs, binOutput.NetNs) - require.Equal(t, ev.Comm, "tcpv4_connect") } func Tcpv6ConnectionAttempt(t *testing.T, et *Runner) { @@ -652,23 +648,39 @@ func Tcpv6ConnectionClose(t *testing.T, et *Runner) { binOutput := NetBinOut{} runTestUnmarshalOutput(t, "tcpv6_connect", &binOutput) - var ev NetConnCloseEvent + var evs []NetConnCloseEvent for { + var ev NetConnCloseEvent et.UnmarshalNextEvent(&ev, "NETWORK_CONNECTION_CLOSED") - if ev.Pids.Tgid == binOutput.PidInfo.Tgid { + if ev.Pids.Tgid != binOutput.PidInfo.Tgid { + continue + } + evs = append(evs, ev) + if len(evs) == 2 { break } } - TestPidEqual(t, binOutput.PidInfo, ev.Pids) - require.Equal(t, ev.Net.Transport, "TCP") - require.Equal(t, ev.Net.Family, "AF_INET6") - require.Equal(t, ev.Net.SourceAddr, "::1") - require.Equal(t, ev.Net.SourcePort, binOutput.ClientPort) - require.Equal(t, ev.Net.DestAddr, "::1") - require.Equal(t, ev.Net.DestPort, binOutput.ServerPort) - require.Equal(t, ev.Net.NetNs, binOutput.NetNs) - require.Equal(t, ev.Comm, "tcpv6_connect") + TestPidEqual(t, binOutput.PidInfo, evs[0].Pids) + require.Equal(t, evs[0].Net.Transport, "TCP") + require.Equal(t, evs[0].Net.Family, "AF_INET6") + require.Equal(t, evs[0].Net.SourceAddr, "::1") + require.Equal(t, evs[0].Net.SourcePort, binOutput.ClientPort) + require.Equal(t, evs[0].Net.DestAddr, "::1") + require.Equal(t, evs[0].Net.DestPort, binOutput.ServerPort) + require.Equal(t, evs[0].Net.NetNs, binOutput.NetNs) + require.Equal(t, evs[0].Comm, "tcpv6_connect") + + TestPidEqual(t, binOutput.PidInfo, evs[1].Pids) + require.Equal(t, evs[1].Net.Transport, "TCP") + require.Equal(t, evs[1].Net.Family, "AF_INET6") + require.Equal(t, evs[1].Net.SourceAddr, "::1") + require.Equal(t, evs[1].Net.SourcePort, binOutput.ServerPort) + require.Equal(t, evs[1].Net.DestAddr, "::1") + require.Equal(t, evs[1].Net.DestPort, binOutput.ClientPort) + require.Equal(t, evs[1].Net.NetNs, binOutput.NetNs) + require.Equal(t, evs[1].Comm, "tcpv6_connect") + } func DNSMonitor(t *testing.T, et *Runner) {