Skip to content

Commit da2f5e5

Browse files
committed
NETOBSERV-559: use LookupAndDelete to read maps
Keep legacy code for old kernels. Do not base support detection on kernel version; instead, just try, and fall back to legacy when relevant.
1 parent 58d01d9 commit da2f5e5

File tree

4 files changed

+186
-106
lines changed

4 files changed

+186
-106
lines changed

pkg/ebpf/tracer.go

Lines changed: 97 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -53,17 +53,18 @@ var plog = logrus.WithField("component", "ebpf.PacketFetcher")
5353
// and to flows that are forwarded by the kernel via ringbuffer because they could not be aggregated
5454
// in the map
5555
type FlowFetcher struct {
56-
objects *BpfObjects
57-
qdiscs map[ifaces.Interface]*netlink.GenericQdisc
58-
egressFilters map[ifaces.Interface]*netlink.BpfFilter
59-
ingressFilters map[ifaces.Interface]*netlink.BpfFilter
60-
ringbufReader *ringbuf.Reader
61-
cacheMaxSize int
62-
enableIngress bool
63-
enableEgress bool
64-
pktDropsTracePoint link.Link
65-
rttFentryLink link.Link
66-
rttKprobeLink link.Link
56+
objects *BpfObjects
57+
qdiscs map[ifaces.Interface]*netlink.GenericQdisc
58+
egressFilters map[ifaces.Interface]*netlink.BpfFilter
59+
ingressFilters map[ifaces.Interface]*netlink.BpfFilter
60+
ringbufReader *ringbuf.Reader
61+
cacheMaxSize int
62+
enableIngress bool
63+
enableEgress bool
64+
pktDropsTracePoint link.Link
65+
rttFentryLink link.Link
66+
rttKprobeLink link.Link
67+
lookupAndDeleteSupported bool
6768
}
6869

6970
type FlowFetcherConfig struct {
@@ -119,7 +120,10 @@ func NewFlowFetcher(cfg *FlowFetcherConfig) (*FlowFetcher, error) {
119120
return nil, fmt.Errorf("rewriting BPF constants definition: %w", err)
120121
}
121122

122-
oldKernel := utils.IskernelOlderthan514()
123+
oldKernel := utils.IsKernelOlderThan("5.14.0")
124+
if oldKernel {
125+
log.Infof("kernel older than 5.14.0 detected: not all hooks are supported")
126+
}
123127
objects, err := kernelSpecificLoadAndAssign(oldKernel, spec)
124128
if err != nil {
125129
return nil, err
@@ -165,17 +169,18 @@ func NewFlowFetcher(cfg *FlowFetcherConfig) (*FlowFetcher, error) {
165169
}
166170

167171
return &FlowFetcher{
168-
objects: &objects,
169-
ringbufReader: flows,
170-
egressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
171-
ingressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
172-
qdiscs: map[ifaces.Interface]*netlink.GenericQdisc{},
173-
cacheMaxSize: cfg.CacheMaxSize,
174-
enableIngress: cfg.EnableIngress,
175-
enableEgress: cfg.EnableEgress,
176-
pktDropsTracePoint: pktDropsLink,
177-
rttFentryLink: rttFentryLink,
178-
rttKprobeLink: rttKprobeLink,
172+
objects: &objects,
173+
ringbufReader: flows,
174+
egressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
175+
ingressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
176+
qdiscs: map[ifaces.Interface]*netlink.GenericQdisc{},
177+
cacheMaxSize: cfg.CacheMaxSize,
178+
enableIngress: cfg.EnableIngress,
179+
enableEgress: cfg.EnableEgress,
180+
pktDropsTracePoint: pktDropsLink,
181+
rttFentryLink: rttFentryLink,
182+
rttKprobeLink: rttKprobeLink,
183+
lookupAndDeleteSupported: true, // this will be turned off later if found to be not supported
179184
}, nil
180185
}
181186

@@ -404,35 +409,41 @@ func (m *FlowFetcher) ReadRingBuf() (ringbuf.Record, error) {
404409
}
405410

406411
// LookupAndDeleteMap reads all the entries from the eBPF map and removes them from it.
407-
// It returns a map where the key
408-
// For synchronization purposes, we get/delete a whole snapshot of the flows map.
409-
// This way we avoid missing packets that could be updated on the
410-
// ebpf side while we process/aggregate them here
411-
// Changing this method invocation by BatchLookupAndDelete could improve performance
412412
// TODO: detect whether BatchLookupAndDelete is supported (Kernel>=5.6) and use it selectively
413413
// Supported Lookup/Delete operations by kernel: https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md
414-
// Race conditions here causes that some flows are lost in high-load scenarios
415414
func (m *FlowFetcher) LookupAndDeleteMap(met *metrics.Metrics) map[BpfFlowId][]BpfFlowMetrics {
415+
if !m.lookupAndDeleteSupported {
416+
return m.legacyLookupAndDeleteMap(met)
417+
}
418+
416419
flowMap := m.objects.AggregatedFlows
417420

418421
iterator := flowMap.Iterate()
419422
var flows = make(map[BpfFlowId][]BpfFlowMetrics, m.cacheMaxSize)
423+
var ids []BpfFlowId
420424
var id BpfFlowId
421425
var metrics []BpfFlowMetrics
422426

423-
count := 0
424-
// Changing Iterate+Delete by LookupAndDelete would prevent some possible race conditions
425-
// TODO: detect whether LookupAndDelete is supported (Kernel>=4.20) and use it selectively
427+
// First, get all ids and don't care about metrics (we need lookup+delete to be atomic)
426428
for iterator.Next(&id, &metrics) {
429+
ids = append(ids, id)
430+
}
431+
432+
count := 0
433+
// Run the atomic Lookup+Delete; if new ids have been inserted in the meantime, they'll be fetched next time
434+
for i, id := range ids {
427435
count++
428-
if err := flowMap.Delete(id); err != nil {
436+
if err := flowMap.LookupAndDelete(&id, &metrics); err != nil {
437+
if i == 0 && errors.Is(err, ebpf.ErrNotSupported) {
438+
log.WithError(err).Warnf("switching to legacy mode")
439+
m.lookupAndDeleteSupported = false
440+
return m.legacyLookupAndDeleteMap(met)
441+
}
429442
log.WithError(err).WithField("flowId", id).Warnf("couldn't delete flow entry")
430443
met.Errors.WithErrorName("flow-fetcher", "CannotDeleteFlows").Inc()
444+
continue
431445
}
432-
// We observed that the eBPF PerCPU map might insert the same key multiple times in the map
433-
// (probably due to race conditions) so we need to re-join metrics again at userspace
434-
// TODO: instrument how many times the keys are is repeated in the same eviction
435-
flows[id] = append(flows[id], metrics...)
446+
flows[id] = metrics
436447
}
437448
met.BufferSizeGauge.WithBufferName("hashmap-total").Set(float64(count))
438449
met.BufferSizeGauge.WithBufferName("hashmap-unique").Set(float64(len(flows)))
@@ -451,16 +462,21 @@ func (m *FlowFetcher) lookupAndDeleteDNSMap(timeOut time.Duration) {
451462
monotonicTimeNow := monotime.Now()
452463
dnsMap := m.objects.DnsFlows
453464
var dnsKey BpfDnsFlowId
465+
var keysToDelete []BpfDnsFlowId
454466
var dnsVal uint64
455467

456468
if dnsMap != nil {
469+
// Ideally the Lookup + Delete should be atomic, however we cannot use LookupAndDelete since the deletion is conditional
470+
// Do not delete while iterating, as it causes severe performance degradation
457471
iterator := dnsMap.Iterate()
458472
for iterator.Next(&dnsKey, &dnsVal) {
459473
if time.Duration(uint64(monotonicTimeNow)-dnsVal) >= timeOut {
460-
if err := dnsMap.Delete(dnsKey); err != nil {
461-
log.WithError(err).WithField("dnsKey", dnsKey).
462-
Warnf("couldn't delete DNS record entry")
463-
}
474+
keysToDelete = append(keysToDelete, dnsKey)
475+
}
476+
}
477+
for _, dnsKey = range keysToDelete {
478+
if err := dnsMap.Delete(dnsKey); err != nil {
479+
log.WithError(err).WithField("dnsKey", dnsKey).Warnf("couldn't delete DNS record entry")
464480
}
465481
}
466482
}
@@ -529,14 +545,15 @@ func kernelSpecificLoadAndAssign(oldKernel bool, spec *ebpf.CollectionSpec) (Bpf
529545

530546
// It provides access to packets from the kernel space (via PerfCPU hashmap)
531547
type PacketFetcher struct {
532-
objects *BpfObjects
533-
qdiscs map[ifaces.Interface]*netlink.GenericQdisc
534-
egressFilters map[ifaces.Interface]*netlink.BpfFilter
535-
ingressFilters map[ifaces.Interface]*netlink.BpfFilter
536-
perfReader *perf.Reader
537-
cacheMaxSize int
538-
enableIngress bool
539-
enableEgress bool
548+
objects *BpfObjects
549+
qdiscs map[ifaces.Interface]*netlink.GenericQdisc
550+
egressFilters map[ifaces.Interface]*netlink.BpfFilter
551+
ingressFilters map[ifaces.Interface]*netlink.BpfFilter
552+
perfReader *perf.Reader
553+
cacheMaxSize int
554+
enableIngress bool
555+
enableEgress bool
556+
lookupAndDeleteSupported bool
540557
}
541558

542559
func NewPacketFetcher(
@@ -605,14 +622,15 @@ func NewPacketFetcher(
605622
}
606623

607624
return &PacketFetcher{
608-
objects: &objects,
609-
perfReader: packets,
610-
egressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
611-
ingressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
612-
qdiscs: map[ifaces.Interface]*netlink.GenericQdisc{},
613-
cacheMaxSize: cacheMaxSize,
614-
enableIngress: ingress,
615-
enableEgress: egress,
625+
objects: &objects,
626+
perfReader: packets,
627+
egressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
628+
ingressFilters: map[ifaces.Interface]*netlink.BpfFilter{},
629+
qdiscs: map[ifaces.Interface]*netlink.GenericQdisc{},
630+
cacheMaxSize: cacheMaxSize,
631+
enableIngress: ingress,
632+
enableEgress: egress,
633+
lookupAndDeleteSupported: true, // this will be turned off later if found to be not supported
616634
}, nil
617635
}
618636

@@ -797,19 +815,35 @@ func (p *PacketFetcher) ReadPerf() (perf.Record, error) {
797815
}
798816

799817
func (p *PacketFetcher) LookupAndDeleteMap(met *metrics.Metrics) map[int][]*byte {
818+
if !p.lookupAndDeleteSupported {
819+
return p.legacyLookupAndDeleteMap(met)
820+
}
821+
800822
packetMap := p.objects.PacketRecord
801823
iterator := packetMap.Iterate()
802824
packets := make(map[int][]*byte, p.cacheMaxSize)
803-
804825
var id int
826+
var ids []int
805827
var packet []*byte
828+
829+
// First, get all ids and ignore content (we need lookup+delete to be atomic)
806830
for iterator.Next(&id, &packet) {
807-
if err := packetMap.Delete(id); err != nil {
808-
log.WithError(err).WithField("packetID ", id).
809-
Warnf("couldn't delete entry")
810-
met.Errors.WithErrorName("pkt-fetcher", "CannotDeleteFlows").Inc()
831+
ids = append(ids, id)
832+
}
833+
834+
// Run the atomic Lookup+Delete; if new ids have been inserted in the meantime, they'll be fetched next time
835+
for i, id := range ids {
836+
if err := packetMap.LookupAndDelete(&id, &packet); err != nil {
837+
if i == 0 && errors.Is(err, ebpf.ErrNotSupported) {
838+
log.WithError(err).Warnf("switching to legacy mode")
839+
p.lookupAndDeleteSupported = false
840+
return p.legacyLookupAndDeleteMap(met)
841+
}
842+
log.WithError(err).WithField("packetID", id).Warnf("couldn't delete entry")
843+
met.Errors.WithErrorName("pkt-fetcher", "CannotDeleteEntry").Inc()
811844
}
812-
packets[id] = append(packets[id], packet...)
845+
packets[id] = packet
813846
}
847+
814848
return packets
815849
}

pkg/ebpf/tracer_legacy.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package ebpf
2+
3+
import "github.com/netobserv/netobserv-ebpf-agent/pkg/metrics"
4+
5+
// This file contains legacy implementations kept for old kernels
6+
7+
func (m *FlowFetcher) legacyLookupAndDeleteMap(met *metrics.Metrics) map[BpfFlowId][]BpfFlowMetrics {
8+
flowMap := m.objects.AggregatedFlows
9+
10+
iterator := flowMap.Iterate()
11+
var flows = make(map[BpfFlowId][]BpfFlowMetrics, m.cacheMaxSize)
12+
var id BpfFlowId
13+
var metrics []BpfFlowMetrics
14+
count := 0
15+
16+
// Deleting while iterating is really bad for performance (like, really!) as it causes the same key to be seen multiple times
17+
// This is solved in >=4.20 kernels with LookupAndDelete
18+
for iterator.Next(&id, &metrics) {
19+
count++
20+
if err := flowMap.Delete(id); err != nil {
21+
log.WithError(err).WithField("flowId", id).Warnf("couldn't delete flow entry")
22+
met.Errors.WithErrorName("flow-fetcher-legacy", "CannotDeleteFlows").Inc()
23+
}
24+
// We observed that the eBPF PerCPU map might insert the same key multiple times in the map
25+
// (probably due to race conditions) so we need to re-join metrics again at userspace
26+
flows[id] = append(flows[id], metrics...)
27+
}
28+
met.BufferSizeGauge.WithBufferName("hashmap-legacy-total").Set(float64(count))
29+
met.BufferSizeGauge.WithBufferName("hashmap-legacy-unique").Set(float64(len(flows)))
30+
31+
return flows
32+
}
33+
34+
func (p *PacketFetcher) legacyLookupAndDeleteMap(met *metrics.Metrics) map[int][]*byte {
35+
packetMap := p.objects.PacketRecord
36+
iterator := packetMap.Iterate()
37+
packets := make(map[int][]*byte, p.cacheMaxSize)
38+
39+
var id int
40+
var packet []*byte
41+
for iterator.Next(&id, &packet) {
42+
if err := packetMap.Delete(id); err != nil {
43+
log.WithError(err).WithField("packetID ", id).Warnf("couldn't delete entry")
44+
met.Errors.WithErrorName("pkt-fetcher-legacy", "CannotDeleteEntry").Inc()
45+
}
46+
packets[id] = append(packets[id], packet...)
47+
}
48+
return packets
49+
}

pkg/utils/utils.go

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,18 @@ import (
1212
)
1313

1414
var (
15-
getCurrentKernelVersion = currentKernelVersion
16-
log = logrus.WithField("component", "utils")
15+
kernelVersion uint32
16+
log = logrus.WithField("component", "utils")
1717
)
1818

19+
func init() {
20+
var err error
21+
kernelVersion, err = currentKernelVersion()
22+
if err != nil {
23+
log.Errorf("failed to get current kernel version: %v", err)
24+
}
25+
}
26+
1927
// GetSocket returns socket string in the correct format based on address family
2028
func GetSocket(hostIP string, hostPort int) string {
2129
socket := fmt.Sprintf("%s:%d", hostIP, hostPort)
@@ -26,22 +34,13 @@ func GetSocket(hostIP string, hostPort int) string {
2634
return socket
2735
}
2836

29-
func IskernelOlderthan514() bool {
30-
kernelVersion514, err := kernelVersionFromReleaseString("5.14.0")
37+
func IsKernelOlderThan(version string) bool {
38+
refVersion, err := kernelVersionFromReleaseString(version)
3139
if err != nil {
3240
log.Warnf("failed to get kernel version from release string: %v", err)
3341
return false
3442
}
35-
currentVersion, err := getCurrentKernelVersion()
36-
if err != nil {
37-
log.Warnf("failed to get current kernel version: %v", err)
38-
return false
39-
}
40-
if currentVersion < kernelVersion514 {
41-
log.Infof("older kernel version not all hooks will be supported")
42-
return true
43-
}
44-
return false
43+
return kernelVersion != 0 && kernelVersion < refVersion
4544
}
4645

4746
var versionRegex = regexp.MustCompile(`^(\d+)\.(\d+).(\d+).*$`)

0 commit comments

Comments
 (0)