Skip to content

Commit d12b0b4

Browse files
jaredledvinasteveny91Kyle-Neale
authored
[cilium] Add support for v1.14 metrics (#17447)
* [cilium] Add v1.14+ agent & operator metrics Signed-off-by: Jared Ledvina <jared.ledvina@datadoghq.com> * Add changelog Signed-off-by: Jared Ledvina <jared.ledvina@datadoghq.com> * Fixup some of the metadata.csv Signed-off-by: Jared Ledvina <jared.ledvina@datadoghq.com> * fix some tests * fix some minor naming issues * sort metadata * fix e2e * clean up * [cilium] Add metadata info for ipam allocation and release Signed-off-by: Jared Ledvina <jared.ledvina@datadoghq.com> * Update cilium/changelog.d/17447.added Co-authored-by: Kyle Neale <kyle.a.neale@gmail.com> --------- Signed-off-by: Jared Ledvina <jared.ledvina@datadoghq.com> Co-authored-by: steveny91 <steven.yuen@datadoghq.com> Co-authored-by: Kyle Neale <kyle.a.neale@gmail.com>
1 parent c202d6f commit d12b0b4

File tree

9 files changed

+375
-18
lines changed

9 files changed

+375
-18
lines changed

cilium/changelog.d/17447.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add support for Cilium v1.14 metrics

cilium/datadog_checks/cilium/metrics.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
'cilium_process_start_time_seconds': 'process.start_time.seconds',
5050
'cilium_process_virtual_memory_bytes': 'process.virtual_memory.bytes',
5151
'cilium_process_virtual_memory_max_bytes': 'process.virtual_memory.max.bytes',
52+
'cilium_services_events_total': 'services.events.total',
5253
'cilium_subprocess_start_total': 'subprocess.start.total',
5354
'cilium_triggers_policy_update_call_duration_seconds': 'triggers_policy.update_call_duration.seconds',
5455
'cilium_triggers_policy_update_folds': 'triggers_policy.update_folds',
@@ -99,8 +100,13 @@
99100
'cilium_fqdn_active_ips': 'fqdn.active_ips',
100101
'cilium_fqdn_alive_zombie_connections': 'fqdn.alive_zombie_connections',
101102
# Cilium 1.14+
103+
'cilium_endpoint': 'endpoint.count',
104+
'cilium_endpoint_max_ifindex': 'endpoint.max_ifindex',
105+
'cilium_cidrgroup_policies': 'cidrgroup.policies',
102106
'cilium_kvstore_sync_queue_size': 'kvstore.sync_queue_size',
103107
'cilium_kvstore_initial_sync_completed': 'kvstore.initial_sync_completed',
108+
'cilium_k8s_client_rate_limiter_duration_seconds': 'k8s_client.rate_limiter_duration.seconds',
109+
'cilium_policy_change_total': 'policy.change.total',
104110
}
105111

106112
OPERATOR_V2_OVERRIDES = {
@@ -176,6 +182,15 @@
176182
'cilium_operator_ipam_interface_candidates': 'operator.ipam.interface_candidates',
177183
'cilium_operator_ipam_empty_interface_slots': 'operator.ipam.empty_interface_slots',
178184
'cilium_operator_ipam_ip_allocation_ops': 'operator.ipam.ip_allocation_ops',
185+
# Cilium 1.14+
186+
'cilium_operator_ces_sync_total': 'operator.ces.sync.total',
187+
'cilium_operator_ipam_allocation_duration_seconds': 'operator.ipam.allocation.duration.seconds',
188+
'cilium_operator_ipam_available_interfaces': 'operator.ipam.available_interfaces',
189+
'cilium_operator_ipam_available_ips': 'operator.ipam.available_ips',
190+
'cilium_operator_ipam_ip_release_ops': 'operator.ipam.ip_release_ops',
191+
'cilium_operator_ipam_needed_ips': 'operator.ipam.needed_ips',
192+
'cilium_operator_ipam_release_duration_seconds': 'operator.ipam.release.duration.seconds',
193+
'cilium_operator_ipam_used_ips': 'operator.ipam.used_ips',
179194
}
180195

181196
AGENT_V2_METRICS = deepcopy(AGENT_METRICS)

cilium/metadata.csv

Lines changed: 22 additions & 0 deletions
Large diffs are not rendered by default.

cilium/tests/common.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,16 @@
128128
"cilium.kvstore.initial_sync_completed",
129129
]
130130

131+
AGENT_V2_METRICS_1_14 = [
132+
# The E2E environment has not yet been updated to Cilium 1.14+
133+
'cilium.cidrgroup.policies',
134+
'cilium.k8s_client.rate_limiter_duration.seconds.bucket',
135+
'cilium.k8s_client.rate_limiter_duration.seconds.count',
136+
'cilium.k8s_client.rate_limiter_duration.seconds.sum',
137+
'cilium.policy.change.count',
138+
'cilium.services.events.count',
139+
]
140+
131141
AGENT_V1_METRICS = [
132142
"cilium.agent.api_process_time.seconds.count",
133143
"cilium.agent.api_process_time.seconds.sum",
@@ -220,6 +230,15 @@
220230
"cilium.kvstore.initial_sync_completed",
221231
]
222232

233+
AGENT_V1_METRICS_1_14 = [
234+
# The E2E environment has not yet been updated to Cilium 1.14+
235+
'cilium.cidrgroup.policies',
236+
'cilium.k8s_client.rate_limiter_duration.seconds.count',
237+
'cilium.k8s_client.rate_limiter_duration.seconds.sum',
238+
'cilium.policy.change.total',
239+
'cilium.services.events.total',
240+
]
241+
223242
# Some types changed moving from v1 to v2. We keep v2 in the metadata.csv file.
224243
AGENT_V1_METRICS_EXCLUDE_METADATA_CHECK = [
225244
"cilium.agent.api_process_time.seconds.count",
@@ -243,6 +262,8 @@
243262
"cilium.policy.implementation_delay.sum",
244263
"cilium.proxy.upstream_reply.seconds.count",
245264
"cilium.proxy.upstream_reply.seconds.sum",
265+
'cilium.k8s_client.rate_limiter_duration.seconds.count',
266+
'cilium.k8s_client.rate_limiter_duration.seconds.sum',
246267
]
247268

248269
OPERATOR_V2_PROCESS_METRICS = [
@@ -335,6 +356,21 @@
335356
"cilium.operator.ipam.ip_allocation_ops.count",
336357
] + OPERATOR_V2_PROCESS_METRICS
337358

359+
OPERATOR_V2_METRICS_1_14 = [
360+
# The E2E environment has not yet been updated to Cilium 1.14+
361+
"cilium.operator.ipam.allocation.duration.seconds.bucket",
362+
"cilium.operator.ipam.allocation.duration.seconds.sum",
363+
"cilium.operator.ipam.allocation.duration.seconds.count",
364+
"cilium.operator.ipam.available_interfaces",
365+
"cilium.operator.ipam.available_ips",
366+
"cilium.operator.ipam.ip_release_ops.count",
367+
"cilium.operator.ipam.needed_ips",
368+
"cilium.operator.ipam.release.duration.seconds.bucket",
369+
"cilium.operator.ipam.release.duration.seconds.sum",
370+
"cilium.operator.ipam.release.duration.seconds.count",
371+
"cilium.operator.ipam.used_ips",
372+
]
373+
338374
# Not available in test metric fixtures
339375
ADDL_OPERATOR_AWS_METRICS = [
340376
"cilium.operator.ec2.api.rate_limit.duration.seconds.sum",

cilium/tests/fixtures/agent_metrics.txt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,3 +1290,31 @@ cilium_kvstore_initial_sync_completed{scope="identities/v1",source_cluster="foo"
12901290
cilium_kvstore_initial_sync_completed{scope="identities/v1",source_cluster="foo", action="read"} 1
12911291
cilium_kvstore_initial_sync_completed{scope="ip/v1",source_cluster="foo", action="write"} 1
12921292
cilium_kvstore_initial_sync_completed{scope="ip/v1",source_cluster="foo", action="read"} 1
1293+
# HELP cilium_services_events_total Number of services events labeled by action type
1294+
# TYPE cilium_services_events_total counter
1295+
cilium_services_events_total{action="add"} 117
1296+
cilium_services_events_total{action="update"} 1063
1297+
# HELP cilium_cidrgroup_policies Number of CNPs and CCNPs referencing at least one CiliumCIDRGroup
1298+
# TYPE cilium_cidrgroup_policies gauge
1299+
cilium_cidrgroup_policies 0
1300+
# HELP cilium_k8s_client_rate_limiter_duration_seconds Kubernetes client rate limiter latency in seconds. Broken down by path and method.
1301+
# TYPE cilium_k8s_client_rate_limiter_duration_seconds histogram
1302+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="0.005"} 54
1303+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="0.025"} 54
1304+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="0.1"} 54
1305+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="0.25"} 54
1306+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="0.5"} 54
1307+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="1"} 54
1308+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="2"} 54
1309+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="4"} 54
1310+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="8"} 54
1311+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="15"} 54
1312+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="30"} 54
1313+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="60"} 54
1314+
cilium_k8s_client_rate_limiter_duration_seconds_bucket{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}",le="+Inf"} 54
1315+
cilium_k8s_client_rate_limiter_duration_seconds_sum{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}"} 0.00015540600000000005
1316+
cilium_k8s_client_rate_limiter_duration_seconds_count{method="DELETE",path="/apis/cilium.io/v2/namespaces/{namespace}/ciliumendpoints/{name}"} 54
1317+
# HELP cilium_policy_change_total Number of policy changes by outcome
1318+
# TYPE cilium_policy_change_total counter
1319+
cilium_policy_change_total{outcome="fail"} 106
1320+
cilium_policy_change_total{outcome="success"} 24128

0 commit comments

Comments
 (0)