[NDM] Add NDM metadata support for Cisco ACI #17735
Changes from 27 commits
New file (a one-line changelog entry):

@@ -0,0 +1 @@
+[NDM] Add NDM metadata support for Cisco ACI
Changes to the Fabric collector:

@@ -2,21 +2,38 @@
 # All rights reserved
 # Licensed under a 3-clause BSD style license (see LICENSE)

-from six import iteritems
+from six import PY3, iteritems

 from datadog_checks.base.utils.serialization import json

+if PY3:
+    import time
+
+    from datadog_checks.cisco_aci.models import DeviceMetadata, InterfaceMetadata, NetworkDevicesMetadata, Node, PhysIf
+
+else:
+    DeviceMetadata = None
+    Eth = None
+    InterfaceMetadata = None
+    Node = None
+
 from . import aci_metrics, exceptions, helpers

 VENDOR_CISCO = 'cisco'
+PAYLOAD_METADATA_BATCH_SIZE = 100


 class Fabric:
     """
     Collect fabric metrics from the APIC
     """

-    def __init__(self, check, api, instance):
+    def __init__(self, check, api, instance, namespace):
         self.check = check
         self.api = api
         self.instance = instance
         self.check_tags = check.check_tags
+        self.namespace = namespace

         # grab some functions from the check
         self.gauge = check.gauge
@@ -25,13 +42,19 @@ def __init__(self, check, api, instance):
         self.submit_metrics = check.submit_metrics
         self.tagger = self.check.tagger
         self.external_host_tags = self.check.external_host_tags
+        self.ndm_metadata = check.ndm_metadata

     def collect(self):
         fabric_pods = self.api.get_fabric_pods()
         fabric_nodes = self.api.get_fabric_nodes()
         self.log.info("%s pods and %s nodes computed", len(fabric_nodes), len(fabric_pods))
         pods = self.submit_pod_health(fabric_pods)
-        self.submit_nodes_health(fabric_nodes, pods)
+        devices, interfaces = self.submit_nodes_health_and_metadata(fabric_nodes, pods)
+        if PY3:
+            collect_timestamp = int(time.time())
+            batches = self.batch_payloads(devices, interfaces, collect_timestamp)
+            for batch in batches:
+                self.ndm_metadata(json.dumps(batch.model_dump(exclude_none=True)))

     def submit_pod_health(self, pods):
         pods_dict = {}
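For context: NetworkDevicesMetadata is a pydantic model, so each batch is dumped to a plain dict via model_dump(exclude_none=True) (pydantic v2) and JSON-serialized before being handed to check.ndm_metadata. A minimal, self-contained sketch of that flow with a stand-in model — field names follow the diff, but the model definition here is illustrative, not the one shipped in datadog_checks.cisco_aci.models:

import json
import time
from typing import List, Optional

from pydantic import BaseModel


class NetworkDevicesMetadata(BaseModel):
    # Stand-in for datadog_checks.cisco_aci.models.NetworkDevicesMetadata
    namespace: Optional[str] = None
    devices: Optional[List[dict]] = None
    interfaces: Optional[List[dict]] = None
    collect_timestamp: Optional[int] = None


batch = NetworkDevicesMetadata(
    namespace="default",
    devices=[{"id": "default:10.0.200.1", "vendor": "cisco"}],
    collect_timestamp=int(time.time()),
)
# exclude_none=True drops the unset `interfaces` field from the payload
print(json.dumps(batch.model_dump(exclude_none=True)))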
@@ -53,7 +76,9 @@ def submit_pod_health(self, pods):

         return pods_dict

-    def submit_nodes_health(self, nodes, pods):
+    def submit_nodes_health_and_metadata(self, nodes, pods):
+        device_metadata = []
+        interface_metadata = []
         for n in nodes:
             hostname = helpers.get_fabric_hostname(n)
@@ -70,17 +95,22 @@ def submit_nodes_health(self, nodes, pods):
                 continue
             self.log.info("processing node %s on pod %s", node_id, pod_id)
             try:
+                if PY3:
+                    device_metadata.append(self.submit_node_metadata(node_attrs, tags))
                 self.submit_process_metric(n, tags + self.check_tags + user_tags, hostname=hostname)
             except (exceptions.APIConnectionException, exceptions.APIParsingException):
                 pass
             if node_attrs.get('role') != "controller":
                 try:
                     stats = self.api.get_node_stats(pod_id, node_id)
                     self.submit_fabric_metric(stats, tags, 'fabricNode', hostname=hostname)
-                    self.process_eth(node_attrs)
+                    eth_metadata = self.process_eth(node_attrs)
+                    if PY3:
+                        interface_metadata.extend(eth_metadata)
                 except (exceptions.APIConnectionException, exceptions.APIParsingException):
                     pass
             self.log.info("finished processing node %s", node_id)
+        return device_metadata, interface_metadata

     def process_eth(self, node):
         self.log.info("processing ethernet ports for %s", node.get('id'))
@@ -90,16 +120,20 @@ def process_eth(self, node):
             eth_list = self.api.get_eth_list(pod_id, node['id'])
         except (exceptions.APIConnectionException, exceptions.APIParsingException):
             pass
+        interfaces = []
         for e in eth_list:
             eth_attrs = helpers.get_attributes(e)
             eth_id = eth_attrs['id']
             tags = self.tagger.get_fabric_tags(e, 'l1PhysIf')
+            if PY3:
+                interfaces.append(self.create_interface_metadata(e, node['address'], tags, hostname))
             try:
                 stats = self.api.get_eth_stats(pod_id, node['id'], eth_id)
                 self.submit_fabric_metric(stats, tags, 'l1PhysIf', hostname=hostname)
             except (exceptions.APIConnectionException, exceptions.APIParsingException):
                 pass
         self.log.info("finished processing ethernet ports for %s", node['id'])
+        return interfaces

     def submit_fabric_metric(self, stats, tags, obj_type, hostname=None):
         for s in stats:
@@ -209,3 +243,71 @@ def get_fabric_type(self, obj_type):
             return 'pod'
         if obj_type == 'l1PhysIf':
             return 'port'
+
+    def batch_payloads(self, devices, interfaces, collect_ts):
+        for device in devices:
+            yield NetworkDevicesMetadata(namespace=self.namespace, devices=[device], collect_timestamp=collect_ts)
+
+        payloads = []
+        for interface in interfaces:
+            if len(payloads) == PAYLOAD_METADATA_BATCH_SIZE:
+                yield NetworkDevicesMetadata(
+                    namespace=self.namespace, interfaces=payloads, collect_timestamp=collect_ts
+                )
+                payloads = []
+            payloads.append(interface)
+        if payloads:
+            yield NetworkDevicesMetadata(namespace=self.namespace, interfaces=payloads, collect_timestamp=collect_ts)
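batch_payloads emits one payload per device, then chunks interfaces into payloads of at most PAYLOAD_METADATA_BATCH_SIZE (100). The chunking logic can be sanity-checked in isolation with plain dicts standing in for the pydantic models (a stand-alone sketch, not code from the PR):

BATCH_SIZE = 100  # mirrors PAYLOAD_METADATA_BATCH_SIZE


def batch_interfaces(interfaces, batch_size=BATCH_SIZE):
    """Yield lists of at most batch_size interfaces, mirroring batch_payloads."""
    payloads = []
    for interface in interfaces:
        if len(payloads) == batch_size:
            yield payloads
            payloads = []
        payloads.append(interface)
    if payloads:  # flush the final partial batch
        yield payloads


batches = list(batch_interfaces([{"index": i} for i in range(250)]))
assert [len(b) for b in batches] == [100, 100, 50]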
+
+    def submit_node_metadata(self, node_attrs, tags):
+        node = Node(attributes=node_attrs)
+        id_tags = ['namespace:{}'.format(self.namespace), 'system_ip:{}'.format(node.attributes.address)]
Review thread on the id_tags line above:

Reviewer: Do we add the […]
Author: i have a separate ticket for that work! i'm doing that in the bg, but if you prefer it all in one PR i can amend that!
Reviewer: good for me to add it separately 👍 i think here we just want to ensure that the id tags can be used to filter metrics down to a specific device (so […]
+
+        device_tags = [
+            'device_vendor:{}'.format(VENDOR_CISCO),
+            'device_namespace:{}'.format(self.namespace),
+            'device_hostname:{}'.format(node.attributes.dn),
+            'hostname:{}'.format(node.attributes.dn),
+            'system_ip:{}'.format(node.attributes.address),
+            'device_ip:{}'.format(node.attributes.address),
+            'device_id:{}:{}'.format(self.namespace, node.attributes.address),
+            "source:cisco-aci",
+        ]
+        device = DeviceMetadata(
+            id='{}:{}'.format(self.namespace, node.attributes.address),
+            id_tags=id_tags,
+            tags=device_tags + tags,
+            name=node.attributes.dn,
+            ip_address=node.attributes.address,
+            model=node.attributes.model,
+            fabric_st=node.attributes.fabric_st,
+            vendor=VENDOR_CISCO,
+            version=node.attributes.version,
+            serial_number=node.attributes.serial,
+            device_type=node.attributes.device_type,
+        )
+        return device.model_dump(exclude_none=True)
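Because model_dump(exclude_none=True) is used, unset optional fields are simply omitted from the returned dict. Roughly what a returned device entry could look like — every value below is made up for illustration:

# Hypothetical shape of the dict returned by submit_node_metadata()
example_device = {
    "id": "default:10.0.200.1",
    "id_tags": ["namespace:default", "system_ip:10.0.200.1"],
    "tags": ["device_vendor:cisco", "device_id:default:10.0.200.1", "source:cisco-aci"],
    "name": "topology/pod-1/node-101",
    "ip_address": "10.0.200.1",
    "model": "N9K-C9336PQ",
    "fabric_st": "active",
    "vendor": "cisco",
    "version": "n9000-15.2(8h)",
    "serial_number": "FDO12345ABC",
    "device_type": "switch",
}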
+
+    def create_interface_metadata(self, phys_if, address, tags, hostname):
+        eth = PhysIf(**phys_if.get('l1PhysIf', {}))
+        interface = InterfaceMetadata(
+            device_id='{}:{}'.format(self.namespace, address),
+            id_tags=tags,
+            index=eth.attributes.id,
+            name=eth.attributes.name,
+            description=eth.attributes.desc,
+            mac_address=eth.attributes.router_mac,
+            admin_status=eth.attributes.admin_st,
+        )
+        if eth.ethpm_phys_if:
+            interface.oper_status = eth.ethpm_phys_if.attributes.oper_st
+        if interface.status:
+            new_tags = tags.copy()
+            new_tags.extend(
+                [
+                    "device_ip:{}".format(address),
+                    "device_namespace:{}".format(self.namespace),
+                    "interface.status:{}".format(interface.status),
+                ]
+            )
+            self.gauge('cisco_aci.fabric.node.interface.status', 1, tags=new_tags, hostname=hostname)
+        return interface.model_dump(exclude_none=True)
Review thread on batch_payloads:

Reviewer: 🤔 I can't remember if our backend really supports receiving multiple interfaces in the same batch when they come from different devices. Do you know how Meraki does it? We might want to check other examples just to be safe.
Author: here's reference to the meraki NDM ingestion - the crawler is running on a per org basis, so multiple devices / interfaces may overlap at some point i'd assume? 🤔
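If the backend did turn out to require that a single payload never mixes devices, one possible follow-up would be to group interfaces by device_id before chunking. A hypothetical sketch, not part of this PR:

from itertools import groupby
from operator import itemgetter


def batch_interfaces_per_device(interfaces, batch_size=100):
    """Hypothetical: chunk interface dicts so no payload mixes device_ids."""
    ordered = sorted(interfaces, key=itemgetter("device_id"))
    for _, group in groupby(ordered, key=itemgetter("device_id")):
        group = list(group)
        for i in range(0, len(group), batch_size):
            yield group[i : i + batch_size]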