Skip to content

Commit b915f56

Browse files
authored
[NDM] Add NDM metadata support for Cisco ACI (#17735)
* Add support for sending device metadata * Add unit test for device metadata, update fixture * Add license header, changelogs * Lint * First pass at submitting interface metadata, cleanup for test fixtures * Fix for py2.7 support * Try to fix imports * Deal with pydantic stuff py2.7 * Allow namespace for Cisco ACI devices, static var for vendor * Update device metadata to use the correct fieldname, add pydantic model for EvP intake * Sync the conf.yaml example * Add device type and integration to device metadata, fix ID field name * Update interface statuses * Deal with device status (use fabricSt) * Update get_eth_list to get operStatus, update all tests and fixtures * Amend docs for namespace * Batch events sent to EvP * Add interface status metric * Only add to list for >py3.0 * Update default value for vendor, yield for batch events, use device type other * Add source field to device metadata tags * Add enums for interface status * Use correct track type for NDM metadata * Amend device id tag, collect timestamp ms -> s * Add interface integration field * More generic method to send EvP event * Add docstring for the EvP method * Update interface tagging, remove system_ip tag * Fix linting for submit event platform event * Use interface ID tags
1 parent 5d6a13e commit b915f56

25 files changed

+28097
-36
lines changed

cisco_aci/assets/configuration/spec.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ files:
9292
value:
9393
type: boolean
9494
example: False
95+
- name: namespace
96+
description: |
97+
Namespace for differentiating between devices that share the same IP.
98+
If not specified, the namespace will be 'default'.
99+
value:
100+
type: string
101+
example: default
95102
- template: instances/http
96103
overrides:
97104
username.display_priority: 9

cisco_aci/changelog.d/17735.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
[NDM] Add NDM metadata support for Cisco ACI

cisco_aci/datadog_checks/cisco_aci/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
# Licensed under a 3-clause BSD style license (see LICENSE)
44

55
from .__about__ import __version__
6-
from .cisco import CiscoACICheck
6+
from datadog_checks.cisco_aci.cisco import CiscoACICheck
77

88
__all__ = ['__version__', 'CiscoACICheck']

cisco_aci/datadog_checks/cisco_aci/api.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -286,8 +286,8 @@ def get_spine_proc_metrics(self, pod, node):
286286
return self._parse_response(response)
287287

288288
def get_eth_list(self, pod, node):
    """
    Request the l1PhysIf objects of a node and return the parsed response.

    The query asks for each object's children of class ethpmPhysIf to be
    included in the response subtree.

    :param pod: pod identifier interpolated into the request path
    :param node: node identifier interpolated into the request path
    :return: whatever ``self._parse_response`` produces for the response
    """
    subtree_query = 'rsp-subtree=children&rsp-subtree-class=ethpmPhysIf'
    path_template = '/api/node/class/topology/pod-{}/node-{}/l1PhysIf.json?{}'
    raw_response = self.make_request(path_template.format(pod, node, subtree_query))
    return self._parse_response(raw_response)
293293

cisco_aci/datadog_checks/cisco_aci/cisco.py

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,12 @@
66
from datadog_checks.base import AgentCheck, ConfigurationError
77
from datadog_checks.base.config import _is_affirmative
88
from datadog_checks.base.utils.containers import hash_mutable
9-
10-
from . import aci_metrics
11-
from .api import Api
12-
from .capacity import Capacity
13-
from .fabric import Fabric
14-
from .tags import CiscoTags
15-
from .tenant import Tenant
9+
from datadog_checks.cisco_aci.aci_metrics import make_tenant_metrics
10+
from datadog_checks.cisco_aci.api import Api
11+
from datadog_checks.cisco_aci.capacity import Capacity
12+
from datadog_checks.cisco_aci.fabric import Fabric
13+
from datadog_checks.cisco_aci.tags import CiscoTags
14+
from datadog_checks.cisco_aci.tenant import Tenant
1615

1716
SOURCE_TYPE = 'cisco_aci'
1817

@@ -25,7 +24,7 @@ class CiscoACICheck(AgentCheck):
2524

2625
def __init__(self, name, init_config, instances):
2726
super(CiscoACICheck, self).__init__(name, init_config, instances)
28-
self.tenant_metrics = aci_metrics.make_tenant_metrics()
27+
self.tenant_metrics = make_tenant_metrics()
2928
self.last_events_ts = {}
3029
self.external_host_tags = {}
3130
self._api_cache = {}
@@ -109,7 +108,7 @@ def check(self, _):
109108
raise
110109

111110
try:
112-
fabric = Fabric(self, api, self.instance)
111+
fabric = Fabric(self, api, self.instance, self.instance.get('namespace', 'default'))
113112
fabric.collect()
114113
except Exception as e:
115114
self.log.error('fabric collection failed: %s', e)

cisco_aci/datadog_checks/cisco_aci/config_models/defaults.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ def instance_min_collection_interval():
6060
return 15
6161

6262

63+
def instance_namespace():
    """Return the default value of the instance-level ``namespace`` option."""
    default_namespace = 'default'
    return default_namespace
65+
66+
6367
def instance_persist_connections():
6468
return False
6569

cisco_aci/datadog_checks/cisco_aci/config_models/instance.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ class InstanceConfig(BaseModel):
8282
log_requests: Optional[bool] = None
8383
metric_patterns: Optional[MetricPatterns] = None
8484
min_collection_interval: Optional[float] = None
85+
namespace: Optional[str] = None
8586
ntlm_domain: Optional[str] = None
8687
password: Optional[str] = None
8788
persist_connections: Optional[bool] = None

cisco_aci/datadog_checks/cisco_aci/data/conf.yaml.example

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ instances:
124124
#
125125
# appcenter: false
126126

127+
## @param namespace - string - optional - default: default
128+
## Namespace for differentiating between devices that share the same IP.
129+
## If not specified, the namespace will be 'default'.
130+
#
131+
# namespace: default
132+
127133
## @param proxy - mapping - optional
128134
## This overrides the `proxy` setting in `init_config`.
129135
##

cisco_aci/datadog_checks/cisco_aci/fabric.py

Lines changed: 106 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,38 @@
22
# All rights reserved
33
# Licensed under a 3-clause BSD style license (see LICENSE)
44

5-
from six import iteritems
5+
from six import PY3, iteritems
6+
7+
from datadog_checks.base.utils.serialization import json
8+
9+
if PY3:
    import time

    from datadog_checks.cisco_aci.models import DeviceMetadata, InterfaceMetadata, NetworkDevicesMetadata, Node, PhysIf

else:
    # On Python 2 the metadata models are not imported; every name imported on
    # the Python 3 branch gets a None placeholder so the module exposes the
    # same set of names either way. Code using them is guarded by ``PY3``.
    DeviceMetadata = None
    InterfaceMetadata = None
    NetworkDevicesMetadata = None
    Node = None
    PhysIf = None
    # Retained for backward compatibility: this placeholder existed before the
    # fallback list was aligned with the imports above.
    Eth = None
619

720
from . import aci_metrics, exceptions, helpers
821

22+
VENDOR_CISCO = 'cisco'
23+
PAYLOAD_METADATA_BATCH_SIZE = 100
24+
925

1026
class Fabric:
1127
"""
1228
Collect fabric metrics from the APIC
1329
"""
1430

15-
def __init__(self, check, api, instance):
31+
def __init__(self, check, api, instance, namespace):
1632
self.check = check
1733
self.api = api
1834
self.instance = instance
1935
self.check_tags = check.check_tags
36+
self.namespace = namespace
2037

2138
# grab some functions from the check
2239
self.gauge = check.gauge
@@ -25,13 +42,19 @@ def __init__(self, check, api, instance):
2542
self.submit_metrics = check.submit_metrics
2643
self.tagger = self.check.tagger
2744
self.external_host_tags = self.check.external_host_tags
45+
self.event_platform_event = check.event_platform_event
2846

2947
def collect(self):
    """
    Collect fabric pod and node data and, on Python 3, send NDM metadata.

    Fetches the fabric pods and nodes from the API, submits pod health, then
    processes the nodes to obtain device and interface metadata. On Python 3
    the metadata is grouped by ``batch_payloads`` and each batch is sent as a
    JSON event on the "network-devices-metadata" track.
    """
    fabric_pods = self.api.get_fabric_pods()
    fabric_nodes = self.api.get_fabric_nodes()
    # Argument order must follow the message: pod count first, node count second.
    self.log.info("%s pods and %s nodes computed", len(fabric_pods), len(fabric_nodes))
    pods = self.submit_pod_health(fabric_pods)
    devices, interfaces = self.submit_nodes_health_and_metadata(fabric_nodes, pods)
    if PY3:
        # One timestamp (whole seconds) is shared by every batch of this run.
        collect_timestamp = int(time.time())
        for batch in self.batch_payloads(devices, interfaces, collect_timestamp):
            self.event_platform_event(json.dumps(batch.model_dump(exclude_none=True)), "network-devices-metadata")
3558

3659
def submit_pod_health(self, pods):
3760
pods_dict = {}
@@ -53,7 +76,9 @@ def submit_pod_health(self, pods):
5376

5477
return pods_dict
5578

56-
def submit_nodes_health(self, nodes, pods):
79+
def submit_nodes_health_and_metadata(self, nodes, pods):
80+
device_metadata = []
81+
interface_metadata = []
5782
for n in nodes:
5883
hostname = helpers.get_fabric_hostname(n)
5984

@@ -70,17 +95,22 @@ def submit_nodes_health(self, nodes, pods):
7095
continue
7196
self.log.info("processing node %s on pod %s", node_id, pod_id)
7297
try:
98+
if PY3:
99+
device_metadata.append(self.submit_node_metadata(node_attrs, tags))
73100
self.submit_process_metric(n, tags + self.check_tags + user_tags, hostname=hostname)
74101
except (exceptions.APIConnectionException, exceptions.APIParsingException):
75102
pass
76103
if node_attrs.get('role') != "controller":
77104
try:
78105
stats = self.api.get_node_stats(pod_id, node_id)
79106
self.submit_fabric_metric(stats, tags, 'fabricNode', hostname=hostname)
80-
self.process_eth(node_attrs)
107+
eth_metadata = self.process_eth(node_attrs)
108+
if PY3:
109+
interface_metadata.extend(eth_metadata)
81110
except (exceptions.APIConnectionException, exceptions.APIParsingException):
82111
pass
83112
self.log.info("finished processing node %s", node_id)
113+
return device_metadata, interface_metadata
84114

85115
def process_eth(self, node):
86116
self.log.info("processing ethernet ports for %s", node.get('id'))
@@ -90,16 +120,20 @@ def process_eth(self, node):
90120
eth_list = self.api.get_eth_list(pod_id, node['id'])
91121
except (exceptions.APIConnectionException, exceptions.APIParsingException):
92122
pass
123+
interfaces = []
93124
for e in eth_list:
94125
eth_attrs = helpers.get_attributes(e)
95126
eth_id = eth_attrs['id']
96127
tags = self.tagger.get_fabric_tags(e, 'l1PhysIf')
128+
if PY3:
129+
interfaces.append(self.create_interface_metadata(e, node['address'], tags, hostname))
97130
try:
98131
stats = self.api.get_eth_stats(pod_id, node['id'], eth_id)
99132
self.submit_fabric_metric(stats, tags, 'l1PhysIf', hostname=hostname)
100133
except (exceptions.APIConnectionException, exceptions.APIParsingException):
101134
pass
102135
self.log.info("finished processing ethernet ports for %s", node['id'])
136+
return interfaces
103137

104138
def submit_fabric_metric(self, stats, tags, obj_type, hostname=None):
105139
for s in stats:
@@ -209,3 +243,70 @@ def get_fabric_type(self, obj_type):
209243
return 'pod'
210244
if obj_type == 'l1PhysIf':
211245
return 'port'
246+
247+
def batch_payloads(self, devices, interfaces, collect_ts):
    """
    Yield NetworkDevicesMetadata payloads for the given devices and interfaces.

    Every device is yielded in a payload of its own. Interfaces follow, grouped
    into payloads of at most PAYLOAD_METADATA_BATCH_SIZE entries.

    :param devices: iterable of device metadata entries
    :param interfaces: iterable of interface metadata entries
    :param collect_ts: timestamp stored on every payload
    """
    for single_device in devices:
        yield NetworkDevicesMetadata(namespace=self.namespace, devices=[single_device], collect_timestamp=collect_ts)

    pending = []
    for entry in interfaces:
        pending.append(entry)
        if len(pending) < PAYLOAD_METADATA_BATCH_SIZE:
            continue
        # The group is full: emit it and start a fresh list for the next one.
        yield NetworkDevicesMetadata(namespace=self.namespace, interfaces=pending, collect_timestamp=collect_ts)
        pending = []

    # Emit whatever is left over after the last full group.
    if pending:
        yield NetworkDevicesMetadata(namespace=self.namespace, interfaces=pending, collect_timestamp=collect_ts)
262+
def submit_node_metadata(self, node_attrs, tags):
    """
    Build the device metadata entry for one fabric node.

    Nothing is sent from here; the entry is returned to the caller.

    :param node_attrs: node attributes, wrapped in a ``Node`` model
    :param tags: list of tags appended after the generated device tags
    :return: ``DeviceMetadata.model_dump(exclude_none=True)`` for the node
    """
    attrs = Node(attributes=node_attrs).attributes
    namespace = self.namespace

    generated_tags = [
        'device_vendor:{}'.format(VENDOR_CISCO),
        'device_namespace:{}'.format(namespace),
        'device_hostname:{}'.format(attrs.dn),
        'hostname:{}'.format(attrs.dn),
        'device_ip:{}'.format(attrs.address),
        'device_id:{}:{}'.format(namespace, attrs.address),
        "source:cisco-aci",
    ]

    fields = {
        'id': '{}:{}'.format(namespace, attrs.address),
        'id_tags': ['namespace:{}'.format(namespace)],
        'tags': generated_tags + tags,
        'name': attrs.dn,
        'ip_address': attrs.address,
        'model': attrs.model,
        'fabric_st': attrs.fabric_st,
        'vendor': VENDOR_CISCO,
        'version': attrs.version,
        'serial_number': attrs.serial,
        'device_type': attrs.device_type,
    }
    return DeviceMetadata(**fields).model_dump(exclude_none=True)
288+
289+
def create_interface_metadata(self, phys_if, address, tags, hostname):
    """
    Build the interface metadata entry for one physical interface.

    When the resulting interface has a truthy ``status``, a
    ``cisco_aci.fabric.node.interface.status`` gauge of 1 is also submitted.

    :param phys_if: mapping whose 'l1PhysIf' entry is unpacked into ``PhysIf``
    :param address: device address, combined with the namespace for the device id
    :param tags: list of tags; copied, never modified, before status tags are added
    :param hostname: hostname passed to the gauge
    :return: ``InterfaceMetadata.model_dump(exclude_none=True)`` for the interface
    """
    eth = PhysIf(**phys_if.get('l1PhysIf', {}))
    eth_attrs = eth.attributes
    namespace = self.namespace

    interface = InterfaceMetadata(
        device_id='{}:{}'.format(namespace, address),
        id_tags=['interface:{}'.format(eth_attrs.name)],
        index=eth_attrs.id,
        name=eth_attrs.name,
        description=eth_attrs.desc,
        mac_address=eth_attrs.router_mac,
        admin_status=eth_attrs.admin_st,
    )

    # The operational state is only available when the ethpmPhysIf child exists.
    if eth.ethpm_phys_if:
        interface.oper_status = eth.ethpm_phys_if.attributes.oper_st

    # NOTE(review): ``status`` is defined on the model, not shown here.
    if interface.status:
        status_tags = tags.copy()
        status_tags += [
            "device_ip:{}".format(address),
            "device_namespace:{}".format(namespace),
            "interface.status:{}".format(interface.status),
        ]
        self.gauge('cisco_aci.fabric.node.interface.status', 1, tags=status_tags, hostname=hostname)

    return interface.model_dump(exclude_none=True)

0 commit comments

Comments
 (0)