Skip to content

Commit 66b359f

Browse files
Sy/slurm disk usage (#20231)
* adds new disk read metrics, and fixes some memory and cpu metrics * remove disk read metrics * changelog * Add new metrics for disk reads for sacct * Rename 20230.added to 20231.added * Update constants.py * Update common.py * Update common.py * Update common.py * Update common.py --------- Co-authored-by: Kyle Neale <kyle.neale@datadoghq.com>
1 parent 29610b5 commit 66b359f

File tree

4 files changed

+72
-3
lines changed

4 files changed

+72
-3
lines changed

slurm/changelog.d/20231.added

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Added disk read metrics to the sacct metrics set

slurm/datadog_checks/slurm/constants.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
SSHARE_PARAMS = ["-alnPU"]
1313
SACCT_PARAMS = [
1414
"-anpo",
15-
"JobID,JobName%40,Partition,Account,AllocCPUs,AllocTRES%40,Elapsed,CPUTimeRAW,MaxRSS,MaxVMSize,AveCPU,AveRSS,State,ExitCode,Start,End,NodeList",
15+
"JobID,JobName%40,Partition,Account,AllocCPUs,AllocTRES%40,Elapsed,CPUTimeRAW,MaxRSS,MaxVMSize,AveCPU,AveRSS,State,ExitCode,Start,End,NodeList,AveDiskRead,MaxDiskRead",
1616
"--units=K",
1717
]
1818
SCONTROL_PARAMS = ["listpid"]
@@ -98,6 +98,8 @@
9898
{"name": "sacct.slurm_job_maxrss", "index": 8},
9999
{"name": "sacct.slurm_job_maxvm", "index": 9},
100100
{"name": "sacct.slurm_job_avgrss", "index": 11},
101+
{"name": "sacct.slurm_job_ave_disk_read", "index": 17},
102+
{"name": "sacct.slurm_job_max_disk_read", "index": 18},
101103
],
102104
}
103105

slurm/tests/common.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1103,6 +1103,38 @@ def mock_output(filename):
11031103
'slurm_job_tres_per_node:billing=1,cpu=1,mem=500M,node=1',
11041104
],
11051105
},
1106+
{
1107+
'name': 'slurm.sacct.slurm_job_ave_disk_read',
1108+
'value': 900000,
1109+
'tags': [
1110+
'slurm_job_account:root',
1111+
'slurm_job_cpus:1',
1112+
'slurm_job_exitcode:0:0',
1113+
'slurm_job_id:56',
1114+
'slurm_job_name:wrap',
1115+
'slurm_job_node_list:c1',
1116+
'slurm_job_partition:normal',
1117+
'slurm_partition_name:normal',
1118+
'slurm_job_state:COMPLETED',
1119+
'slurm_job_tres_per_node:billing=1,cpu=1,mem=500M,node=1',
1120+
],
1121+
},
1122+
{
1123+
'name': 'slurm.sacct.slurm_job_max_disk_read',
1124+
'value': 900000,
1125+
'tags': [
1126+
'slurm_job_account:root',
1127+
'slurm_job_cpus:1',
1128+
'slurm_job_exitcode:0:0',
1129+
'slurm_job_id:56',
1130+
'slurm_job_name:wrap',
1131+
'slurm_job_node_list:c1',
1132+
'slurm_job_partition:normal',
1133+
'slurm_partition_name:normal',
1134+
'slurm_job_state:COMPLETED',
1135+
'slurm_job_tres_per_node:billing=1,cpu=1,mem=500M,node=1',
1136+
],
1137+
},
11061138
{
11071139
'name': 'slurm.sacct.slurm_job_maxvm',
11081140
'value': 12000,
@@ -1223,6 +1255,40 @@ def mock_output(filename):
12231255
'slurm_job_tres_per_node:cpu=1,mem=500M,node=1',
12241256
],
12251257
},
1258+
{
1259+
'name': 'slurm.sacct.slurm_job_ave_disk_read',
1260+
'value': 900000,
1261+
'tags': [
1262+
'slurm_job_account:root',
1263+
'slurm_job_cpus:1',
1264+
'slurm_job_exitcode:0:0',
1265+
'slurm_job_id:56',
1266+
'slurm_job_id_suffix:batch',
1267+
'slurm_job_name:batch',
1268+
'slurm_job_node_list:c1',
1269+
'slurm_job_partition:null',
1270+
'slurm_partition_name:null',
1271+
'slurm_job_state:COMPLETED',
1272+
'slurm_job_tres_per_node:cpu=1,mem=500M,node=1',
1273+
],
1274+
},
1275+
{
1276+
'name': 'slurm.sacct.slurm_job_max_disk_read',
1277+
'value': 900000,
1278+
'tags': [
1279+
'slurm_job_account:root',
1280+
'slurm_job_cpus:1',
1281+
'slurm_job_exitcode:0:0',
1282+
'slurm_job_id:56',
1283+
'slurm_job_id_suffix:batch',
1284+
'slurm_job_name:batch',
1285+
'slurm_job_node_list:c1',
1286+
'slurm_job_partition:null',
1287+
'slurm_partition_name:null',
1288+
'slurm_job_state:COMPLETED',
1289+
'slurm_job_tres_per_node:cpu=1,mem=500M,node=1',
1290+
],
1291+
},
12261292
{
12271293
'name': 'slurm.sacct.slurm_job_maxvm',
12281294
'value': 22000,

slurm/tests/fixtures/sacct.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
56|wrap|normal|root|1|billing=1,cpu=1,mem=500M,node=1|00:12:34|10|11K|12K|00:07:56|14K|COMPLETED|0:0|2024-10-20T22:14:25|2024-10-20T22:14:25|c1
2-
56.batch|batch||root|1|cpu=1,mem=500M,node=1|01:23:45|20|21K|22K|00:09:56|24K|COMPLETED|0:0|2024-10-20T22:14:25|2024-10-20T22:14:25|c1
1+
56|wrap|normal|root|1|billing=1,cpu=1,mem=500M,node=1|00:12:34|10|11K|12K|00:07:56|14K|COMPLETED|0:0|2024-10-20T22:14:25|2024-10-20T22:14:25|c1|0.9M|0.9M
2+
56.batch|batch||root|1|cpu=1,mem=500M,node=1|01:23:45|20|21K|22K|00:09:56|24K|COMPLETED|0:0|2024-10-20T22:14:25|2024-10-20T22:14:25|c1|0.9M|0.9M

0 commit comments

Comments
 (0)