Skip to content

Commit

Permalink
-N option
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinavDhulipala committed May 28, 2024
1 parent 1ae8201 commit 9f075ba
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 12 deletions.
13 changes: 1 addition & 12 deletions exporter/fixtures/sinfo_dataparser.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,7 @@
"total": 10,
"hostnames": [],
"addresses": [],
"nodes": [
"cs62",
"cs63",
"cs64",
"cs65",
"cs66",
"cs68",
"cs71",
"cs200",
"cs201",
"cs204"
]
"nodes": ["cs62"]
},
"cpus": {
"allocated": 114,
Expand Down
68 changes: 68 additions & 0 deletions exporter/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,77 @@ type sinfoDataParserResponse struct {
Other int `json:"other"`
Total int `json:"total"`
}
Memory struct {
Minimum int `json:"minimum"`
Maximum int `json:"maximum"`
Allocated int `json:"allocated"`
Free struct {
Minimum struct {
Set bool `json:"set"`
Number int `json:"number"`
} `json:"minimum"`
Maximum struct {
Set bool `json:"set"`
Number int `json:"number"`
} `json:"maximum"`
} `json:"free"`
}
// Partition identifies the partition this sinfo entry belongs to.
// The tag must spell "partition" exactly: encoding/json matches keys
// case-insensitively but not approximately, so a misspelled tag leaves
// Name and Alternate empty.
Partition struct {
	Name      string `json:"name"`
	Alternate string `json:"alternate"`
} `json:"partition"`
} `json:"sinfo"`
}

// DataParserJsonFetcher turns raw JSON bytes obtained from its scraper into
// NodeMetric values; its fetch method decodes those bytes as a
// sinfoDataParserResponse (the shape of `sinfo -N --json` output).
type DataParserJsonFetcher struct {
// scraper supplies the raw JSON bytes that fetch unmarshals.
scraper SlurmByteScraper
// errorCounter is not touched by fetch; presumably incremented by callers
// when a scrape fails — TODO confirm against the rest of the file.
errorCounter prometheus.Counter
// cache is not touched by fetch; presumably holds recently fetched metrics
// so the CLI is not invoked on every request — TODO confirm.
cache *AtomicThrottledCache[NodeMetric]
}

// fetch reads raw JSON from dpj.scraper, decodes it as a
// sinfoDataParserResponse and converts every sinfo entry into a NodeMetric.
//
// Every entry must describe exactly one node, which is what
// `sinfo -N --json` produces. An error is returned (and no metrics) when:
//   - the scraper or the JSON decoding fails,
//   - an entry reports a node total other than 1 or lists no hostname,
//   - either free-memory value is not marked as set,
//   - the minimum and maximum (free) memory of an entry differ, which means
//     the entry aggregates more than one node.
//
// On success the returned slice is non-nil, possibly empty, and holds one
// metric per entry in response order.
func (dpj *DataParserJsonFetcher) fetch() ([]NodeMetric, error) {
	cliJson, err := dpj.scraper.FetchRawBytes()
	if err != nil {
		return nil, err
	}
	resp := new(sinfoDataParserResponse)
	if err := json.Unmarshal(cliJson, resp); err != nil {
		return nil, err
	}
	nodeMetrics := make([]NodeMetric, 0, len(resp.Sinfo))
	for _, entry := range resp.Sinfo {
		nodes := entry.Nodes
		// validate single node parse
		if nodes.Total != 1 {
			return nil, fmt.Errorf("must contain only 1 node per entry, please use the -N option exp. `sinfo -N --json`")
		}
		// Total is reported independently of the hostname list, so guard the
		// index below instead of panicking on a malformed response.
		if len(nodes.Nodes) == 0 {
			return nil, fmt.Errorf("entry reports 1 node but lists no hostname")
		}
		free := entry.Memory.Free
		// Both free-memory numbers are read below, so both must be marked as
		// set; an unset value would otherwise be exported as 0.
		if !free.Maximum.Set || !free.Minimum.Set {
			return nil, fmt.Errorf("unable to scrape free mem metrics")
		}
		// For a single node, min and max collapse to the same value; a
		// difference means the entry covers several nodes.
		if free.Minimum.Number != free.Maximum.Number {
			return nil, fmt.Errorf("must contain only 1 node per entry, please use the -N option exp. `sinfo -N --json`")
		}
		if entry.Memory.Minimum != entry.Memory.Maximum {
			return nil, fmt.Errorf("must contain only 1 node per entry, please use the -N option exp. `sinfo -N --json`")
		}
		metric := NodeMetric{
			Hostname:   nodes.Nodes[0],
			Cpus:       float64(entry.Cpus.Total),
			RealMemory: float64(entry.Memory.Maximum),
			FreeMemory: float64(free.Maximum.Number),
			State:      strings.Join(entry.Node.State, "&"),
		}
		// NOTE(review): a node that belongs to several partitions appears as
		// several entries and therefore yields several metrics with the same
		// hostname — confirm whether callers expect them to be merged.
		if !slices.Contains(metric.Partitions, entry.Partition.Name) {
			metric.Partitions = append(metric.Partitions, entry.Partition.Name)
		}
		if entry.Partition.Alternate != "" && !slices.Contains(metric.Partitions, entry.Partition.Alternate) {
			metric.Partitions = append(metric.Partitions, entry.Partition.Alternate)
		}
		nodeMetrics = append(nodeMetrics, metric)
	}
	return nodeMetrics, nil
}

type sinfoResponse struct {
Meta struct {
SlurmVersion struct {
Expand Down

0 comments on commit 9f075ba

Please sign in to comment.