Skip to content

Commit

Permalink
[exporter/diags] make json sdiag response struct 24.05 compatible (#114)
Browse files Browse the repository at this point in the history
* make json sdiag response struct 24.05 compatible

* keep testing against slurm 23
  • Loading branch information
abhinavDhulipala authored Feb 19, 2025
1 parent 4c4d36c commit 7c4f76d
Show file tree
Hide file tree
Showing 6 changed files with 433 additions and 28 deletions.
6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ RUN mkdir -p /etc/slurm && \
# install go deps
RUN arch=`uname -m` && \
if [ $arch == "aarch64" ]; then arch="arm64"; elif [ "$arch" == "x86_64" ]; then arch="amd64" ;fi && \
wget "https://go.dev/dl/go1.20.12.linux-${arch}.tar.gz" && \
tar -C /usr/local -xzf "go1.20.12.linux-${arch}.tar.gz" && \
rm "go1.20.12.linux-${arch}.tar.gz" && \
wget "https://go.dev/dl/go1.23.1.linux-${arch}.tar.gz" && \
tar -C /usr/local -xzf "go1.23.1.linux-${arch}.tar.gz" && \
rm "go1.23.1.linux-${arch}.tar.gz" && \
mkdir /src

# default wrapper deps for e2e tests
Expand Down
89 changes: 65 additions & 24 deletions exporter/diags.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,58 @@ import (
"encoding/json"
"fmt"

"github.com/prometheus/client_golang/prometheus"
"log/slog"

"github.com/prometheus/client_golang/prometheus"
)

// IntFromOptionalStruct is an int that can be decoded from two JSON shapes.
// Depending on the data_parser version, certain integer fields arrive either
// as a bare number or wrapped in an object such as
//
//	{"average_time": {"set": true, "number": 1234, "infinite": false}}
//
// This type coerces both representations to a plain int.
type IntFromOptionalStruct int

// UnmarshalJSON implements json.Unmarshaler for IntFromOptionalStruct.
//
// If data decodes as a native int, that value is stored and nil is returned.
// Otherwise data is decoded as an object with "set", "infinite" and "number"
// keys:
//   - a decode failure is returned unchanged and the receiver is left untouched;
//   - "set" == false stores -1 and returns an error;
//   - "infinite" == true stores -1 and returns an error;
//   - otherwise "number" is stored and nil is returned.
func (n *IntFromOptionalStruct) UnmarshalJSON(data []byte) error {
	// First attempt: the field is already a plain integer.
	var plain int
	if json.Unmarshal(data, &plain) == nil {
		*n = IntFromOptionalStruct(plain)
		return nil
	}

	// Second attempt: the wrapped {"set", "infinite", "number"} form.
	var wrapped struct {
		Number   int  `json:"number"`
		Set      bool `json:"set"`
		Infinite bool `json:"infinite"`
	}
	if decodeErr := json.Unmarshal(data, &wrapped); decodeErr != nil {
		return decodeErr
	}

	// "set" is checked before "infinite", so an unset value always reports
	// the "not set" error even when "infinite" is also true.
	switch {
	case !wrapped.Set:
		*n = IntFromOptionalStruct(-1)
		return fmt.Errorf("avg num not set")
	case wrapped.Infinite:
		*n = IntFromOptionalStruct(-1)
		return fmt.Errorf("num set to infinite")
	}

	*n = IntFromOptionalStruct(wrapped.Number)
	return nil
}

type UserRpcInfo struct {
User string `json:"user"`
UserId int `json:"user_id"`
Count int `json:"count"`
AvgTime int `json:"average_time"`
TotalTime int `json:"total_time"`
User string `json:"user"`
UserId int `json:"user_id"`
Count int `json:"count"`
AvgTime IntFromOptionalStruct `json:"average_time"`
TotalTime int `json:"total_time"`
}

type MessageRpcInfo struct {
MessageType string `json:"message_type"`
TypeId int `json:"type_id"`
Count int `json:"count"`
AvgTime int `json:"average_time"`
TotalTime int `json:"total_time"`
MessageType string `json:"message_type"`
TypeId int `json:"type_id"`
Count int `json:"count"`
AvgTime IntFromOptionalStruct `json:"average_time"`
TotalTime int `json:"total_time"`
}

type DiagMetric struct {
Expand All @@ -41,22 +75,29 @@ type DiagMetric struct {
}

type SdiagResponse struct {
// Response coercible between slurm 23 and 24 data versions
Meta struct {
SlurmVersion struct {
Version struct {
Major int `json:"major"`
Micro int `json:"micro"`
Minor int `json:"minor"`
} `json:"version"`
Release string `json:"release"`
} `json:"Slurm"`
Plugins map[string]string
SlurmVersion SlurmVersion `json:"Slurm"`
Plugins map[string]string `json:"plugins"`
Plugin map[string]string `json:"plugin"`
} `json:"meta"`
Statistics DiagMetric
Errors []string `json:"errors"`
Warnings []string `json:"warnings"`
}

// IsDataParserPlugin reports whether the response metadata lists a
// "data_parser" entry. The metadata map is looked up under Meta.Plugins first
// and under Meta.Plugin second; the first of the two that is non-nil decides
// the result on its own. If both are nil the answer is false.
func (sr *SdiagResponse) IsDataParserPlugin() bool {
	const pluginKey = "data_parser"

	switch {
	case sr.Meta.Plugins != nil:
		_, found := sr.Meta.Plugins[pluginKey]
		return found
	case sr.Meta.Plugin != nil:
		_, found := sr.Meta.Plugin[pluginKey]
		return found
	default:
		return false
	}
}

func parseDiagMetrics(sdiagResp []byte) (*SdiagResponse, error) {
sdiag := new(SdiagResponse)
err := json.Unmarshal(sdiagResp, sdiag)
Expand Down Expand Up @@ -138,14 +179,14 @@ func (sc *DiagnosticsCollector) Collect(ch chan<- prometheus.Metric) {
}
ch <- prometheus.MustNewConstMetric(sc.diagScrapeDuration, prometheus.GaugeValue, float64(sc.fetcher.Duration().Abs().Milliseconds()))
sdiagResponse, err := parseDiagMetrics(sdiag)
if _, ok := sdiagResponse.Meta.Plugins["data_parser"]; !ok {
if err != nil {
sc.diagScrapeError.Inc()
slog.Error("only the data_parser plugin is supported")
slog.Error(fmt.Sprintf("diag parse error: %q", err))
return
}
if err != nil {
if !sdiagResponse.IsDataParserPlugin() {
sc.diagScrapeError.Inc()
slog.Error(fmt.Sprintf("diag parse error: %q", err))
slog.Error("only the data_parser plugin is supported")
return
}
emitNonZero := func(desc *prometheus.Desc, val float64, label string) {
Expand Down
39 changes: 39 additions & 0 deletions exporter/diags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,25 @@ func TestDiagCollect(t *testing.T) {
assert.NotEmpty(metrics)
}

// TestDiagCollect_2405 runs the diagnostics collector against the
// fixtures/sdiag_2405.json fixture and checks that at least one metric is
// emitted.
func TestDiagCollect_2405(t *testing.T) {
	check := assert.New(t)

	cfg, cfgErr := NewConfig(new(CliFlags))
	check.NoError(cfgErr)

	collector := NewDiagsCollector(cfg)
	collector.fetcher = &MockScraper{fixture: "fixtures/sdiag_2405.json"}

	// Collect runs in its own goroutine because the channel is unbuffered;
	// closing it afterwards ends the receive loop below.
	out := make(chan prometheus.Metric)
	go func() {
		defer close(out)
		collector.Collect(out)
	}()

	var collected []prometheus.Metric
	for metric := range out {
		collected = append(collected, metric)
		t.Logf("Received metric %s", metric.Desc().String())
	}
	check.NotEmpty(collected)
}

func TestDiagDescribe(t *testing.T) {
assert := assert.New(t)
ch := make(chan *prometheus.Desc)
Expand All @@ -57,3 +76,23 @@ func TestDiagDescribe(t *testing.T) {
}
assert.NotEmpty(descs)
}

// TestDataParserVersionDiscovery_Slurm23 parses the fixtures/sdiag.json
// fixture (presumably a Slurm 23 response, going by the test name) and
// expects the data_parser plugin to be detected.
func TestDataParserVersionDiscovery_Slurm23(t *testing.T) {
	check := assert.New(t)

	scraper := MockScraper{fixture: "fixtures/sdiag.json"}
	raw, fetchErr := scraper.FetchRawBytes()
	check.NoError(fetchErr)

	parsed, parseErr := parseDiagMetrics(raw)
	check.NoError(parseErr)

	check.True(parsed.IsDataParserPlugin())
}

// TestDataParserVersionDiscovery_Slurm24 parses the fixtures/sdiag_2405.json
// fixture and expects the data_parser plugin to be detected; on failure the
// parsed metadata is included in the assertion message.
func TestDataParserVersionDiscovery_Slurm24(t *testing.T) {
	check := assert.New(t)

	scraper := MockScraper{fixture: "fixtures/sdiag_2405.json"}
	raw, fetchErr := scraper.FetchRawBytes()
	check.NoError(fetchErr)

	parsed, parseErr := parseDiagMetrics(raw)
	check.NoError(parseErr)

	check.Truef(parsed.IsDataParserPlugin(), "parsed metadata struct %+v", parsed.Meta)
}
Loading

0 comments on commit 7c4f76d

Please sign in to comment.