Skip to content

Commit

Permalink
Make paths to files configurable (#5)
Browse files Browse the repository at this point in the history
* Add 'paths' to config struct

* Update test configs

* Implement new paths config entry

* Set default paths

* Make defaults setting nicer

* Rename paths structure and move logfile out of it

* Move default paths to config package

* Create binpaths struct if non-existent
  • Loading branch information
tdido authored May 23, 2022
1 parent 5189f27 commit 168e0ba
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 17 deletions.
4 changes: 4 additions & 0 deletions cmd/goslmailer/goslmailer.conf.annotated_example
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"logfile": "/tmp/goslmailer.log", # if specified; append logs to this file; else; dump to stderr
"binpaths": { # paths to required (slurm) binaries
"sacct":"/usr/bin/sacct",
"sstat":"/usr/bin/sstat"
},
"defaultconnector": "msteams", # default connector to be used for message delivery for receivers without full 'connector:user' specification
"connectors": { # map of connector configurations
"msteams": { # each connector has it's own map of config attributes
Expand Down
2 changes: 1 addition & 1 deletion cmd/goslmailer/goslmailer.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ func main() {

// get job statistics based on the SLURM_JOB_ID from slurmEnv struct
// only if job is END or FAIL(?)
job.GetJobStats(log, ic.CmdParams.Subject)
job.GetJobStats(log, ic.CmdParams.Subject, cfg.Binpaths)

// generate hints based on SlurmEnv and JobStats (e.g. "too much memory requested" or "walltime << requested queue")
// only if job is END or fail(?)
Expand Down
18 changes: 18 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

type ConfigContainer struct {
Logfile string `json:"logfile"`
Binpaths map[string]string `json:"binpaths"`
DefaultConnector string `json:"defaultconnector"`
Connectors map[string]map[string]string `json:"connectors"`
QosMap map[uint64]string `json:"qosmap"`
Expand All @@ -32,6 +33,23 @@ func (cc *ConfigContainer) GetConfig(name string) error {
if err != nil {
return err
}

if cc.Binpaths == nil {
cc.Binpaths = make(map[string]string)
}

// set default paths
defaultpaths := map[string]string{
"sacct": "/usr/bin/sacct",
"sstat": "/usr/bin/sstat",
}

for key, path := range defaultpaths {
if _, exists := cc.Binpaths[key]; !exists {
cc.Binpaths[key] = path
}
}

return nil
}

Expand Down
8 changes: 4 additions & 4 deletions internal/slurmjob/getjobcontext.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (j *JobContext) IsJobFinished() bool {
}

// Get additional job statistics from external source (e.g. jobinfo or sacct)
func (j *JobContext) GetJobStats(log *log.Logger, subject string) {
func (j *JobContext) GetJobStats(log *log.Logger, subject string, paths map[string]string) {
log.Print("Start retrieving job stats")
log.Printf("%#v", j.SlurmEnvironment)
jobId := j.SlurmEnvironment.SLURM_JOBID
Expand All @@ -141,16 +141,16 @@ func (j *JobContext) GetJobStats(log *log.Logger, subject string) {
jobId = j.SlurmEnvironment.SLURM_ARRAY_JOB_ID
}
log.Printf("Fetch job info %s", jobId)
j.JobStats = *GetSacctMetrics(jobId, log)
j.JobStats = *GetSacctMetrics(jobId, log, paths)
counter := 0
for !IsJobFinished(j.JobStats.State) && j.JobStats.State != j.SlurmEnvironment.SLURM_JOB_STATE && counter < 5 {
time.Sleep(2 * time.Second)
j.JobStats = *GetSacctMetrics(jobId, log)
j.JobStats = *GetSacctMetrics(jobId, log, paths)
counter += 1
}
if j.JobStats.State == "RUNNING" {
log.Print("Update job with live stats")
updateJobStatsWithLiveData(&j.JobStats, jobId, log)
updateJobStatsWithLiveData(&j.JobStats, jobId, log, paths)
}
log.Printf("Finished retrieving job stats")
}
20 changes: 10 additions & 10 deletions internal/slurmjob/sacct.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,16 @@ func (m SacctMetrics) CalcSystemComputePercentage() float64 {
return 0.0
}

func GetSacctMetrics(jobId string, log *log.Logger) *SacctMetrics {
return ParseSacctMetrics(GetSacctData(jobId, log))
func GetSacctMetrics(jobId string, log *log.Logger, paths map[string]string) *SacctMetrics {
return ParseSacctMetrics(GetSacctData(jobId, log, paths))
}

func GetSstatMetrics(jobId string, log *log.Logger) *SstatMetrics {
return ParseSstatMetrics(GetSstatData(jobId, log))
func GetSstatMetrics(jobId string, log *log.Logger, paths map[string]string) *SstatMetrics {
return ParseSstatMetrics(GetSstatData(jobId, log, paths))
}

func updateJobStatsWithLiveData(metrics *SacctMetrics, jobId string, log *log.Logger) {
liveMetrics := GetSstatMetrics(jobId, log)
func updateJobStatsWithLiveData(metrics *SacctMetrics, jobId string, log *log.Logger, paths map[string]string) {
liveMetrics := GetSstatMetrics(jobId, log, paths)
if liveMetrics.MaxRSS > 0 {
metrics.MaxRSS = liveMetrics.MaxRSS
}
Expand All @@ -222,19 +222,19 @@ func updateJobStatsWithLiveData(metrics *SacctMetrics, jobId string, log *log.Lo
}

// Execute the saccct command and return its output
func GetSacctData(jobId string, log *log.Logger) []byte {
func GetSacctData(jobId string, log *log.Logger, paths map[string]string) []byte {
formatLine := "JobName,User,Partition,NodeList,ncpus,State,Submit,start,end,timelimit,elapsed,CPUTime,TotalCPU,UserCPU,SystemCPU,ReqMem,MaxRSS,MaxDiskWrite,MaxDiskRead,MaxRSSNode,MaxDiskWriteNode,MaxDiskReadNode,Comment"
cmd := exec.Command("/usr/bin/sacct", "-j", jobId, "-n", "-p", "--format", formatLine)
cmd := exec.Command(paths["sacct"], "-j", jobId, "-n", "-p", "--format", formatLine)
output, err := cmd.CombinedOutput()
if err != nil {
log.Fatal(output)
}
return output
}

func GetSstatData(jobId string, log *log.Logger) []byte {
func GetSstatData(jobId string, log *log.Logger, paths map[string]string) []byte {
formatLine := "JobID,MaxRSS,MaxDiskWrite,MaxDiskRead,MaxRSSNode,MaxDiskWriteNode,MaxDiskReadNode"
cmd := exec.Command("/usr/bin/sstat", "-a", "-j", jobId, "-n", "-p", "--format", formatLine)
cmd := exec.Command(paths["sstat"], "-a", "-j", jobId, "-n", "-p", "--format", formatLine)
output, err := cmd.CombinedOutput()
if err != nil {
log.Fatal(output)
Expand Down
6 changes: 5 additions & 1 deletion test_data/config_test/gobler.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"logfile": "",
"logfile": "/tmp/goslmailer.log",
"binpaths": {
"sacct":"/usr/bin/sacct",
"sstat":"/usr/bin/sstat"
},
"defaultconnector": "msteams",
"connectors": {
"msteams": {
Expand Down
6 changes: 5 additions & 1 deletion test_data/config_test/goslmailer.conf
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"logfile": "",
"logfile": "/tmp/goslmailer.log",
"binpaths": {
"sacct":"/usr/bin/sacct",
"sstat":"/usr/bin/sstat"
},
"defaultconnector": "msteams",
"connectors": {
"msteams": {
Expand Down

0 comments on commit 168e0ba

Please sign in to comment.