-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathslurm.sh
executable file
·194 lines (157 loc) · 5.77 KB
/
slurm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#! /bin/bash
# Submits job to a Slurm job scheduler.
# (see https://slurm.schedmd.com/sbatch.html)

# SCRIPT DIRECTORY
# Absolute directory containing this script, robust to being invoked
# through a relative path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# DEFAULT PARAMETERS
# Tuned for the `fawcett' computer of the Faculty of Mathematics at the
# University of Cambridge.
# (see https://www.maths.cam.ac.uk/computing/faculty-hpc-system-fawcett)
_SIM_DIR="${SCRIPT_DIR}/build"        # default simulation directory
_ERROR_DIR="${_SIM_DIR}/slurm_error"  # default error output directory
_OUT_FILE=/dev/null                   # default standard output file
_PARTITION=skylake                    # default partition for the resource allocation
_GRES=                                # default generic consumable resources
_NODES=1                              # default required number of nodes
_NTASKS=1                             # default number of MPI ranks running per node
_ARRAY_SIZE=                          # default job array size
_ARRAY_TASKS=                         # default maximum number of simultaneous tasks in the array
_TIME=                                # default required time
_MEMORY=                              # default real memory required per node
# HELP MENU
# Display the help text through a pager. The here-string is expanded by
# this script, so option letters and DEFAULT placeholders are literal text.
usage() {
less <<< "Submit job to a Slurm job scheduler.
(see https://slurm.schedmd.com/sbatch.html)
SYNOPSIS
[bash] slurm.sh [OPTIONS] [ENVIRONMENT VARIABLES] [SCRIPT]
OPTIONS
-h Display this help.
-w Pause this script until completion of the job.
-j Job name on Slurm scheduler.
DEFAULT: script name after last '/'
-c Execute after job with this ID has successfully executed.
DEFAULT: (not specified)
-d Directory in which to execute the job.
DEFAULT: _SIM_DIR
-o Error output directory.
NOTE: Error files are named according to job ID.
DEFAULT: _ERROR_DIR
-f Standard output file.
DEFAULT: _OUT_FILE
-p Partition for the resource allocation.
DEFAULT: _PARTITION
-g Generic consumable resources.
DEFAULT: _GRES
-n Required number of nodes.
DEFAULT: _NODES
-r Number of MPI ranks running per node.
Number of threads for OpenMP parallelised jobs.
DEFAULT: _NTASKS
-a Job array size.
(see https://slurm.schedmd.com/job_array.html)
NOTE: SLURM_ARRAY_TASK_ID is set as task id (between 0 and size - 1).
DEFAULT: _ARRAY_SIZE
-s Maximum number of simultaneously running tasks in the job array.
(see https://slurm.schedmd.com/job_array.html)
NOTE: An empty string will not set this maximum.
DEFAULT: _ARRAY_TASKS
-t Required time.
DEFAULT: _TIME
-m Real memory required per node.
NOTE: MaxMemPerNode allocates maximum memory.
DEFAULT: _MEMORY
"
}
# OPTIONS
# Parse command-line flags into the unprefixed variables; anything not set
# here falls back to its _DEFAULT below.
while getopts "hwj:c:d:o:f:p:g:n:r:a:s:t:m:" OPTION; do
case $OPTION in
h) # help menu
usage; exit 0;;
w) # wait until completion of the job
WAIT=true;;
j) # job name
JOB_NAME=$OPTARG;;
c) # chained job
CHAIN=$OPTARG;;
d) # simulation directory
SIM_DIR=$OPTARG;;
o) # error output directory
ERROR_DIR=$OPTARG;;
f) # standard output file
OUT_FILE=$OPTARG;;
p) # partition
PARTITION=$OPTARG;;
g) # generic consumable resources
GRES=$OPTARG;;
n) # nodes
NODES=$OPTARG;;
r) # tasks
NTASKS=$OPTARG;;
a) # array size
ARRAY_SIZE=$OPTARG;;
s) # array tasks
ARRAY_TASKS=$OPTARG;;
t) # time
TIME=$OPTARG;;
m) # real memory
MEMORY=$OPTARG;;
\?) # unknown option or missing argument: getopts already printed a diagnostic
exit 1;;
esac
done
shift $((OPTIND - 1)) # drop parsed options, leaving the script and its arguments
# A script to submit is mandatory.
if (( $# == 0 )); then
echo 'No script submitted.' >&2;
usage;
exit 1;
fi
SCRIPT=$* # script to execute (with its arguments, joined on spaces)
# JOB PARAMETERS
JOB_NAME=${JOB_NAME-${SCRIPT##*/}} # job name
SIM_DIR=${SIM_DIR-$_SIM_DIR}; mkdir -p "$SIM_DIR"; # simulation directory
ERROR_DIR=${ERROR_DIR-$_ERROR_DIR}; mkdir -p "$ERROR_DIR"; # error output directory
OUT_FILE=${OUT_FILE-$_OUT_FILE} # standard output file
PARTITION=${PARTITION-$_PARTITION} # partition for the resource allocation
GRES=${GRES-$_GRES} # generic consumable resources
NODES=${NODES-$_NODES} # required number of nodes
NTASKS=${NTASKS-$_NTASKS} # maximum ntasks to be invoked on each core
ARRAY_SIZE=${ARRAY_SIZE-$_ARRAY_SIZE} # job array size
ARRAY_TASKS=${ARRAY_TASKS-$_ARRAY_TASKS} # maximum number of simultaneous tasks in the array
TIME=${TIME-$_TIME} # required time
MEMORY=${MEMORY-$_MEMORY} # real memory required per node
# SUBMIT JOB
# Build the batch script on the fly and feed it to sbatch through a
# here-document. All $-expansions in the here-doc are evaluated NOW by this
# wrapper (not by the job), so the submitted script contains literal values.
#   ${WAIT:+-W}               -> -W makes sbatch block until the job ends (-w flag)
#   ${CHAIN:+-d afterok:...}  -> only start after job $CHAIN succeeds (-c flag)
# NOTE(review): the sbatch flags and $SCRIPT are intentionally unquoted so
# they undergo word-splitting; values containing spaces are not supported.
sbatch ${WAIT:+-W} ${CHAIN:+-d afterok:$CHAIN} <<EOF
#!/bin/bash
#SBATCH --job-name='$JOB_NAME'
#SBATCH --chdir=$SIM_DIR
#SBATCH --error=${ERROR_DIR}/%j.out
#SBATCH --output=$OUT_FILE
#SBATCH --partition=$PARTITION
#SBATCH --gres=$GRES
#SBATCH --nodes=$NODES
#SBATCH --ntasks-per-node=$NTASKS
${ARRAY_SIZE:+#SBATCH --array=0-$(($ARRAY_SIZE-1))${ARRAY_TASKS+%$ARRAY_TASKS}}
${TIME:+#SBATCH --time=$TIME}
${MEMORY:+#SBATCH --mem=$MEMORY}
export OMP_NUM_THREADS=$NTASKS
# PRINT JOB PARAMETERS TO ERROR OUTPUT FILE
(>&2 printf '%-21s: %s\n' 'SUBMIT DIRECTORY' '$(pwd)')
(>&2 printf '%-21s: %s\n' 'DATE' '$(date)')
(>&2 echo)
(>&2 printf '%-21s: %s\n' 'JOB NAME' '$JOB_NAME')
(>&2 echo)
(>&2 printf '%-21s: %s\n' 'SIMULATION DIRECTORY' '$SIM_DIR')
(>&2 printf '%-21s: %s\n' 'OUTPUT FILE' '$OUT_FILE')
(>&2 echo)
(>&2 printf '%-21s: %s\n' 'PARTITION' '$PARTITION')
(>&2 printf '%-21s: %s\n' 'GRES' '$GRES')
(>&2 printf '%-21s: %s\n' 'NODES REQUIRED' '$NODES')
(>&2 printf '%-21s: %s\n' 'TASKS PER NODE' '$NTASKS')
(>&2 printf '%-21s: %s\n' 'ARRAY SIZE' '$ARRAY_SIZE')
(>&2 printf '%-21s: %s\n' 'TASKS IN ARRAY' '$ARRAY_TASKS')
(>&2 printf '%-21s: %s\n' 'TIME REQUIRED' '$TIME')
(>&2 printf '%-21s: %s\n' 'MEMORY REQUIRED' '$MEMORY')
(>&2 echo)
(>&2 printf '%-21s: %s\n' 'SCRIPT' '$SCRIPT')
(>&2 echo)
$SCRIPT # launching script
EOF
# NOTE(review): sbatch -W above already blocks until the job completes, and no
# background jobs were launched here, so this `wait' is effectively a no-op
# safeguard — confirm before removing.
${WAIT:+wait} # wait until completion of the job