-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathadmin.sh
executable file
·73 lines (68 loc) · 2.78 KB
/
admin.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
#
# admin.sh - helper commands for the msnovelist6 Docker image and for
# following jobs on a SLURM cluster.
#
# Usage: admin.sh <command>        (run `admin.sh help` for the command list)
#
# Configuration is read from ./.env in the current directory; the file is
# sourced as shell code. Variables used by this script:
#   DATA_LOC      host directory mounted into the container by `run`
#   SCRATCH_PATH  directory holding the singularity cache and image
#                 (used by `singularity-build`)

# Use an explicit ./ so bash does not search $PATH for a file named ".env".
if [[ -f ./.env ]]; then
  # shellcheck source=/dev/null
  source ./.env
else
  printf 'admin.sh: warning: no .env in %s; DATA_LOC/SCRATCH_PATH may be unset\n' \
    "$PWD" >&2
fi

# Image name shared by build, push, run and singularity-build.
MSN_IMAGE=stravsm/msnovelist6

# Prints an error message to stderr and terminates the script with status 1.
die() {
  printf 'admin.sh: %s\n' "$*" >&2
  exit 1
}

# Prints the list of supported commands to stdout.
usage() {
  cat <<'EOF'
Usage: admin.sh <command>

Docker image:
  aptitude           install aptitude in the running container
  root               open a root shell in the running container
  build              build the image from Dockerfile.cuda
  push               push the image to the registry
  run                start the container detached, running webui.sh
  kill               kill the running container

SLURM / singularity:
  singularity-build  build the singularity image from the pushed Docker image
  tail-train         attach to the run_train.sh job and follow its output
  tail-eval          attach to the run_evaluation.sh job and follow its output
  tail-gpu           follow stdout of the process reported by nvidia-smi
  tail-cpu           follow stdout of the running python evaluation process
  bash-vs            open a shell inside the spawner-jupyterhub job
EOF
}

# Aborts unless $2 is exactly one whitespace-free token.
#   $1 - human-readable description used in the error message
#   $2 - value to validate (a container id or job id)
require_one() {
  local what=$1 value=$2
  [[ -n $value ]] || die "no $what found"
  [[ $value != *[[:space:]]* ]] \
    || die "more than one $what found: ${value//$'\n'/ }"
}

# Prints the id (first squeue column) of every job whose "%i %j" line
# contains the fixed string $1.
find_job_id() {
  squeue -o "%i %j" | grep -F -- "$1" | cut -f1 -d' '
}

# Runs `admin.sh <$2>` inside the SLURM job whose name contains $1.
attach_and_run() {
  local job_name=$1 subcommand=$2 remote_cmd
  JOBID=$(find_job_id "$job_name")
  require_one "SLURM job matching '$job_name'" "$JOBID"
  # bash -c receives one string; %q keeps a $PWD containing spaces or other
  # shell metacharacters intact when that string is parsed again.
  printf -v remote_cmd '%q %s' "$PWD/admin.sh" "$subcommand"
  srun --interactive --jobid "$JOBID" --pty bash -c "$remote_cmd"
}

# Prints the PIDs of compute processes reported by nvidia-smi.
gpu_pids() {
  nvidia-smi --query-compute-apps="pid" --format=csv,noheader
}

# Prints the PIDs of the current user's python processes whose command line
# contains "evaluation".
evaluation_pids() {
  pgrep -u"$(id -u)" -a python | grep evaluation | cut -f1 -d' '
}

# Polls the PID-listing command given as "$@" once per second until the first
# PID it prints has a stdout that is a regular file, then follows that file.
# Only the first PID is used: a multi-line PID list can never form a valid
# /proc path, which would make the wait loop spin forever.
follow_stdout() {
  local pid
  pid=$("$@" | head -n 1)
  while [[ ! -f "/proc/$pid/fd/1" ]]; do
    pid=$("$@" | head -n 1)
    sleep 1
  done
  tail -f "/proc/$pid/fd/1"
}

# Dispatches the command given as $1. Returns the status of the command run,
# 0 after printing usage when no command is given, 2 for an unknown command.
main() {
  # Only query docker where it exists; SLURM nodes usually do not have it.
  if command -v docker >/dev/null 2>&1; then
    DOCKER_ID=$(docker ps | grep msnovelist6 | cut -d' ' -f1)
  else
    DOCKER_ID=
  fi
  # Deliberately unquoted so that several ids are printed on a single line.
  # shellcheck disable=SC2086
  echo DOCKER $DOCKER_ID

  case "${1:-}" in
    ## the options below are for building, running and debugging the docker image
    aptitude) ## install aptitude on the running docker container
      require_one "running msnovelist6 container" "$DOCKER_ID"
      docker exec -uroot "$DOCKER_ID" apt-get update
      docker exec -uroot "$DOCKER_ID" apt-get install -y aptitude
      ;;
    root) ## enter the running docker container as root
      require_one "running msnovelist6 container" "$DOCKER_ID"
      docker exec -uroot -it "$DOCKER_ID" bash
      ;;
    build) ## build the Docker container
      docker build -f Dockerfile.cuda . -t "$MSN_IMAGE"
      ;;
    push) ## push the Docker container
      docker push "$MSN_IMAGE"
      ;;
    run) ## run the Docker container with webui and mounts from .env
      [[ -n ${DATA_LOC:-} ]] || die "DATA_LOC is not set (expected in .env)"
      docker run -d \
        -v "$DATA_LOC:/sirius6_db" \
        -v "$PWD:/msnovelist" \
        -v "$DATA_LOC:/target" \
        "$MSN_IMAGE" \
        webui.sh
      ;;
    kill) ## kill the running docker container
      [[ -n $DOCKER_ID ]] || die "no running msnovelist6 container found"
      # Deliberately unquoted: if several containers match, kill all of them.
      # shellcheck disable=SC2086
      docker kill $DOCKER_ID
      ;;
    ## the options below are for running on SLURM and singularity
    singularity-build) ## build singularity container
      # requires a stravsm/msnovelist6 container on the docker registry,
      # as singularity doesn't build Dockerfiles by itself
      [[ -n ${SCRATCH_PATH:-} ]] || die "SCRATCH_PATH is not set (expected in .env)"
      SINGULARITY_CACHEDIR="$SCRATCH_PATH/singularity_cache" singularity build \
        "$SCRATCH_PATH/MSNovelist-image/msnovelist.sif" "docker://$MSN_IMAGE"
      ;;
    tail-train) ## find currently running training job, dial in and follow
      attach_and_run "run_train.sh" tail-gpu
      ;;
    tail-eval) ## find currently running evaluation job, dial in and follow
      attach_and_run "run_evaluation.sh" tail-cpu
      ;;
    tail-gpu) ## find currently running training process and follow
      follow_stdout gpu_pids
      ;;
    tail-cpu) ## find currently running evaluation process and follow
      follow_stdout evaluation_pids
      ;;
    bash-vs) ## open a shell inside the running spawner-jupyterhub job
      JOBID=$(find_job_id "spawner-jupyterhub")
      require_one "SLURM job matching 'spawner-jupyterhub'" "$JOBID"
      srun --interactive --jobid "$JOBID" --pty bash
      ;;
    help | -h | --help) ## list the available commands
      usage
      ;;
    '') ## no command given: show usage, keep the historical exit status of 0
      usage >&2
      ;;
    *)
      printf 'admin.sh: unknown command: %s\n' "$1" >&2
      usage >&2
      return 2
      ;;
  esac
}

# Must stay the last statement: its status becomes the script's exit status.
main "$@"