Commit

update scratch directory to match CSD3

tavareshugo committed Jun 7, 2024
1 parent 6fc0b55 commit cc4b5d6
Showing 17 changed files with 54 additions and 55 deletions.
2 changes: 1 addition & 1 deletion course_files/slurm/drosophila_genome_indexing.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/drosophila_genome_indexing.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/estimate_pi.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/estimate_pi.log # standard output file
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/parallel_arrays.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/parallel_arrays_%a.log
#SBATCH -c 2 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/parallel_drosophila_mapping.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/drosophila_mapping_%a.log
#SBATCH -c 2 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/parallel_estimate_pi.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/parallel_estimate_pi_%a.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/parallel_turing_pattern.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/turing_pattern_%a.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/plot_sir.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/plot_sir_%a.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/seqkit_singularity.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/seqkit.log # standard output file
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
2 changes: 1 addition & 1 deletion course_files/slurm/simulate_sir.sh
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/simulate_sir_%a.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
19 changes: 9 additions & 10 deletions materials/02-ssh.md
@@ -106,11 +106,11 @@ After registering for a HPC account, you were sent the following information by
> - Password: emailed separately
> - Host: `train.bio`
>
> You were automatically allocated 40GB in `/home/USERNAME/` and 1TB in `/scratch/USERNAME/`.
> You were automatically allocated 40GB in `/home/USERNAME/` (backed-up storage) and 1TB in `/home/USERNAME/rds/hpc-work` (high-performance "scratch" space for computation, which is not backed up).
1. Connect to the training HPC using `ssh`. (Note: when you type your password, nothing shows on the screen - that's normal, the password is still being input.)
2. Take some time to explore your home directory to identify what files and folders are in there.
Can you identify and navigate to your scratch directory?
Can you identify and navigate to your high-performance compute directory?
3. Use the commands `free -h` (available RAM memory) and `nproc --all` (number of CPU cores available) to check the capabilities of the login node of our HPC. Check how many people are logged in to the HPC login node using the command `who`.

:::{.callout-answer}
@@ -136,14 +136,13 @@ We can get a detailed list of the files on our home directory:
ls -l
```

This will reveal that there is a shell script (`.sh` extension) named `slurm_submit_template.sh` and also a shortcut to our scratch directory.
We can see that this is a shortcut because of the way the output is printed as `scratch -> /scratch/username/`.
This will reveal that there is a shell script (`.sh` extension) named `slurm_submit_template.sh` and also a shortcut to our scratch directory, in this case `/home/username/rds/`.
Within that directory, we have another one named `hpc-work`, which is the default high-performance storage we were given on this cluster.

Therefore, to navigate to our scratch directory we can either use the shortcut from our home or use the full path:
Therefore, to navigate to our scratch directory we can do:

```bash
cd ~/scratch # using the shortcut from the home directory
cd /scratch/USERNAME/ # using the full path
cd ~/rds/hpc-work
```

Remember that `~` indicates your home directory, which in Linux filesystems is `/home/USERNAME/`.
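
If you want to double-check where a shortcut like this points, one option (assuming the standard Linux tools `ls` and `readlink` are available on the cluster, as they usually are) is:

```bash
# show the shortcut entry itself in your home directory
ls -ld ~/rds

# print the fully resolved path of the high-performance storage
readlink -f ~/rds/hpc-work
```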
@@ -220,7 +219,7 @@ Note that because we saved our file with `.sh` extension (the conventional exten

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

1. Create a new script file called `check_hostname.sh`. Copy the code shown below into this script and save it.
1. From the terminal, run the script using `bash`.
@@ -339,8 +338,8 @@ If you haven't already done so, connect your VS Code to the HPC following the in
**A1.**

To open the folder we follow the instructions in @sec-vscode (steps 10 and 11) and use the following path:
`/scratch/user/hpc_workshop`
(replacing "user" with your username)
`/home/username/rds/hpc-work/hpc_workshop`
(replacing "username" with your username)

**A2.**

20 changes: 10 additions & 10 deletions materials/03-slurm.md
@@ -47,7 +47,7 @@ echo "This job is running on:"
hostname
```

We can run this script from the login node using the `bash` interpreter (make sure you are in the correct directory first: `cd ~/scratch/hpc_workshop/`):
We can run this script from the login node using the `bash` interpreter (make sure you are in the correct directory first: `cd ~/rds/hpc-work/hpc_workshop/`):

```bash
bash slurm/simple_job.sh
@@ -249,7 +249,7 @@ For example, let's say that we would like to keep our job output files in a fold
For the example above, we might set these #SBATCH options:

```bash
#SBATCH -D /home/YOUR-USERNAME/scratch/hpc_workshop/
#SBATCH -D /home/YOUR-USERNAME/rds/hpc-work/hpc_workshop/
#SBATCH -o logs/simple_job.log
```

@@ -258,13 +258,13 @@ But, unless we create the `logs/` directory _before running the job_, `sbatch` w
Another thing to note is that you should not use the `~` home directory shortcut with the `-D` option. For example:

```bash
#SBATCH -D ~/scratch/hpc_workshop/
#SBATCH -D ~/rds/hpc-work/hpc_workshop/
```

This will not work; instead, you should use the full path, for example:

```bash
#SBATCH -D /home/YOUR-USERNAME/scratch/hpc_workshop/
#SBATCH -D /home/YOUR-USERNAME/rds/hpc-work/hpc_workshop/
```

:::
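
Putting this together, a minimal sketch of the workflow might look like the following (assuming your submission script is `slurm/simple_job.sh`, as in the earlier example):

```bash
# create the logs/ directory ahead of time, otherwise SLURM cannot write the output file
mkdir -p /home/YOUR-USERNAME/rds/hpc-work/hpc_workshop/logs

# submit the job to the scheduler
sbatch slurm/simple_job.sh
```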
@@ -274,7 +274,7 @@ Will not work, instead you should use the full path, for example:

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

In the "scripts" directory, you will find an R script called `pi_estimator.R`.
This script tries to get an approximate estimate for the number Pi using a stochastic algorithm.
@@ -341,7 +341,7 @@ The modified script should look similar to this:
```bash
#!/bin/bash
#SBATCH -p training
#SBATCH -D /home/USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/estimate_pi_50M.log # standard output file
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH -t 00:10:00 # time for the job HH:MM:SS.
@@ -398,7 +398,7 @@ Try these examples:

```shell
# Make a variable with a path starting from the user's /home
DATADIR="$HOME/scratch/data/"
DATADIR="$HOME/rds/hpc-work/data/"

# list files in that directory
ls $DATADIR
@@ -421,14 +421,14 @@ Here is a table summarising some of the most useful environment variables that S
| `$SLURM_JOB_ID` | The job ID |
| `$SLURM_JOB_NAME` | The name of the job defined with `-J` |
| `$SLURM_SUBMIT_DIR` | The working directory defined with `-D` |
| `$SLURM_ARRAY_TASK_ID` | The number of the sub-job when running parallel arrays (covered in the [Job Arrays](05-job_arrays.html) section) |
| `$SLURM_ARRAY_TASK_ID` | The number of the sub-job when running parallel arrays (covered in the [Job Arrays](05-arrays.md) section) |
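
As an illustration, a submission script could use these variables along the following lines (a minimal sketch: `myprogram --threads` stands in for whatever tool you are actually running, and `$SLURM_CPUS_PER_TASK` is the CPU count SLURM sets from the `-c` option):

```bash
#!/bin/bash
#SBATCH -p training
#SBATCH -D /home/YOUR-USERNAME/rds/hpc-work/hpc_workshop/
#SBATCH -o logs/env_demo.log
#SBATCH -c 4

# record which job this was and where it was submitted from
echo "Job $SLURM_JOB_ID was submitted from $SLURM_SUBMIT_DIR"

# pass the CPU allocation to the program instead of hard-coding it
myprogram --threads "$SLURM_CPUS_PER_TASK"
```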


### Exercise: SLURM environment variables

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

The R script used in the previous exercise supports parallelisation of some of its internal computations.
The number of CPUs used by the script can be modified using the `--ncpus` option.
@@ -451,7 +451,7 @@ We can modify our submission script in the following manner, for example for usi
```bash
#!/bin/bash
#SBATCH -p traininglarge # partiton name
#SBATCH -D /home/USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/estimate_pi_200M.log # output file
#SBATCH --mem=10G
#SBATCH -c 2 # number of CPUs
22 changes: 11 additions & 11 deletions materials/04-software.md
@@ -46,22 +46,22 @@ Please ask your administrator.
But if we load the software first, then the command works:

```bash
module load bowtie2
module load bowtie/2.5.0
bowtie2 --version
```

```
/scratch/applications/bowtie2/bowtie2-2.4.5-linux-x86_64/bowtie2-align-s version 2.4.5
/usr/local/Cluster-Apps/bowtie/2.5.0/bowtie2-align-s version 2.5.0
64-bit
Built on 51df6955ec49
Mon Jan 17 00:22:22 UTC 2022
Compiler: gcc version 8.3.1 20190311 (Red Hat 8.3.1-3) (GCC)
Options: -O3 -msse2 -funroll-loops -g3 -g -O2 -fvisibility=hidden -I/hbb_exe_gc_hardened/include -ffunction-sections -fdata-sections -fstack-protector -D_FORTIFY_SOURCE=2 -fPIE -std=c++11 -DPOPCNT_CAPABILITY -DNO_SPINLOCK -DWITH_QUEUELOCK=1
Built on login-e-12
Mon 14 Nov 12:11:12 UTC 2022
Compiler: gcc version 4.8.5 20150623 (Red Hat 4.8.5-44) (GCC)
Options: -O3 -msse2 -funroll-loops -g3 -std=c++11 -DPOPCNT_CAPABILITY -DNO_SPINLOCK -DWITH_QUEUELOCK=1
Sizeof {int, long, long long, void*, size_t, off_t}: {4, 8, 8, 8, 8, 8}
```

If you `echo $PATH`, you will notice the software's install location has been added to your PATH variable (the environment variable that tells the shell where to find programs to run).
Once you run `module unload bowtie2`, and then `echo $PATH` again, you notice the PATH variable will have been modified.
Once you run `module unload bowtie/2.5.0`, and then `echo $PATH` again, you notice the PATH variable will have been modified.
This is how the _Environment Modules_ package makes software available for you to use.
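
You can watch this happen with a short sequence of commands (a sketch: `module avail` and `module list` are standard Environment Modules commands, and the module name assumes the `bowtie/2.5.0` installation used above):

```bash
module avail bowtie       # list the bowtie versions installed on the cluster
module load bowtie/2.5.0  # prepend the bowtie2 install directory to your PATH
echo $PATH                # the bowtie2 directory now appears in the PATH
module list               # show which modules are currently loaded

module unload bowtie/2.5.0
echo $PATH                # the bowtie2 directory is gone again
```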

If a package is not available through the `module` command, your only option is to contact the HPC admin and ask them to install it for you.
@@ -283,7 +283,7 @@ Running the `source` command shown will ensure `mamba activate` becomes availabl

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

In the `data` folder, you will find some files resulting from whole-genome sequencing of individuals from the model organism _Drosophila melanogaster_ (fruit fly).
Our objective will be to align our sequences to the reference genome, using a software called _bowtie2_.
Expand Down Expand Up @@ -340,7 +340,7 @@ Then, if we run `bowtie2 --help`, we should get the software help printed on the
We need to fix the script to specify the correct working directory with our username (only showing the relevant line of the script):

```
#SBATCH -D /home/USERNAME/scratch/hpc_workshop
#SBATCH -D /home/USERNAME/rds/hpc-work/hpc_workshop
```

Replacing "USERNAME" with your username.
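
If you prefer to make that substitution from the command line, one option is to let `sed` insert your username for you (an aside, assuming the placeholder in the course scripts is spelled exactly `FIX-YOUR-USERNAME` and that `$USER` holds your cluster username):

```bash
# replace the placeholder with your actual username in every course submission script
sed -i "s/FIX-YOUR-USERNAME/$USER/g" slurm/*.sh
```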
@@ -456,7 +456,7 @@ Once we have this image available, we are ready to run the software, which will

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

To illustrate the use of Singularity, we will use the `seqkit` software to extract some basic statistics from the sequencing files in the `data/drosophila` directory.
If you haven't done so already, first download the container image with the commands shown above.
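
For reference, running a tool through Singularity generally follows this pattern (a sketch only; the image file name and data paths here are assumptions rather than the exact ones used in the course):

```bash
# run seqkit from inside the container image to get basic statistics on the FASTQ files
singularity exec seqkit.sif seqkit stats data/drosophila/*.fastq.gz
```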
@@ -497,7 +497,7 @@ Instead, we can modify the SLURM submission script to include this command insid
```bash
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /home/YOUR-USERNAME/scratch/hpc_workshop/ # working directory
#SBATCH -D /home/YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/seqkit.log # standard output file
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
6 changes: 3 additions & 3 deletions materials/05-arrays.md
@@ -85,7 +85,7 @@ Here are some examples taken from SLURM's Job Array Documentation:

:::{.callout-exercise}

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

Previously, we used the `pi_estimator.R` script to obtain a single estimate of the number Pi.
Since this is done using a stochastic algorithm, we may want to run it several times to get a sense of the error associated with our estimate.
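
As a reminder, the general shape of an array submission is sketched below (the `-a` option defines the range of task numbers, `%a` and `$SLURM_ARRAY_TASK_ID` expand to the current task number, and `my_analysis.sh` is a placeholder for your own script):

```bash
#!/bin/bash
#SBATCH -p training
#SBATCH -o logs/replicate_%a.log   # %a is replaced by the task number
#SBATCH -a 1-10                    # run tasks numbered 1 to 10

# each task can use its own number to vary inputs and outputs
echo "This is replicate $SLURM_ARRAY_TASK_ID"
bash my_analysis.sh "$SLURM_ARRAY_TASK_ID"
```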
@@ -197,7 +197,7 @@ You can choose one of the two to start with (whichever one suits your work bette

#### Bioinformatics

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

Continuing from our previous exercise where we [prepared our _Drosophila_ genome for bowtie2](04-software.html#Loading_Conda_Environments), we now want to map each of our samples' sequence data to the reference genome.

@@ -249,7 +249,7 @@ In a typical bioinformatics workflow these files would be used for further analy

#### Simulation

Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).

A PhD student is working on a project to understand how different patterns, such as animal stripes and coral colonies, form in nature.
They are using a type of model, first proposed by [Alan Turing](https://en.wikipedia.org/wiki/Turing_pattern), which models the interaction between two components that can diffuse in space and promote/inhibit each other.
10 changes: 5 additions & 5 deletions materials/06-dependencies.md
@@ -42,7 +42,7 @@ We will give examples of `afterok`, `afternotok` and `singleton`, which are comm
:::{.callout-note}
**Dependencies and Arrays**

The job dependency feature can be combined with [job arrays](05-job_arrays.md) to automate the running of parallel jobs as well as launching downstream jobs that depend on the output of other jobs.
The job dependency feature can be combined with [job arrays](05-arrays.md) to automate the running of parallel jobs as well as launching downstream jobs that depend on the output of other jobs.
:::
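
In practice that combination might look something like this sketch (using the course's `parallel_estimate_pi.sh` array script and a downstream `combine_pi.sh` script like the one discussed further below; the exact paths are assumptions):

```bash
# submit the array job and capture its job ID
ARRAY_ID=$(sbatch --parsable slurm/parallel_estimate_pi.sh)

# the summary job only starts once every task in the array has finished successfully
sbatch --dependency=afterok:$ARRAY_ID slurm/combine_pi.sh
```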

## Successful Run: `afterok`
@@ -174,7 +174,7 @@ Furthermore, to the `tast3.sh` script we add `--dependency singleton`, to indica
:::{.callout-exercise}
#### Dependencies & Arrays
Make sure you are in the workshop folder (`cd ~/scratch/hpc_workshop`).
Make sure you are in the workshop folder (`cd ~/rds/hpc-work/hpc_workshop`).
In this exercise we'll use a new script that runs a stochastic simulation of the classic epidemiological model known as SIR (Susceptible, Infectious, or Recovered).
@@ -227,7 +227,7 @@ Here is the full script:
```bash
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /scratch/YOUR_USERNAME/hpc_workshop
#SBATCH -D /home/YOUR_USERNAME/rds/hpc-work/hpc_workshop
#SBATCH -o logs/plot_sir.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
@@ -269,7 +269,7 @@ Then, we could create a new submission script with the following:
```bash
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /scratch/FIXME/hpc_workshop
#SBATCH -D /home/USERNAME/rds/hpc-work/hpc_workshop
#SBATCH -o logs/combine_pi_results.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
@@ -293,7 +293,7 @@ Let's say that the script to combine the results was called `combine_pi.sh`, wit
```bash
#!/bin/bash
#SBATCH -p training # name of the partition to run job on
#SBATCH -D /scratch/FIXME/hpc_workshop
#SBATCH -D /home/USERNAME/rds/hpc-work/hpc_workshop
#SBATCH -o logs/combine_pi_results.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
8 changes: 4 additions & 4 deletions materials/07-files.md
@@ -97,7 +97,7 @@ To check what files `rsync` would transfer but not actually transfer them, add t
**Note:** only do this exercise if you are following the materials by yourself as a self-learner. For those attending our live workshop, we have already put the materials on the training HPC.

- <a href="https://www.dropbox.com/sh/mcu1hjxlr8yqxxa/AAB8s5NcHZH1Tkof4B5JXuyLa?dl=0" target="_blank" rel="noopener noreferrer">Download the data</a> for this course to your computer and place it on your Desktop. (do not unzip the file yet!)
- Use _Filezilla_, `scp` or `rsync` (your choice) to move this file to the directory we created earlier: `/scratch/user/hpc_workshop/`.
- Use _Filezilla_, `scp` or `rsync` (your choice) to move this file to the directory we created earlier: `/home/USERNAME/rds/hpc-work/hpc_workshop/`.
- The file we just downloaded is a compressed file. From the HPC terminal, use `unzip` to decompress the file.
- Bonus: how many shell scripts (files with `.sh` extension) are there in your project folder?

@@ -110,18 +110,18 @@ Notice that these commands are **run from your local terminal**:

```bash
# with scp
scp -r ~/Desktop/hpc_workshop_files.zip username@train.bio:scratch/hpc_workshop/
scp -r ~/Desktop/hpc_workshop_files.zip username@train.bio:rds/hpc-work/hpc_workshop/

# with rsync
rsync -avhu ~/Desktop/hpc_workshop_files.zip username@train.bio:scratch/hpc_workshop/
rsync -avhu ~/Desktop/hpc_workshop_files.zip username@train.bio:rds/hpc-work/hpc_workshop/
```

Once we finish transfering the files we can go ahead and decompress the data folder.
Note, this is now run **from the HPC terminal**:

```bash
# make sure to be in the correct directory
cd ~/scratch/hpc_workshop/
cd ~/rds/hpc-work/hpc_workshop/

# decompress the files
unzip hpc_workshop_files.zip