diff --git a/.gitignore b/.gitignore
index 130d93e..422b6ec 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,6 @@ slides
 notes
 
 # specific files from course_materials that we do not version control
-course_files/data
-course_files/logs
-course_files/results
\ No newline at end of file
+course_files/**/data
+course_files/**/logs
+course_files/**/results
\ No newline at end of file
diff --git a/course_files/dependency/notok/submit_notok.sh b/course_files/aws/dependency/notok/submit_notok.sh
similarity index 100%
rename from course_files/dependency/notok/submit_notok.sh
rename to course_files/aws/dependency/notok/submit_notok.sh
diff --git a/course_files/dependency/notok/task_with_checkpoints.sh b/course_files/aws/dependency/notok/task_with_checkpoints.sh
similarity index 100%
rename from course_files/dependency/notok/task_with_checkpoints.sh
rename to course_files/aws/dependency/notok/task_with_checkpoints.sh
diff --git a/course_files/dependency/ok/submit_ok.sh b/course_files/aws/dependency/ok/submit_ok.sh
similarity index 100%
rename from course_files/dependency/ok/submit_ok.sh
rename to course_files/aws/dependency/ok/submit_ok.sh
diff --git a/course_files/dependency/ok/task1.sh b/course_files/aws/dependency/ok/task1.sh
similarity index 100%
rename from course_files/dependency/ok/task1.sh
rename to course_files/aws/dependency/ok/task1.sh
diff --git a/course_files/dependency/ok/task2.sh b/course_files/aws/dependency/ok/task2.sh
similarity index 100%
rename from course_files/dependency/ok/task2.sh
rename to course_files/aws/dependency/ok/task2.sh
diff --git a/course_files/dependency/singleton/submit_singleton.sh b/course_files/aws/dependency/singleton/submit_singleton.sh
similarity index 100%
rename from course_files/dependency/singleton/submit_singleton.sh
rename to course_files/aws/dependency/singleton/submit_singleton.sh
diff --git a/course_files/dependency/singleton/task1_singleton.sh b/course_files/aws/dependency/singleton/task1_singleton.sh
similarity index 100%
rename from course_files/dependency/singleton/task1_singleton.sh
rename to course_files/aws/dependency/singleton/task1_singleton.sh
diff --git a/course_files/dependency/singleton/task2_singleton.sh b/course_files/aws/dependency/singleton/task2_singleton.sh
similarity index 100%
rename from course_files/dependency/singleton/task2_singleton.sh
rename to course_files/aws/dependency/singleton/task2_singleton.sh
diff --git a/course_files/dependency/singleton/task3_singleton.sh b/course_files/aws/dependency/singleton/task3_singleton.sh
similarity index 100%
rename from course_files/dependency/singleton/task3_singleton.sh
rename to course_files/aws/dependency/singleton/task3_singleton.sh
diff --git a/course_files/scripts/pi_estimator.R b/course_files/aws/scripts/pi_estimator.R
similarity index 100%
rename from course_files/scripts/pi_estimator.R
rename to course_files/aws/scripts/pi_estimator.R
diff --git a/course_files/scripts/sir_plotter.py b/course_files/aws/scripts/sir_plotter.py
similarity index 100%
rename from course_files/scripts/sir_plotter.py
rename to course_files/aws/scripts/sir_plotter.py
diff --git a/course_files/scripts/sir_simulator.py b/course_files/aws/scripts/sir_simulator.py
similarity index 100%
rename from course_files/scripts/sir_simulator.py
rename to course_files/aws/scripts/sir_simulator.py
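Note on the .gitignore change above: the `**` patterns ignore `data`, `logs` and `results` at any depth under `course_files/` (e.g. both `course_files/aws/data` and `course_files/csd3/logs`), which requires git 1.8.2 or newer. A minimal sketch to confirm which pattern matches a given path:

    # -v prints the .gitignore line responsible for each match
    git check-ignore -v course_files/aws/data course_files/csd3/logs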
diff --git a/course_files/scripts/turing_pattern.py b/course_files/aws/scripts/turing_pattern.py
similarity index 100%
rename from course_files/scripts/turing_pattern.py
rename to course_files/aws/scripts/turing_pattern.py
diff --git a/course_files/slurm/drosophila_genome_indexing.sh b/course_files/aws/slurm/drosophila_genome_indexing.sh
similarity index 100%
rename from course_files/slurm/drosophila_genome_indexing.sh
rename to course_files/aws/slurm/drosophila_genome_indexing.sh
diff --git a/course_files/slurm/estimate_pi.sh b/course_files/aws/slurm/estimate_pi.sh
similarity index 100%
rename from course_files/slurm/estimate_pi.sh
rename to course_files/aws/slurm/estimate_pi.sh
diff --git a/course_files/slurm/parallel_arrays.sh b/course_files/aws/slurm/parallel_arrays.sh
similarity index 100%
rename from course_files/slurm/parallel_arrays.sh
rename to course_files/aws/slurm/parallel_arrays.sh
diff --git a/course_files/slurm/parallel_drosophila_mapping.sh b/course_files/aws/slurm/parallel_drosophila_mapping.sh
similarity index 100%
rename from course_files/slurm/parallel_drosophila_mapping.sh
rename to course_files/aws/slurm/parallel_drosophila_mapping.sh
diff --git a/course_files/slurm/parallel_estimate_pi.sh b/course_files/aws/slurm/parallel_estimate_pi.sh
similarity index 100%
rename from course_files/slurm/parallel_estimate_pi.sh
rename to course_files/aws/slurm/parallel_estimate_pi.sh
diff --git a/course_files/slurm/parallel_turing_pattern.sh b/course_files/aws/slurm/parallel_turing_pattern.sh
similarity index 100%
rename from course_files/slurm/parallel_turing_pattern.sh
rename to course_files/aws/slurm/parallel_turing_pattern.sh
diff --git a/course_files/slurm/plot_sir.sh b/course_files/aws/slurm/plot_sir.sh
similarity index 100%
rename from course_files/slurm/plot_sir.sh
rename to course_files/aws/slurm/plot_sir.sh
diff --git a/course_files/slurm/seqkit_singularity.sh b/course_files/aws/slurm/seqkit_singularity.sh
similarity index 100%
rename from course_files/slurm/seqkit_singularity.sh
rename to course_files/aws/slurm/seqkit_singularity.sh
diff --git a/course_files/slurm/simple_job.sh b/course_files/aws/slurm/simple_job.sh
similarity index 100%
rename from course_files/slurm/simple_job.sh
rename to course_files/aws/slurm/simple_job.sh
diff --git a/course_files/slurm/simulate_sir.sh b/course_files/aws/slurm/simulate_sir.sh
similarity index 100%
rename from course_files/slurm/simulate_sir.sh
rename to course_files/aws/slurm/simulate_sir.sh
diff --git a/course_files/csd3/dependency/notok/submit_notok.sh b/course_files/csd3/dependency/notok/submit_notok.sh
new file mode 100644
index 0000000..d7c1b12
--- /dev/null
+++ b/course_files/csd3/dependency/notok/submit_notok.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+# This is not submitted to SLURM!
+# These are the sbatch commands we are using to submit our jobs
+
+# first submission
+run1_id=$(sbatch --parsable task_with_checkpoints.sh)
+
+# second submission in case the first one fails
+run2_id=$(sbatch --parsable --dependency afternotok:${run1_id} task_with_checkpoints.sh)
+
+# submit a third time in case the second fails
+run3_id=$(sbatch --parsable --dependency afternotok:${run2_id} task_with_checkpoints.sh)
+
+# etc... we could continue submitting more
+# but it's probably good to stop after a while
+# and check if our job finally completed or not
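For reference, the same resubmission chain can be generated with a loop rather than spelled out; a minimal sketch (not part of the course files), assuming the same `task_with_checkpoints.sh` and four retries:

    # each iteration only runs if the previous submission failed
    previous_id=$(sbatch --parsable task_with_checkpoints.sh)
    for attempt in 2 3 4 5
    do
        previous_id=$(sbatch --parsable --dependency afternotok:${previous_id} task_with_checkpoints.sh)
    done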
diff --git a/course_files/csd3/dependency/notok/task_with_checkpoints.sh b/course_files/csd3/dependency/notok/task_with_checkpoints.sh
new file mode 100644
index 0000000..04a9ad1
--- /dev/null
+++ b/course_files/csd3/dependency/notok/task_with_checkpoints.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -o logs/task_with_checkpoints_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:01:00 # time for the job HH:MM:SS. Default: 1 min
+
+# output file names
+checkpoint="checkpoint.txt"
+finalresult="long_task_result.txt"
+
+# the code below this is a bit silly and you don't need to worry about its details
+# we are simply incrementing a number by 1 every 15 seconds
+# when that number reaches 10, we consider the job finished
+# at each stage the current number is saved in the checkpoint file
+# so if the job fails we resume it from that point
+
+#### incrementer-with-checkpoint ####
+
+# Check if checkpoint file exists
+if [ -f "$checkpoint" ]; then
+    # if it does, read the number from the file
+    number=$(<"$checkpoint")
+else
+    # if it doesn't, initiate it
+    number=0
+fi
+
+# loop through every 15 seconds
+for i in $(seq $(( $number + 1 )) 10)
+do
+    # increment after 15 seconds
+    sleep 15
+    ((number++))
+
+    # write to checkpoint
+    echo "$number" > "$checkpoint"
+done
+
+# result
+echo "Congratulations, you have counted to 10." > "$finalresult"
+
+# message to log file
+echo "Job complete, removing checkpoint.txt file."
+rm $checkpoint
diff --git a/course_files/csd3/dependency/ok/submit_ok.sh b/course_files/csd3/dependency/ok/submit_ok.sh
new file mode 100644
index 0000000..a28f86e
--- /dev/null
+++ b/course_files/csd3/dependency/ok/submit_ok.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# This is not submitted to SLURM!
+# These are the sbatch commands we are using to submit our jobs
+
+# first task of our pipeline
+run1_id=$(sbatch --parsable task1.sh)
+
+# second task of our pipeline - only runs if the previous was successful
+sbatch --dependency afterok:${run1_id} task2.sh
diff --git a/course_files/csd3/dependency/ok/task1.sh b/course_files/csd3/dependency/ok/task1.sh
new file mode 100644
index 0000000..aa15e63
--- /dev/null
+++ b/course_files/csd3/dependency/ok/task1.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -o logs/task1_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:02:00 # time for the job HH:MM:SS. Default: 1 min
+
+# sleep for 60 seconds (to have time to see the job in the queue)
+sleep 60
+
+# create an example file
+touch output_task1.txt
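While task1 runs, the dependent job sits in the queue waiting; a hedged way to watch this (the format string is illustrative, and `--me` needs a reasonably recent SLURM, otherwise use `-u $USER`):

    # a job held by a dependency is listed as PENDING with reason (Dependency);
    # %E prints the remaining dependency
    squeue --me --format "%.10i %.20j %.10T %.20E"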
diff --git a/course_files/csd3/dependency/ok/task2.sh b/course_files/csd3/dependency/ok/task2.sh
new file mode 100644
index 0000000..cb7ac04
--- /dev/null
+++ b/course_files/csd3/dependency/ok/task2.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -o logs/task2_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:01:00 # time for the job HH:MM:SS. Default: 1 min
+
+# sleep for 10 seconds (to have time to see the job in the queue)
+sleep 10
+
+# rename file from previous task
+# which requires task1 to have completed successfully
+mv output_task1.txt output_task2.txt
diff --git a/course_files/csd3/dependency/singleton/submit_singleton.sh b/course_files/csd3/dependency/singleton/submit_singleton.sh
new file mode 100644
index 0000000..d23a7dd
--- /dev/null
+++ b/course_files/csd3/dependency/singleton/submit_singleton.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# This is not submitted to SLURM!
+# These are the sbatch commands we are using to submit our jobs
+
+# first two tasks of our pipeline - none have dependencies
+sbatch -J my_pipeline task1_singleton.sh
+sbatch -J my_pipeline task2_singleton.sh
+
+# the third task requires all previous ones with the same "job name" to have finished
+sbatch -J my_pipeline --dependency singleton task3_singleton.sh
diff --git a/course_files/csd3/dependency/singleton/task1_singleton.sh b/course_files/csd3/dependency/singleton/task1_singleton.sh
new file mode 100644
index 0000000..de4e491
--- /dev/null
+++ b/course_files/csd3/dependency/singleton/task1_singleton.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -J my_pipeline # name for the job
+#SBATCH -o logs/task1_singleton_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:01:00 # time for the job HH:MM:SS. Default: 1 min
+
+# sleep for 10 seconds (to have time to see the job in the queue)
+sleep 10
+
+# create a file
+echo "Output from task1" > result_task1.txt
diff --git a/course_files/csd3/dependency/singleton/task2_singleton.sh b/course_files/csd3/dependency/singleton/task2_singleton.sh
new file mode 100644
index 0000000..67e63fa
--- /dev/null
+++ b/course_files/csd3/dependency/singleton/task2_singleton.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -J my_pipeline # name for the job
+#SBATCH -o logs/task2_singleton_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:01:00 # time for the job HH:MM:SS. Default: 1 min
+
+# sleep for 10 seconds (to have time to see the job in the queue)
+sleep 10
+
+# create a file
+echo "Output from task2" > result_task2.txt
diff --git a/course_files/csd3/dependency/singleton/task3_singleton.sh b/course_files/csd3/dependency/singleton/task3_singleton.sh
new file mode 100644
index 0000000..3dfb799
--- /dev/null
+++ b/course_files/csd3/dependency/singleton/task3_singleton.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -J my_pipeline # name for the job
+#SBATCH -o logs/task3_singleton_%j.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:01:00 # time for the job HH:MM:SS. Default: 1 min
+#SBATCH --dependency singleton
+
+# sleep for 10 seconds (to have time to see the job in the queue)
+sleep 10
+
+# concatenate previous two files into one
+cat result_task1.txt result_task2.txt > result_task3.txt
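With `--dependency singleton`, task3 only starts once every job sharing the name `my_pipeline` (from the same user) has finished. A hedged sketch of one way to confirm the ordering after submission (the field list is illustrative):

    # task3 should show a Start time after the End times of task1 and task2
    sacct --name my_pipeline --format JobID,JobName,State,Start,End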
diff --git a/course_files/csd3/scripts/pi_estimator.R b/course_files/csd3/scripts/pi_estimator.R
new file mode 100644
index 0000000..3b63879
--- /dev/null
+++ b/course_files/csd3/scripts/pi_estimator.R
@@ -0,0 +1,65 @@
+
+
+# User Arguments ----------------------------------------------------------
+
+suppressPackageStartupMessages(library("argparse"))
+
+# create parser object
+parser <- ArgumentParser()
+
+# specify our desired options
+# by default ArgumentParser will add a help option
+parser$add_argument("--ncpus", type = "integer", default = 1,
+                    help = "number of CPUs used for calculation. Default: %(default)s")
+parser$add_argument("--nsamples", type = "integer", default = 10,
+                    help = "Number of points to sample for estimation in millions. Default: %(default)s",
+                    metavar = "number")
+
+# parse arguments
+args <- parser$parse_args()
+
+
+# Functions ---------------------------------------------------------------
+
+# split a number into N parts
+# https://www.geeksforgeeks.org/split-the-number-into-n-parts-such-that-difference-between-the-smallest-and-the-largest-part-is-minimum/
+split <- function(x, n){
+  if(x %% n == 0) {
+    out <- rep(floor(x/n), n)
+  } else {
+    # up to n - (x %% n), the values
+    # will be x / n
+    # after that, the values
+    # will be x / n + 1
+    zp = n - (x %% n)
+    pp = floor(x/n)
+    out <- 1:n
+    out <- ifelse(out > zp, pp + 1, pp)
+  }
+  out
+}
+
+# count points inside a circle
+inside_circle <- function(total_count){
+  x <- runif(total_count)
+  y <- runif(total_count)
+  radii <- sqrt(x*x + y*y)
+  count <- length(radii[which(radii <= 1)])
+  count
+}
+
+# Estimate Pi ---------------------
+
+# grab user options
+n_samples <- ceiling(args$nsamples*1e6)
+ncpus <- args$ncpus
+
+results <- parallel::mclapply(split(n_samples, ncpus), inside_circle)
+results <- unlist(results)
+
+counts <- sum(results)
+my_pi <- 4*counts/n_samples
+
+# print to standard output
+cat(my_pi, "\n")
+
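The script above estimates pi by Monte Carlo: points are sampled uniformly in the unit square, a point falls inside the quarter-circle when sqrt(x^2 + y^2) <= 1, and that happens with probability pi/4, hence `my_pi <- 4*counts/n_samples`. A usage sketch, assuming an environment where R and the argparse package are available:

    # 50 million points, split across 4 CPUs via parallel::mclapply
    Rscript scripts/pi_estimator.R --nsamples 50 --ncpus 4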
diff --git a/course_files/csd3/scripts/turing_pattern.py b/course_files/csd3/scripts/turing_pattern.py
new file mode 100644
index 0000000..86d6f1b
--- /dev/null
+++ b/course_files/csd3/scripts/turing_pattern.py
@@ -0,0 +1,135 @@
+# Author: Benjamin F. Maier
+# https://github.com/benmaier/reaction-diffusion
+# Adapted by: Hugo Tavares
+
+# import necessary libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import argparse
+
+
+# ============ capture user input =============
+
+parser = argparse.ArgumentParser(description='Reaction diffusion models.')
+parser.add_argument('-f', '--feed', type=float, default=0.04,
+                    help='The "feed rate" parameter of the model. Default: 0.04')
+parser.add_argument('-k', '--kill', type=float, default=0.06,
+                    help='The "kill rate" parameter of the model. Default: 0.06')
+parser.add_argument('-o', '--outdir', type=str, default=".",
+                    help='Output directory. Default: .')
+
+args = parser.parse_args()
+
+
+# ============ define relevant functions =============
+
+# an efficient function to compute a mean over neighboring cells
+def apply_laplacian(mat):
+    """This function applies a discretized Laplacian
+    in periodic boundary conditions to a matrix
+    For more information see
+    https://en.wikipedia.org/wiki/Discrete_Laplace_operator#Implementation_via_operator_discretization
+    """
+
+    # the cell appears 4 times in the formula to compute
+    # the total difference
+    neigh_mat = -4*mat.copy()
+
+    # Each direct neighbor on the lattice is counted in
+    # the discrete difference formula
+    neighbors = [
+        ( 1.0, (-1, 0) ),
+        ( 1.0, ( 0,-1) ),
+        ( 1.0, ( 0, 1) ),
+        ( 1.0, ( 1, 0) ),
+    ]
+
+    # shift matrix according to demanded neighbors
+    # and add to this cell with corresponding weight
+    for weight, neigh in neighbors:
+        neigh_mat += weight * np.roll(mat, neigh, (0,1))
+
+    return neigh_mat
+
+# Define the update formula for chemicals A and B
+def update(A, B, DA, DB, f, k, delta_t):
+    """Apply the Gray-Scott update formula"""
+
+    # compute the diffusion part of the update
+    diff_A = DA * apply_laplacian(A)
+    diff_B = DB * apply_laplacian(B)
+
+    # Apply chemical reaction
+    reaction = A*B**2
+    diff_A -= reaction
+    diff_B += reaction
+
+    # Apply birth/death
+    diff_A += f * (1-A)
+    diff_B -= (k+f) * B
+
+    A += diff_A * delta_t
+    B += diff_B * delta_t
+
+    return A, B
+
+def get_initial_A_and_B(N, random_influence = 0.2):
+    """get the initial chemical concentrations"""
+
+    # get initial homogeneous concentrations
+    A = (1-random_influence) * np.ones((N,N))
+    B = np.zeros((N,N))
+
+    # put some noise on there
+    A += random_influence * np.random.random((N,N))
+    B += random_influence * np.random.random((N,N))
+
+    # get center and radius for initial disturbance
+    N2, r = N//2, 50
+
+    # apply initial disturbance
+    A[N2-r:N2+r, N2-r:N2+r] = 0.50
+    B[N2-r:N2+r, N2-r:N2+r] = 0.25
+
+    return A, B
+
+def draw(A, B):
+    """return the matplotlib artists for animation"""
+    fig, ax = plt.subplots(1,2,figsize=(5.65,3))
+    imA = ax[0].imshow(A, animated=True,vmin=0,cmap='Greys')
+    imB = ax[1].imshow(B, animated=True,vmax=1,cmap='Greys')
+    ax[0].axis('off')
+    ax[1].axis('off')
+    ax[0].set_title('A')
+    ax[1].set_title('B')
+
+    return fig, imA, imB
+
+
+# =========== define model parameters ==========
+
+# update in time
+delta_t = 1.0
+
+# Diffusion coefficients
+DA = 0.16
+DB = 0.08
+
+# define birth/death rates
+f = args.feed
+k = args.kill
+
+# grid size
+N = 200
+
+# initialize the chemical concentrations
+A, B = get_initial_A_and_B(N)
+
+N_simulation_steps = 10000
+for step in range(N_simulation_steps):
+    A, B = update(A, B, DA, DB, f, k, delta_t)
+
+# make plot
+plt.imshow(A)
+plt.axis('off')
+plt.savefig("{}/f{}_k{}.png".format(args.outdir, args.feed, args.kill))
\ No newline at end of file
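A usage sketch for the script above (parameter values are illustrative); the output PNG is named after the feed/kill parameters:

    # writes results/turing/f0.055_k0.062.png
    python scripts/turing_pattern.py --feed 0.055 --kill 0.062 --outdir results/turing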
diff --git a/course_files/csd3/slurm/drosophila_genome_indexing.sh b/course_files/csd3/slurm/drosophila_genome_indexing.sh
new file mode 100644
index 0000000..9cc8c11
--- /dev/null
+++ b/course_files/csd3/slurm/drosophila_genome_indexing.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/drosophila_genome_indexing.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:10:00 # time for the job HH:MM:SS. Default: 1 min
+
+# these lines are needed to source the mamba activate command
+# include them if you want to activate environments in your script
+eval "$(conda shell.bash hook)"
+source $CONDA_PREFIX/etc/profile.d/mamba.sh
+
+# activate conda environment
+FIXME
+
+# make an output directory for the index
+mkdir -p results/drosophila/genome
+
+# index the reference genome with bowtie2; the syntax is:
+# bowtie2-build input.fa output_prefix
+bowtie2-build data/genome/drosophila_genome.fa results/drosophila/genome/index
diff --git a/course_files/csd3/slurm/estimate_pi.sh b/course_files/csd3/slurm/estimate_pi.sh
new file mode 100644
index 0000000..1f95054
--- /dev/null
+++ b/course_files/csd3/slurm/estimate_pi.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/estimate_pi.log # standard output file
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:10:00 # time for the job HH:MM:SS. Default: 1 min
+
+Rscript scripts/pi_estimator.R
diff --git a/course_files/csd3/slurm/parallel_arrays.sh b/course_files/csd3/slurm/parallel_arrays.sh
new file mode 100644
index 0000000..f15428a
--- /dev/null
+++ b/course_files/csd3/slurm/parallel_arrays.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/parallel_arrays_%a.log
+#SBATCH -c 2 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:30:00 # time for the job HH:MM:SS. Default: 1 min
+#SBATCH -a 1-3
+
+echo "This is task number $SLURM_ARRAY_TASK_ID"
+echo "Using $SLURM_CPUS_PER_TASK CPUs"
+echo "Running on:"
+hostname
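Submitting `parallel_arrays.sh` once creates three independent tasks: `%a` in the `-o` option and `$SLURM_ARRAY_TASK_ID` in the script both expand to the task number, so each task writes its own log:

    # one submission, three tasks; produces logs/parallel_arrays_1.log, _2.log, _3.log
    sbatch parallel_arrays.sh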
diff --git a/course_files/csd3/slurm/parallel_drosophila_mapping.sh b/course_files/csd3/slurm/parallel_drosophila_mapping.sh
new file mode 100644
index 0000000..a9d4abf
--- /dev/null
+++ b/course_files/csd3/slurm/parallel_drosophila_mapping.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/drosophila_mapping_%a.log
+#SBATCH -c 2 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:30:00 # time for the job HH:MM:SS. Default: 1 min
+#SBATCH -a 2-FIXME # we start at 2 because of the header
+
+# these lines are needed to source the mamba activate command
+# include them if you want to activate environments in your script
+eval "$(conda shell.bash hook)"
+source $CONDA_PREFIX/etc/profile.d/mamba.sh
+
+# activate conda environment
+mamba activate mapping
+
+# get the relevant line of the CSV sample information file
+# see http://bigdatums.net/2016/02/22/3-ways-to-get-the-nth-line-of-a-file-in-linux/
+SAMPLE_INFO=$(cat data/drosophila_sample_info.csv | head -n FIXME | tail -n 1)
+
+# get the sample name and paths to read1 and read2
+SAMPLE=$(echo $SAMPLE_INFO | cut -d "," -f 1)
+READ1=$(echo $SAMPLE_INFO | cut -d "," -f 2)
+READ2=$(echo $SAMPLE_INFO | cut -d "," -f 3)
+
+# create output directory
+mkdir -p "results/drosophila/mapping"
+
+# output some informative messages
+echo "The input read files are: $READ1 and $READ2"
+echo "Number of CPUs used: $SLURM_CPUS_PER_TASK"
+
+# Align the reads to the genome
+bowtie2 --very-fast -p "$SLURM_CPUS_PER_TASK" \
+    -x "results/drosophila/genome/index" \
+    -1 "$READ1" \
+    -2 "$READ2" > "results/drosophila/mapping/$SAMPLE.sam"
diff --git a/course_files/csd3/slurm/parallel_estimate_pi.sh b/course_files/csd3/slurm/parallel_estimate_pi.sh
new file mode 100644
index 0000000..be36e88
--- /dev/null
+++ b/course_files/csd3/slurm/parallel_estimate_pi.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/parallel_estimate_pi_%a.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:10:00 # time for the job HH:MM:SS. Default: 1 min
+#SBATCH -a FIXME
+
+echo "Starting array: $SLURM_ARRAY_TASK_ID"
+
+# make output directory, in case it doesn't exist
+mkdir -p results/pi
+
+# run pi_estimator script
+Rscript scripts/pi_estimator.R > results/pi/replicate_${SLURM_ARRAY_TASK_ID}.txt
+
+echo "Finished array: $SLURM_ARRAY_TASK_ID"
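The mapping script above and the turing-pattern script below share the same idiom: with `-a 2-N`, each array task reads line `$SLURM_ARRAY_TASK_ID` of a CSV file, so task 2 reads the first data line after the header. A hedged sketch of the filled-in command (the range 2-9 assumes a hypothetical file with 8 samples plus a header line):

    #SBATCH -a 2-9
    # head selects the first N lines, tail keeps only the last of those
    SAMPLE_INFO=$(head -n ${SLURM_ARRAY_TASK_ID} data/drosophila_sample_info.csv | tail -n 1)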
diff --git a/course_files/csd3/slurm/parallel_turing_pattern.sh b/course_files/csd3/slurm/parallel_turing_pattern.sh
new file mode 100644
index 0000000..b3daf76
--- /dev/null
+++ b/course_files/csd3/slurm/parallel_turing_pattern.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/turing_pattern_%a.log
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:30:00 # time for the job HH:MM:SS. Default: 1 min
+#SBATCH -a 2-FIXME # we start at 2 because of the header
+
+echo "Starting array: $SLURM_ARRAY_TASK_ID"
+
+# these lines are needed to source the mamba activate command
+# include them if you want to activate environments in your script
+eval "$(conda shell.bash hook)"
+source $CONDA_PREFIX/etc/profile.d/mamba.sh
+
+# activate conda environment
+mamba activate scipy
+
+# make output directory
+mkdir -p results/turing
+
+# get the relevant line of the CSV parameter file
+# see http://bigdatums.net/2016/02/22/3-ways-to-get-the-nth-line-of-a-file-in-linux/
+PARAMS=$(cat data/turing_model_parameters.csv | head -n FIXME | tail -n 1)
+
+# separate the values based on comma "," as delimiter
+FEED=$(echo ${PARAMS} | cut -d "," -f 1)
+KILL=$(echo ${PARAMS} | cut -d "," -f 2)
+
+# Launch script using our defined variables
+python scripts/turing_pattern.py --feed ${FEED} --kill ${KILL} --outdir results/turing
+
+echo "Finished array: $SLURM_ARRAY_TASK_ID"
diff --git a/course_files/csd3/slurm/seqkit_singularity.sh b/course_files/csd3/slurm/seqkit_singularity.sh
new file mode 100644
index 0000000..89894ee
--- /dev/null
+++ b/course_files/csd3/slurm/seqkit_singularity.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+#SBATCH -p icelake # name of the partition to run job on
+#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
+#SBATCH -o logs/seqkit.log # standard output file
+#SBATCH -c 1 # number of CPUs. Default: 1
+#SBATCH --mem=1G # RAM memory. Default: 1G
+#SBATCH -t 00:10:00 # time for the job HH:MM:SS. Default: 1 min
+
+# Your singularity command here
+singularity FIXME
\ No newline at end of file
diff --git a/course_files/csd3/slurm/simple_job.sh b/course_files/csd3/slurm/simple_job.sh
new file mode 100644
index 0000000..368545a
--- /dev/null
+++ b/course_files/csd3/slurm/simple_job.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+#SBATCH -A TRAINING-CPU
+#SBATCH --reservation=training
+
+sleep 60 # hold for 60 seconds
+
+echo "This job is running on:"
+hostname
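In `seqkit_singularity.sh` above, the FIXME is deliberately left as the exercise. Purely to illustrate the general shape of such a command, not the exercise answer, a sketch where the image name `seqkit.sif` is hypothetical:

    # run a tool from a container image with singularity exec
    singularity exec seqkit.sif seqkit stats data/genome/drosophila_genome.fa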
diff --git a/course_files/simulation_plot.png b/course_files/simulation_plot.png
deleted file mode 100644
index 666630f..0000000
Binary files a/course_files/simulation_plot.png and /dev/null differ
diff --git a/course_files/sir_simulation_results.csv b/course_files/sir_simulation_results.csv
deleted file mode 100644
index df85027..0000000
--- a/course_files/sir_simulation_results.csv
+++ /dev/null
@@ -1,201 +0,0 @@
-day,infected,susceptible,recovered
-0,1,999,0
-1,1,999,0
-2,2,998,0
-3,4,996,0
-4,6,994,0
-5,7,992,1
-6,8,990,2
-7,12,986,2
-8,13,983,4
-9,17,978,5
-10,20,972,8
-11,27,963,10
-12,34,955,11
-13,37,948,15
-14,47,933,20
-15,62,913,25
-16,74,893,33
-17,81,877,42
-18,87,857,56
-19,101,837,62
-20,128,802,70
-21,156,759,85
-22,178,723,99
-23,193,685,122
-24,199,654,147
-25,217,617,166
-26,238,571,191
-27,263,525,212
-28,277,485,238
-29,284,447,269
-30,294,412,294
-31,296,383,321
-32,309,340,351
-33,308,310,382
-34,310,278,412
-35,300,253,447
-36,291,236,473
-37,291,214,495
-38,289,189,522
-39,275,175,550
-40,260,160,580
-41,258,143,599
-42,250,129,621
-43,222,119,659
-44,210,111,679
-45,188,106,706
-46,172,102,726
-47,156,96,748
-48,137,94,769
-49,125,88,787
-50,110,88,802
-51,96,87,817
-52,87,86,827
-53,82,84,834
-54,74,84,842
-55,67,81,852
-56,62,80,858
-57,62,74,864
-58,54,71,875
-59,48,71,881
-60,43,70,887
-61,37,69,894
-62,34,69,897
-63,29,69,902
-64,22,69,909
-65,22,69,909
-66,21,69,910
-67,18,69,913
-68,17,69,914
-69,15,69,916
-70,14,69,917
-71,11,69,920
-72,9,69,922
-73,11,67,922
-74,9,67,924
-75,7,67,926
-76,6,67,927
-77,6,67,927
-78,5,67,928
-79,4,67,929
-80,4,67,929
-81,4,67,929
-82,4,67,929
-83,4,67,929
-84,4,67,929
-85,4,67,929
-86,3,67,930
-87,2,67,931
-88,2,67,931
-89,2,67,931
-90,2,67,931
-91,3,66,931
-92,3,66,931
-93,3,66,931
-94,3,66,931
-95,3,66,931
-96,3,66,931
-97,2,66,932
-98,2,66,932
-99,1,66,933
-100,1,66,933
-101,1,66,933
-102,0,66,934
-103,0,66,934
-104,0,66,934
-105,0,66,934
-106,0,66,934
-107,0,66,934
-108,0,66,934
-109,0,66,934
-110,0,66,934
-111,0,66,934
-112,0,66,934
-113,0,66,934
-114,0,66,934
-115,0,66,934
-116,0,66,934
-117,0,66,934
-118,0,66,934
-119,0,66,934
-120,0,66,934
-121,0,66,934
-122,0,66,934
-123,0,66,934
-124,0,66,934
-125,0,66,934
-126,0,66,934
-127,0,66,934
-128,0,66,934
-129,0,66,934
-130,0,66,934
-131,0,66,934
-132,0,66,934
-133,0,66,934
-134,0,66,934
-135,0,66,934
-136,0,66,934
-137,0,66,934
-138,0,66,934
-139,0,66,934
-140,0,66,934
-141,0,66,934
-142,0,66,934
-143,0,66,934
-144,0,66,934
-145,0,66,934
-146,0,66,934
-147,0,66,934
-148,0,66,934
-149,0,66,934
-150,0,66,934
-151,0,66,934
-152,0,66,934
-153,0,66,934
-154,0,66,934
-155,0,66,934
-156,0,66,934
-157,0,66,934
-158,0,66,934
-159,0,66,934
-160,0,66,934
-161,0,66,934
-162,0,66,934
-163,0,66,934
-164,0,66,934
-165,0,66,934
-166,0,66,934
-167,0,66,934
-168,0,66,934
-169,0,66,934
-170,0,66,934
-171,0,66,934
-172,0,66,934
-173,0,66,934
-174,0,66,934
-175,0,66,934
-176,0,66,934
-177,0,66,934
-178,0,66,934
-179,0,66,934
-180,0,66,934
-181,0,66,934
-182,0,66,934
-183,0,66,934
-184,0,66,934
-185,0,66,934
-186,0,66,934
-187,0,66,934
-188,0,66,934
-189,0,66,934
-190,0,66,934
-191,0,66,934
-192,0,66,934
-193,0,66,934
-194,0,66,934
-195,0,66,934
-196,0,66,934
-197,0,66,934
-198,0,66,934
-199,0,66,934