Skip to content

Commit

Permalink
add CSD3 scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
tavareshugo committed Jun 28, 2024
1 parent cc4b5d6 commit ed39712
Show file tree
Hide file tree
Showing 45 changed files with 517 additions and 204 deletions.
6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ slides
notes

# specific files from course_materials that we do not version control
course_files/data
course_files/logs
course_files/results
course_files/**/data
course_files/**/logs
course_files/**/results
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
17 changes: 17 additions & 0 deletions course_files/csd3/dependency/notok/submit_notok.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/bin/bash

# This is not submitted to SLURM!
# These are the sbatch commands we use to submit our jobs by hand.

# Submit the task for the first time and capture its job ID.
first_id=$(sbatch --parsable task_with_checkpoints.sh)

# Queue a second attempt that only starts if the first one fails
# (afternotok = run after the previous job did NOT finish OK).
second_id=$(sbatch --parsable --dependency "afternotok:${first_id}" task_with_checkpoints.sh)

# Queue a third attempt in case the second one also fails.
third_id=$(sbatch --parsable --dependency "afternotok:${second_id}" task_with_checkpoints.sh)

# We could keep chaining more attempts like this, but it is probably
# wise to stop after a few and check whether the job has finally
# completed or not.
45 changes: 45 additions & 0 deletions course_files/csd3/dependency/notok/task_with_checkpoints.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash
#SBATCH -p icelake  # name of the partition to run job on
#SBATCH -o logs/task_with_checkpoints_%j.log
#SBATCH -c 1  # number of CPUs. Default: 1
#SBATCH --mem=1G  # RAM memory. Default: 1G
#SBATCH -t 00:01:00  # time for the job HH:MM:SS. Default: 1 min

# output file names
checkpoint="checkpoint.txt"
finalresult="long_task_result.txt"

# the code below this is a bit silly and you don't need to worry about its details
# we are simply incrementing a number by 1 every 15 seconds
# when that number reaches 10, we consider the job finished
# at each stage the current number is saved in the checkpoint file
# so if the job fails we resume it from that point
#
# NOTE: a full count needs 10 x 15s = 150s, which deliberately exceeds the
# 1-minute time limit requested above -- a single run gets killed before
# finishing, which is what makes this a useful "afternotok" resubmission demo.

#### incrementer-with-checkpoint ####

# Resume from the checkpoint file if it exists, otherwise start from zero.
if [ -f "$checkpoint" ]; then
  number=$(<"$checkpoint")
else
  number=0
fi

# Increment the counter every 15 seconds until it reaches 10,
# saving progress to the checkpoint file after each step.
# (arithmetic while-loop instead of the previous `for i in $(seq ...)`,
# which spawned a subshell and left the loop variable unused)
while [ "$number" -lt 10 ]; do
  sleep 15
  number=$(( number + 1 ))
  echo "$number" > "$checkpoint"
done

# result
echo "Congratulations, you have counted to 10." > "$finalresult"

# message to log file; quote the variable and use -f so a missing
# checkpoint (already removed by a racing resubmission) is not an error
echo "Job complete, removing checkpoint.txt file."
rm -f -- "$checkpoint"
10 changes: 10 additions & 0 deletions course_files/csd3/dependency/ok/submit_ok.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash

# This is not submitted to SLURM!
# These are the sbatch commands we use to submit our jobs by hand.

# Submit the first task of the pipeline and capture its job ID.
first_task_id=$(sbatch --parsable task1.sh)

# The second task only starts if the first one finished successfully
# (afterok = run after the previous job completed with exit code 0).
sbatch --dependency "afterok:${first_task_id}" task2.sh
12 changes: 12 additions & 0 deletions course_files/csd3/dependency/ok/task1.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/bash
#SBATCH -p icelake  # name of the partition to run job on
#SBATCH -o logs/task1_%j.log
#SBATCH -c 1  # number of CPUs. Default: 1
#SBATCH --mem=1G  # RAM memory. Default: 1G
#SBATCH -t 00:02:00  # time for the job HH:MM:SS. Default: 1 min

# sleep for 60 seconds (to have time to see the job in the queue)
sleep 60

# create an example file for task2 to pick up
# FIX: was `touchh`, which is not a command -- the task always failed,
# so the afterok-dependent task2 could never run
touch output_task1.txt
13 changes: 13 additions & 0 deletions course_files/csd3/dependency/ok/task2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#SBATCH -p icelake  # partition (queue) to run on
#SBATCH -o logs/task2_%j.log
#SBATCH -c 1  # CPUs requested. Default: 1
#SBATCH --mem=1G  # RAM requested. Default: 1G
#SBATCH -t 00:01:00  # walltime HH:MM:SS. Default: 1 min

# Pause briefly so the job is visible in the queue for a while.
sleep 10

# Rename the file produced by task1. This only works if task1 completed
# successfully, which the afterok dependency guarantees.
mv output_task1.txt output_task2.txt
11 changes: 11 additions & 0 deletions course_files/csd3/dependency/singleton/submit_singleton.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# This is not submitted to SLURM!
# These are the sbatch commands we use to submit our jobs by hand.

# The first two tasks of the pipeline share a job name but have no
# dependencies, so they can run concurrently.
sbatch --job-name my_pipeline task1_singleton.sh
sbatch --job-name my_pipeline task2_singleton.sh

# The third task waits until every earlier job with the same name has
# finished (that is what the "singleton" dependency means).
sbatch --job-name my_pipeline --dependency singleton task3_singleton.sh
13 changes: 13 additions & 0 deletions course_files/csd3/dependency/singleton/task1_singleton.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#SBATCH -p icelake  # partition (queue) to run on
#SBATCH -J my_pipeline  # job name, shared by all jobs in this pipeline
#SBATCH -o logs/task1_singleton_%j.log
#SBATCH -c 1  # CPUs requested. Default: 1
#SBATCH --mem=1G  # RAM requested. Default: 1G
#SBATCH -t 00:01:00  # walltime HH:MM:SS. Default: 1 min

# Pause briefly so the job is visible in the queue for a while.
sleep 10

# Write this task's output file.
echo "Output from task1" > result_task1.txt
13 changes: 13 additions & 0 deletions course_files/csd3/dependency/singleton/task2_singleton.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#SBATCH -p icelake  # partition (queue) to run on
#SBATCH -J my_pipeline  # job name, shared by all jobs in this pipeline
#SBATCH -o logs/task2_singleton_%j.log
#SBATCH -c 1  # CPUs requested. Default: 1
#SBATCH --mem=1G  # RAM requested. Default: 1G
#SBATCH -t 00:01:00  # walltime HH:MM:SS. Default: 1 min

# Pause briefly so the job is visible in the queue for a while.
sleep 10

# Write this task's output file.
echo "Output from task2" > result_task2.txt
14 changes: 14 additions & 0 deletions course_files/csd3/dependency/singleton/task3_singleton.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#SBATCH -p icelake  # name of the partition to run job on
#SBATCH -J my_pipeline  # name for the job
#SBATCH -o logs/task3_singleton_%j.log
#SBATCH -c 1  # number of CPUs. Default: 1
#SBATCH --mem=1G  # RAM memory. Default: 1G (was '--mem 1G'; '=' form matches the sibling scripts)
#SBATCH -t 00:01:00  # time for the job HH:MM:SS. Default: 1 min
#SBATCH --dependency singleton  # wait for all earlier jobs named 'my_pipeline'

# sleep for 10 seconds (to have time to see the job in the queue)
sleep 10

# concatenate the files from the previous two tasks into one;
# the singleton dependency guarantees both have finished by now
cat result_task1.txt result_task2.txt > result_task3.txt
65 changes: 65 additions & 0 deletions course_files/csd3/scripts/pi_estimator.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@


# User Arguments ----------------------------------------------------------

# Load argparse quietly so its startup messages do not pollute the job log.
suppressPackageStartupMessages(library("argparse"))

# create parser object
parser <- ArgumentParser()

# specify our desired options
# by default ArgumentParser will add a help option (-h/--help)
# number of CPU cores to spread the sampling over
parser$add_argument("--ncpus", type = "integer", default = 1,
help="number of CPUs used for calculation. Default: %(default)s")
# total points to draw, expressed in millions (e.g. 10 -> 10e6 samples)
parser$add_argument("--nsamples", type="integer", default = 10,
help="Number of points to sample for estimation in millions. Default: %(default)s",
metavar="number")

# parse arguments from the command line into a named list
args <- parser$parse_args()


# Functions ---------------------------------------------------------------

# split a number into N parts
# https://www.geeksforgeeks.org/split-the-number-into-n-parts-such-that-difference-between-the-smallest-and-the-largest-part-is-minimum/
split <- function(x, n) {
  # Divide the integer x into n parts whose sizes differ by at most one,
  # with the smaller parts listed first. Based on:
  # https://www.geeksforgeeks.org/split-the-number-into-n-parts-such-that-difference-between-the-smallest-and-the-largest-part-is-minimum/
  base_size <- floor(x / n)
  remainder <- x %% n
  if (remainder == 0) {
    # x divides evenly: every part gets the same size
    rep(base_size, n)
  } else {
    # the first (n - remainder) parts get the base size,
    # the remaining 'remainder' parts get one extra unit
    c(rep(base_size, n - remainder), rep(base_size + 1, remainder))
  }
}

# count points inside a circle
inside_circle <- function(total_count) {
  # Sample 'total_count' uniform random points in the unit square and
  # return how many of them fall inside the quarter unit circle.
  xs <- runif(total_count)
  ys <- runif(total_count)
  sum(sqrt(xs^2 + ys^2) <= 1)
}

# Estimate Pi ---------------------

# grab user options: convert "millions of samples" to an absolute count
n_samples <- ceiling(args$nsamples*1e6)
ncpus <- args$ncpus

# Split the samples into one chunk per CPU and count, in parallel, how
# many random points fall inside the quarter circle.
# FIX: mc.cores must be set explicitly -- without it mclapply ignores
# --ncpus and uses its default of getOption("mc.cores", 2) workers.
results <- parallel::mclapply(split(n_samples, ncpus), inside_circle, mc.cores = ncpus)
results <- unlist(results)

# Monte Carlo estimate: the fraction of points inside the quarter circle
# approximates pi/4, so multiply by 4.
counts <- sum(results)
my_pi <- 4*counts/n_samples

# print to standard output
cat(my_pi, "\n")

135 changes: 135 additions & 0 deletions course_files/csd3/scripts/turing_pattern.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Author: Benjamin F. Maier
# https://github.com/benmaier/reaction-diffusion
# Adapted by: Hugo Tavares

# import necessary libraries
import numpy as np
import matplotlib.pyplot as plt
import argparse


# ============ capture user input =============

parser = argparse.ArgumentParser(description='Reaction diffusion models.')
# Gray-Scott "feed" rate: how quickly chemical A is replenished
parser.add_argument('-f', '--feed', type=float, default=0.04,
help='The "feed rate" parameter of the model. Default: 0.04')
# Gray-Scott "kill" rate: how quickly chemical B is removed
parser.add_argument('-k', '--kill', type=float, default=0.06,
help='The "kill rate" parameter of the model. Default: 0.06')
# directory where the output PNG is written
parser.add_argument('-o', '--outdir', type=str, default=".",
help='Output directory. Default: .')

args = parser.parse_args()


# ============ define relevant functions =============

# an efficient function to compute a mean over neighboring cells
def apply_laplacian(mat):
    """Return the discrete Laplacian of ``mat`` with periodic boundaries.

    Implements the standard 5-point stencil: each cell contributes -4
    times its own value plus the values of its four direct neighbours.
    For more information see
    https://en.wikipedia.org/wiki/Discrete_Laplace_operator#Implementation_via_operator_discretization
    """
    # offsets (row, column) of the four direct lattice neighbours
    neighbour_shifts = [(-1, 0), (0, -1), (0, 1), (1, 0)]

    # the centre cell enters the stencil with weight -4
    laplacian = -4 * mat.copy()

    # add each neighbour by rolling the matrix, which wraps around the
    # edges and therefore gives periodic boundary conditions
    for shift in neighbour_shifts:
        laplacian += np.roll(mat, shift, (0, 1))

    return laplacian

# Define the update formula for chemicals A and B
def update(A, B, DA, DB, f, k, delta_t):
    """Advance the Gray-Scott model by one time step.

    A, B    -- concentration grids (modified in place AND returned)
    DA, DB  -- diffusion coefficients for A and B
    f, k    -- feed and kill rates
    delta_t -- integration time step
    """
    # reaction term: A is consumed and B produced at rate A * B^2
    reaction_rate = A * B**2

    # rate of change of A: diffusion, minus reaction, plus feed of fresh A
    delta_A = DA * apply_laplacian(A) - reaction_rate + f * (1 - A)
    # rate of change of B: diffusion, plus reaction, minus removal of B
    delta_B = DB * apply_laplacian(B) + reaction_rate - (k + f) * B

    # explicit Euler step, updating both grids in place
    A += delta_A * delta_t
    B += delta_B * delta_t

    return A, B

def get_initial_A_and_B(N, random_influence = 0.2):
    """Build the initial N-by-N concentration grids for the simulation.

    Returns (A, B): A starts near 1 and B near 0, each perturbed by
    uniform noise scaled by ``random_influence``, with a square
    disturbance of fixed concentrations placed at the centre.
    """
    # homogeneous background concentrations with uniform noise on top
    # (A's noise is drawn first, then B's, to keep the RNG stream order)
    A = (1 - random_influence) * np.ones((N, N)) + random_influence * np.random.random((N, N))
    B = np.zeros((N, N)) + random_influence * np.random.random((N, N))

    # centre coordinate and half-width of the initial square disturbance
    centre, half_width = N // 2, 50

    # overwrite the central square with fixed concentrations
    A[centre - half_width:centre + half_width, centre - half_width:centre + half_width] = 0.50
    B[centre - half_width:centre + half_width, centre - half_width:centre + half_width] = 0.25

    return A, B

def draw(A, B):
    """Return the matplotlib figure and image artists for animation.

    Displays the A and B concentration grids side by side as
    greyscale images, with axes hidden and panels titled 'A' and 'B'.
    """
    # FIX: the original called pl.subplots, but the module is imported
    # as `plt` at the top of the file -- `pl` raised a NameError.
    fig, ax = plt.subplots(1, 2, figsize=(5.65, 3))
    imA = ax[0].imshow(A, animated=True, vmin=0, cmap='Greys')
    imB = ax[1].imshow(B, animated=True, vmax=1, cmap='Greys')
    ax[0].axis('off')
    ax[1].axis('off')
    ax[0].set_title('A')
    ax[1].set_title('B')

    return fig, imA, imB


# =========== define model parameters ==========

# update in time (Euler integration step size)
delta_t = 1.0

# Diffusion coefficients
DA = 0.16
DB = 0.08

# define birth/death rates (taken from the command-line arguments)
f = args.feed
k = args.kill

# grid size (the simulation runs on an N x N lattice)
N = 200

# initialize the chemical concentrations
A, B = get_initial_A_and_B(N)

# run the simulation for a fixed number of update steps
N_simulation_steps = 10000
for step in range(N_simulation_steps):
    A, B = update(A, B, DA, DB, f, k, delta_t)

# make plot: save the final concentration of A as a PNG whose file name
# records the feed/kill parameters used for this run
plt.imshow(A)
plt.axis('off')
plt.savefig("{}/f{}_k{}.png".format(args.outdir, args.feed, args.kill))
24 changes: 24 additions & 0 deletions course_files/csd3/slurm/drosophila_genome_indexing.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash
#SBATCH -A TRAINING-CPU
#SBATCH --reservation=training
#SBATCH -p icelake # name of the partition to run job on
#SBATCH -D /home/FIX-YOUR-USERNAME/rds/hpc-work/hpc_workshop/ # working directory
#SBATCH -o logs/drosophila_genome_indexing.log
#SBATCH -c 1 # number of CPUs. Default: 1
#SBATCH --mem=1G # RAM memory. Default: 1G
#SBATCH -t 00:10:00 # time for the job HH:MM:SS. Default: 1 min

# these lines are needed to source the mamba activate command
# include them if you want to activate environments in your script
eval "$(conda shell.bash hook)"
source $CONDA_PREFIX/etc/profile.d/mamba.sh

# activate conda environment
# NOTE (course exercise): replace FIXME below with the command that
# activates the environment containing bowtie2, e.g. `mamba activate <env_name>`
FIXME

# make an output directory for the index
mkdir -p results/drosophila/genome

# index the reference genome with bowtie2; the syntax is:
# bowtie2-build input.fa output_prefix
bowtie2-build data/genome/drosophila_genome.fa results/drosophila/genome/index
Loading

0 comments on commit ed39712

Please sign in to comment.