-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcreate_long_reports.sh
executable file
·139 lines (126 loc) · 7.13 KB
/
create_long_reports.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env bash
## Normalise the command line with getopt(1) and install the result as the new
## positional parameters. '-q' keeps getopt quiet about options it does not know.
TEMP=$(getopt -q -o hf --long help,force -n 'create_long_reports.sh' -- "$@")
eval set -- "$TEMP"

## Colour prefixes for terminal messages. The '\033[...m' part is kept as a literal
## backslash sequence, so it only turns into a colour code when printed with `echo -e`.
Normal="$(tput sgr0)"              ## Original normal face
Yellow="$(tput sgr0)"'\033[1;33m'  ## Yellow normal face
Green="$(tput sgr0)"'\033[1;32m'   ## Green normal face
Red="$(tput sgr0)"'\033[1;31m'     ## Red normal face

## Space-separated batch names, appended to while reports are being generated.
successful_jobs=''
failed_jobs=''
## Helptext function
## Prints the usage line, a short description of what the script does and the
## list of supported options to stdout. Takes no arguments and does not exit;
## the caller decides the exit status.
function Helptext {
  printf '\t Usage: create_long_reports.sh [-f] \n\n'
  printf 'This script will compare all completed eager runs with all completed reports and create long reports for any runs that\n\tare newer than the associated long report or do not have an associated long report.\n\n'
  printf 'options:\n'
  ## Option names start with '-', so they are passed as arguments, never as the printf format.
  printf '%s\t\t%s\n' '-h, --help' 'Print this text and exit.'
  printf '%s\t\t%s\n' '-f, --force' 'Force recreation of long reports for all finished eager runs.'
}
## Consume the getopt-normalised positional parameters up to the "--" terminator.
## force_remake stays "FALSE" unless -f/--force was given.
force_remake="FALSE"
until [[ "$1" == "--" ]]; do
  case "$1" in
    -f|--force)
      force_remake="TRUE"
      shift
      ;;
    -h|--help)
      Helptext
      exit 0
      ;;
    *)
      ## Should never trigger since $TEMP has had invalid options removed. Good to have for dev
      echo -e "Invalid option provided.\n"
      Helptext
      exit 1
      ;;
  esac
done
## Locations of the report knitter, its template and the credentials file handed to it.
## NOTE(review): the knitter path is under /home/ while the template and assets are under
## /r1/people/ -- presumably the same checkout reached via two mount points; confirm.
report_knitter="/home/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/project_reports/knit_long_report.R"
report_template="/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/project_reports/long_report.Rmd"
## A tilde inside double quotes is never expanded by the shell, so use ${HOME} to
## hand the knitter a real path instead of a literal '~'.
cred_file="${HOME}/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/.credentials"
## Base directories for the generated reports and for the per-batch inputs.
long_report_dir="/mnt/archgen/MICROSCOPE/reports"
base_poseidon_package_dir="/mnt/archgen/MICROSCOPE/poseidon_packages"
base_analysis_dir="/mnt/archgen/MICROSCOPE/automated_analysis"
## Inputs shared by every report: background annotation, PCA evec/eval files and the janno file.
bg_annotation_fn="/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/project_reports/assets/bg_annotation.txt"
we_evec_fn="/mnt/archgen/MICROSCOPE/automated_analysis/microscope_pca/West_Eurasian_pca.evec"
we_eval_fn="/mnt/archgen/MICROSCOPE/automated_analysis/microscope_pca/West_Eurasian_pca.eval"
eu_evec_fn="/mnt/archgen/MICROSCOPE/automated_analysis/microscope_pca/Europe_only_pca.evec"
eu_eval_fn="/mnt/archgen/MICROSCOPE/automated_analysis/microscope_pca/Europe_only_pca.eval"
janno_fn="/mnt/archgen/MICROSCOPE/forged_packages/microscope_pca/microscope_pca.janno"
## Logo files passed to the knitter.
logo_file="/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/project_reports/assets/MICROSCOPE_logo_blue.pdf"
mpi_logo_file="/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/project_reports/assets/EVA_logo.png"
## Get list of completed runs (MQC report exists)
eager_out_dir="/mnt/archgen/MICROSCOPE/eager_outputs"
## Read find's output one path per line, so that paths are neither word-split nor glob-expanded.
finished_runs=()
mapfile -t finished_runs < <(find "${eager_out_dir}"/*/multiqc -name 'multiqc_report.html')
## Create array of expected report names with the same index/order as the finished runs.
## Start from an explicitly empty array so the indices always line up with finished_runs.
expected_outputs=()
for run in "${finished_runs[@]}"; do
  ## batch_Id is the third path component counted from the end of the MultiQC report path.
  batch_Id="${run%/*/*}"
  batch_Id="${batch_Id##*/}"
  ## batch_name is whatever follows the last '-' of batch_Id (the whole Id when it has no '-').
  batch_name="${batch_Id##*-}"
  expected_outputs+=("${long_report_dir}/${batch_Id}/${batch_name}_long_report.pdf")
done
## DEBUG
# echo ${#finished_runs[@]}
# echo ${#expected_outputs[@]}
## Generate the report when '-f' was given, or when the MultiQC report is newer than the long
## report (or the long report does not exist yet) AND both PCA evec files are newer than the
## MultiQC report. When the PCA evec files are older than the MultiQC report, only warn.
for idx in "${!finished_runs[@]}"; do
  mqc_report="${finished_runs[${idx}]}"
  long_report_pdf="${expected_outputs[${idx}]}"
  ## batch_Id is the third path component from the end; batch_name is the part after its last '-'.
  batch_Id="${mqc_report%/*/*}"
  batch_Id="${batch_Id##*/}"
  batch_name="${batch_Id##*-}"
  if [[ "${force_remake}" == "TRUE" || ( "${mqc_report}" -nt "${long_report_pdf}" && "${we_evec_fn}" -nt "${mqc_report}" && "${eu_evec_fn}" -nt "${mqc_report}" ) ]]; then
    ## Infer filepaths for snp_coverage, sex det results and general stats table
    ## find does not return its matches in any guaranteed order, so sort them explicitly.
    ## Take the last by index to prefer ssDNA when multiple files exist (Same as poseidon package creation).
    ## NOTE(review): assumes the ssDNA coverage file sorts after the dsDNA one -- confirm against the package creation script.
    snp_fns=()
    mapfile -t snp_fns < <(find "${eager_out_dir}/${batch_Id}/genotyping/" -name '*_eigenstrat_coverage.txt' | sort)
    ## Without a coverage file the knitter call cannot be assembled; record the batch as failed and move on.
    if (( ${#snp_fns[@]} == 0 )); then
      echo -e "${Red}No '*_eigenstrat_coverage.txt' file found for '${batch_Id}'. Skipping report creation.${Normal}" >&2
      failed_jobs+="${batch_name} "
      continue
    fi
    snp_coverage_file="${snp_fns[-1]}"
    ## Sexdet output has stable path
    sex_det_file="${eager_out_dir}/${batch_Id}/sex_determination/SexDet.txt"
    ## General stats table has stable path
    stats_table="$(dirname "${mqc_report}")/multiqc_data/multiqc_general_stats.txt"
    ## pMMR results have stable paths based on batch Id
    pmmr_fn="${base_analysis_dir}/${batch_Id}/pmmr/${batch_Id}.pmmr.txt"
    ## Read results have stable paths
    read_txt="${base_analysis_dir}/${batch_Id}/read/${batch_Id}.read.txt"
    read_pdf="${base_analysis_dir}/${batch_Id}/read/${batch_Id}.read.plot.pdf"
    ## Required Poseidon package paths have stable path derived from the batch Id
    geno_fn="${base_poseidon_package_dir}/${batch_Id}/${batch_Id}.geno"
    snp_fn="${base_poseidon_package_dir}/${batch_Id}/${batch_Id}.snp"
    ind_fn="${base_poseidon_package_dir}/${batch_Id}/${batch_Id}.ind"
    ## Get report creation date
    report_date=$(date +%d-%m-%Y)
    echo "Creating long report for ${batch_Id} -> ${long_report_pdf}"
    ## If a report generation fails, keep the name of the batch and throw a list of unmade reports.
    if "${report_knitter}" \
      --report_template "${report_template}" \
      --snp_coverage_file "${snp_coverage_file}" \
      --sex_det_file "${sex_det_file}" \
      --stats_table "${stats_table}" \
      --batch_name "${batch_name}" \
      --cred_file "${cred_file}" \
      --pmmr_results "${pmmr_fn}" \
      --read_txt "${read_txt}" \
      --read_pdf "${read_pdf}" \
      --janno_fn "${janno_fn}" \
      --GenoFile "${geno_fn}" \
      --SnpFile "${snp_fn}" \
      --IndFile "${ind_fn}" \
      --bg_annotation_file "${bg_annotation_fn}" \
      --we_evec_fn "${we_evec_fn}" \
      --we_eval_fn "${we_eval_fn}" \
      --eu_evec_fn "${eu_evec_fn}" \
      --eu_eval_fn "${eu_eval_fn}" \
      --output_pdf_name "${long_report_pdf}" \
      --report_date "${report_date}" \
      --logo_file "${logo_file}" \
      --mpi_logo_file "${mpi_logo_file}"
    then
      successful_jobs+="${batch_name} "
    else
      failed_jobs+="${batch_name} "
    fi
    # exit 0 ## For Testing
  elif [[ "${we_evec_fn}" -ot "${mqc_report}" || "${eu_evec_fn}" -ot "${mqc_report}" ]]; then
    ## Error message when the PCA results are outdated.
    echo -e "${Yellow}PCA has not been updated since package '${batch_Id}' was updated.${Normal}"
    echo "    Consider updating the PCA evec files, or use '-f' to force report (re)creation."
  else
    echo "Long report for ${batch_Id} did not need updating. Skipping this batch."
  fi
done
## Final summary: batches whose report was created, a blank line, then the
## batches whose report creation failed. Both lists are space-separated strings,
## so they are deliberately left unquoted to iterate one batch name at a time.
printf '%s\n' "Report generation successful for:"
for batch in ${successful_jobs}; do
  echo -e "    - ${Green}${batch}${Normal}"
done
printf '\n'
printf '%s\n' "Report generation failed for:"
for batch in ${failed_jobs}; do
  echo -e "    - ${Red}${batch}${Normal}"
done