diff --git a/.github/workflows/automated_tests.yml b/.github/workflows/automated_tests.yml index db43ddc..c253dfb 100644 --- a/.github/workflows/automated_tests.yml +++ b/.github/workflows/automated_tests.yml @@ -19,4 +19,5 @@ jobs: wget -qO- https://get.nextflow.io | bash && cp nextflow /usr/local/bin/nextflow - name: Run tests run: | + export NXF_VER=22.04.5 make \ No newline at end of file diff --git a/Makefile b/Makefile index f9bac0b..b86785c 100644 --- a/Makefile +++ b/Makefile @@ -21,3 +21,4 @@ test: # STAR indices take over 1 GB and we did not manage to make it work in GitHub actions #bash tests/run_test_11.sh #bash tests/run_test_12.sh + #bash tests/run_test_13.sh diff --git a/README.md b/README.md index fb76c59..e4dc238 100755 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ somatic variant calling. Find the documentation here [![Documentation Status](https://readthedocs.org/projects/tronflow-docs/badge/?version=latest)](https://tronflow-docs.readthedocs.io/en/latest/?badge=latest) This pipeline aligns paired and single end FASTQ files with BWA aln and mem algorithms and with BWA mem 2. -For RNA-seq star is also supported. +For RNA-seq STAR is also supported. To increase sensitivity of novel junctions use `--star_two_pass_mode` (recommended for RNAseq variant calling). It also includes an initial step of read trimming using FASTP. 
@@ -55,6 +55,8 @@ Optional input: * memory: determines the memory required by each job (default: 32g) * inception: if enabled it uses an inception, only valid for BWA aln, it requires a fast file system such as flash (default: false) * skip_trimming: skips the read trimming step + * star_two_pass_mode: activates STAR two-pass mode, increasing sensitivity of novel junction discovery, recommended for RNA variant calling (default: false) + * additional_args: additional alignment arguments, only effective in BWA mem, BWA mem 2 and STAR (default: none) Output: * A BAM file \${name}.bam and its index diff --git a/modules/02_bwa_mem.nf b/modules/02_bwa_mem.nf index 0364ce2..626a170 100644 --- a/modules/02_bwa_mem.nf +++ b/modules/02_bwa_mem.nf @@ -16,7 +16,7 @@ process BWA_MEM { file("software_versions.${task.process}.txt") """ - bwa mem -t ${task.cpus} ${params.reference} ${fastq1} ${fastq2} | samtools view -uS - | samtools sort - > ${name}.bam + bwa mem ${params.additional_args} -t ${task.cpus} ${params.reference} ${fastq1} ${fastq2} | samtools view -uS - | samtools sort - > ${name}.bam echo ${params.manifest} >> software_versions.${task.process}.txt echo "bwa=0.7.17" >> software_versions.${task.process}.txt @@ -42,7 +42,7 @@ process BWA_MEM_SE { file("software_versions.${task.process}.txt") """ - bwa mem -t ${task.cpus} ${params.reference} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam + bwa mem ${params.additional_args} -t ${task.cpus} ${params.reference} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam echo ${params.manifest} >> software_versions.${task.process}.txt echo "bwa=0.7.17" >> software_versions.${task.process}.txt diff --git a/modules/02_bwa_mem_2.nf b/modules/02_bwa_mem_2.nf index 5cfcdba..58ad725 100755 --- a/modules/02_bwa_mem_2.nf +++ b/modules/02_bwa_mem_2.nf @@ -16,7 +16,7 @@ process BWA_MEM_2 { file("software_versions.${task.process}.txt") """ - bwa-mem2 mem -t ${task.cpus} ${params.reference} ${fastq1} ${fastq2} | 
samtools view -uS - | samtools sort - > ${name}.bam + bwa-mem2 mem ${params.additional_args} -t ${task.cpus} ${params.reference} ${fastq1} ${fastq2} | samtools view -uS - | samtools sort - > ${name}.bam echo ${params.manifest} >> software_versions.${task.process}.txt bwa-mem2 version >> software_versions.${task.process}.txt @@ -42,7 +42,7 @@ process BWA_MEM_2_SE { file("software_versions.${task.process}.txt") """ - bwa-mem2 mem -t ${task.cpus} ${params.reference} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam + bwa-mem2 mem ${params.additional_args} -t ${task.cpus} ${params.reference} ${fastq} | samtools view -uS - | samtools sort - > ${name}.bam echo ${params.manifest} >> software_versions.${task.process}.txt bwa-mem2 version >> software_versions.${task.process}.txt diff --git a/modules/02_star.nf b/modules/02_star.nf index 91ad979..66c1b95 100644 --- a/modules/02_star.nf +++ b/modules/02_star.nf @@ -15,8 +15,10 @@ process STAR { tuple val("${name}"), file("${name}.bam"), emit: bams file("software_versions.${task.process}.txt") + script: + two_pass_mode_param = params.star_two_pass_mode ? "--twopassMode Basic" : "" """ - STAR --genomeDir ${params.reference} \ + STAR --genomeDir ${params.reference} ${two_pass_mode_param} ${params.additional_args} \ --readFilesCommand "gzip -d -c -f" \ --readFilesIn ${fastq1} ${fastq2} \ --outSAMmode Full \ @@ -51,8 +53,10 @@ process STAR_SE { tuple val("${name}"), file("${name}.bam"), emit: bams file("software_versions.${task.process}.txt") + script: + two_pass_mode_param = params.star_two_pass_mode ? 
"--twopassMode Basic" : "" """ - STAR --genomeDir ${params.reference} \ + STAR --genomeDir ${params.reference} ${two_pass_mode_param} ${params.additional_args} \ --readFilesCommand "gzip -d -c -f" \ --readFilesIn ${fastq} \ --outSAMmode Full \ diff --git a/nextflow.config b/nextflow.config index 7fa4f65..0223a82 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,8 @@ params.cpus = 8 params.memory = "32g" params.inception = false params.skip_trimming = false +params.star_two_pass_mode = false +params.additional_args = "" profiles { conda { @@ -44,7 +46,7 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] cleanup = true -VERSION = "1.7.0" +VERSION = "1.8.0" manifest { name = 'TRON-Bioinformatics/tronflow-alignment' @@ -58,7 +60,7 @@ manifest { params.manifest = manifest params.help_message = """ -TronFlow BWA v${VERSION} +TronFlow alignment v${VERSION} Usage: nextflow main.nf --input_files input_files [--reference reference.fasta] @@ -81,6 +83,8 @@ Optional input: * memory: determines the memory required by each job (default: 8g) * inception: if enabled it uses an inception, only valid for BWA aln, it requires a fast file system such as flash (default: false) * skip_trimming: skips the read trimming step + * star_two_pass_mode: activates STAR two-pass mode, increasing sensitivity of novel junction discovery, recommended for RNA variant calling (default: false) + * additional_args: additional alignment arguments, only effective in BWA mem, BWA mem 2 and STAR (default: none) Output: * A BAM file \${name}.bam and its index diff --git a/tests/run_test_10.sh b/tests/run_test_10.sh index 2f2f9cb..fcda739 100644 --- a/tests/run_test_10.sh +++ b/tests/run_test_10.sh @@ -1,6 +1,6 @@ #!/bin/bash -output_folder=output/test3 +output_folder=output/test10 nextflow main.nf -profile test,conda --library single --algorithm mem2 --output $output_folder test -s $output_folder/TESTX_S1_L001.bam || { echo "Missing test 3 output file!"; exit 1; } test -s 
$output_folder/TESTX_S1_L001.bam.bai || { echo "Missing test 3 output file!"; exit 1; } diff --git a/tests/run_test_11.sh b/tests/run_test_11.sh index 2535a12..e176cb8 100644 --- a/tests/run_test_11.sh +++ b/tests/run_test_11.sh @@ -1,6 +1,6 @@ #!/bin/bash -output_folder=output/test4 +output_folder=output/test11 nextflow main.nf -profile test,conda --algorithm star --reference `pwd`/test_data --output $output_folder test -s $output_folder/TESTX_S1_L001.bam || { echo "Missing test 4 output file!"; exit 1; } test -s $output_folder/TESTX_S1_L001.bam.bai || { echo "Missing test 4 output file!"; exit 1; } diff --git a/tests/run_test_12.sh b/tests/run_test_12.sh index c152186..cca6ace 100644 --- a/tests/run_test_12.sh +++ b/tests/run_test_12.sh @@ -1,6 +1,6 @@ #!/bin/bash -output_folder=output/test4 +output_folder=output/test12 nextflow main.nf -profile test,conda --algorithm star --library single --reference `pwd`/test_data --output $output_folder test -s $output_folder/TESTX_S1_L001.bam || { echo "Missing test 4 output file!"; exit 1; } test -s $output_folder/TESTX_S1_L001.bam.bai || { echo "Missing test 4 output file!"; exit 1; } diff --git a/tests/run_test_13.sh b/tests/run_test_13.sh new file mode 100644 index 0000000..a59d46d --- /dev/null +++ b/tests/run_test_13.sh @@ -0,0 +1,13 @@ +#!/bin/bash +# Test 13: runs the STAR algorithm with --star_two_pass_mode and checks that the BAM, BAI and fastp reports exist and are non-empty for TESTX_S1_L001 and TESTX_S1_L002. + +output_folder=output/test13 +nextflow main.nf -profile test,conda --algorithm star --star_two_pass_mode --reference `pwd`/test_data --output $output_folder +test -s $output_folder/TESTX_S1_L001.bam || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L001.bam.bai || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L001.fastp_stats.html || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L001.fastp_stats.json || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L002.bam || { echo "Missing test 13 output file!"; exit 1; } +test -s
$output_folder/TESTX_S1_L002.bam.bai || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L002.fastp_stats.html || { echo "Missing test 13 output file!"; exit 1; } +test -s $output_folder/TESTX_S1_L002.fastp_stats.json || { echo "Missing test 13 output file!"; exit 1; } \ No newline at end of file