From 66531d1787f4a4551924635436fa5fdd305cec89 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Thu, 13 Feb 2025 17:55:40 +0100 Subject: [PATCH 1/7] Restructure modules.config. Outputs in new section/tool/type structure --- conf/modules.config | 1061 ++++++++++++++++++++++++++----------------- 1 file changed, 648 insertions(+), 413 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index fcb14b4f..b5927862 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -18,6 +18,15 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] + withName: 'MULTIQC' { + ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // // CONVERT INPUT BAM // @@ -45,9 +54,12 @@ process { ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ - path: { "${params.outdir}/preprocessing/fastqc_raw/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/preprocessing/fastqc_raw/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } @@ -56,18 +68,12 @@ process { ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ - path: { "${params.outdir}/preprocessing/fastqc_processed/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } - publishDir = [ - path: { "${params.outdir}/multiqc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + [ + // stats + path: { "${params.outdir}/preprocessing/fastqc_processed/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } @@ -76,9 +82,12 @@ process { ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ - path: { "${params.outdir}/preprocessing/falco_raw/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/preprocessing/falco_raw/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } @@ -87,9 +96,12 @@ process { ext.args = '--quiet' ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ - path: { "${params.outdir}/preprocessing/falco_processed/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/preprocessing/falco_processed/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] ] } @@ -107,13 +119,15 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - path: { "${params.outdir}/preprocessing/fastp" }, + // data + path: { "${params.outdir}/preprocessing/fastp/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', enabled: params.preprocessing_savepreprocessedreads ], [ - path: { "${params.outdir}/preprocessing/fastp" }, + //stats + path: { "${params.outdir}/preprocessing/fastp/stats/" }, mode: params.publish_dir_mode, pattern: '*.{log,html,json}' ] @@ -137,13 +151,15 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - path: { "${params.outdir}/preprocessing/fastp" }, + // data + path: { "${params.outdir}/preprocessing/fastp/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', enabled: params.preprocessing_savepreprocessedreads ], [ - path: { "${params.outdir}/preprocessing/fastp" }, + // stats + path: { "${params.outdir}/preprocessing/fastp/stats/" }, mode: params.publish_dir_mode, pattern: '*.{log,html,json}' ] @@ -169,13 +185,15 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - path: { "${params.outdir}/preprocessing/adapterremoval" }, + // data + path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', enabled: params.preprocessing_savepreprocessedreads ], [ - path: { "${params.outdir}/preprocessing/adapterremoval" }, + // stats + path: { "${params.outdir}/preprocessing/adapterremoval/stats/" }, mode: params.publish_dir_mode, pattern: '*.settings' ] @@ -203,13 +221,15 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ [ - path: { "${params.outdir}/preprocessing/adapterremoval" }, + // data + path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, mode: params.publish_dir_mode, pattern: '*.fastq.gz', enabled: 
params.preprocessing_savepreprocessedreads ], [ - path: { "${params.outdir}/preprocessing/adapterremoval" }, + // stats + path: { "${params.outdir}/preprocessing/adapterremoval/stats/" }, mode: params.publish_dir_mode, pattern: '*.settings' ] @@ -220,10 +240,13 @@ process { tag = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_L${meta.lane}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/preprocessing/adapterremoval" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads + [ + // data + path: { "${params.outdir}/preprocessing/adapterremoval/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.preprocessing_savepreprocessedreads + ] ] } @@ -233,14 +256,17 @@ process { // withName: GUNZIP_FASTA { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: '*.f*', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/fasta/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{fasta,fna,fas,fa}', + enabled: params.save_reference + ] ] } - withName: 'GUNZIP_PMDFASTA|GUNZIP_PMDSNP|GUNZIP_SNPBED' { + withName: 'GUNZIP_PMDFASTA|GUNZIP_PMDBED|GUNZIP_SNPBED' { publishDir = [ enabled: false ] @@ -248,66 +274,87 @@ process { withName: SAMTOOLS_FAIDX { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: '*.fai', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/fasta/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fai', + enabled: params.save_reference + ] ] } withName: PICARD_CREATESEQUENCEDICTIONARY { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: '*.dict', - enabled: params.save_reference + [ + // data + path: { 
"${params.outdir}/references/fasta/data/" }, + mode: params.publish_dir_mode, + pattern: '*.dict', + enabled: params.save_reference + ] ] } withName: BOWTIE2_BUILD { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: 'bowtie2', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/bowtie2/data/" }, + mode: params.publish_dir_mode, + pattern: 'bowtie2', + enabled: params.save_reference + ] ] } withName: BWA_INDEX { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}/" }, - mode: params.publish_dir_mode, - pattern: 'bwa', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/bwa/data/" }, + mode: params.publish_dir_mode, + pattern: 'bwa', + enabled: params.save_reference + ] ] } // Reference elongation and indexing for circular mapping withName: GUNZIP_ELONGATED_FASTA { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, - mode: params.publish_dir_mode, - pattern: '*_*[0-9].f*', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/fasta/data/" }, + mode: params.publish_dir_mode, + pattern: '*_*[0-9].{fasta,fna,fas,fa}', + enabled: params.save_reference + ] ] } withName: CIRCULARMAPPER_CIRCULARGENERATOR { tag = { "${meta.id}_${params.fasta_circularmapper_elongationfactor}" } publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, - mode: params.publish_dir_mode, - pattern: '*_*[0-9].fasta', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/fasta/data/" }, + mode: params.publish_dir_mode, + pattern: '*_*[0-9].fasta', + enabled: params.save_reference + ] ] } withName: BWA_INDEX_CIRCULARISED { publishDir = [ - path: { "${params.outdir}/reference/${meta.id}_${params.fasta_circularmapper_elongationfactor}/" }, - mode: params.publish_dir_mode, - 
pattern: 'bwa', - enabled: params.save_reference + [ + // data + path: { "${params.outdir}/references/bwa/data/" }, + mode: params.publish_dir_mode, + pattern: 'bwa', + enabled: params.save_reference + ] ] } @@ -315,17 +362,22 @@ process { // BAM INPUT // withName: 'SAMTOOLS_FLAGSTATS_BAM_INPUT' { + // NOTE This step becomes obsolete once a lane-merging step is added for input BAMs. // TODO Once a lane-merging step is added for input BAMs, the lane should be dropped from this tag. tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.lane}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/bam_input_stats/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/mapping/bam_input/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } withName: SAMTOOLS_INDEX_BAM_INPUT { + // NOTE This step becomes obsolete once a lane-merging step is added for input BAMs. 
tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } publishDir = [ enabled: false @@ -335,40 +387,18 @@ process { // // BAM FILTERING // - withName: 'CAT_FASTQ_UNMAPPED|CAT_FASTQ_MAPPED' { - tag = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.preprocessing_savepreprocessedreads - ] - } - withName: FILTER_BAM_FRAGMENT_LENGTH { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.args = "-l ${params.bamfiltering_minreadlength}" ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.filtered.bam', - enabled: params.bamfiltering_savefilteredbams - ] - } - - withName: SAMTOOLS_FASTQ_UNMAPPED { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - '-f 4', - ].join(' ').trim() - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_unmapped" } - publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.bamfiltering_generateunmappedfastq + [ + // data + path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, + mode: params.publish_dir_mode, + pattern: '*.filtered.bam', + enabled: params.bamfiltering_savefilteredbams + ] ] } @@ -380,10 +410,13 @@ process { ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.bam', - enabled: params.bamfiltering_savefilteredbams + [ + // data + path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, + mode: params.publish_dir_mode, + pattern: 
'*.bam', + enabled: params.bamfiltering_savefilteredbams + ] ] } @@ -392,9 +425,25 @@ process { ext.args = { params.fasta_largeref ? "-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/read_filtering/filtered_bams/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + ] + } + + withName: SAMTOOLS_FLAGSTAT_FILTERED { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } + publishDir = [ + [ + // stats + path: { "${params.outdir}/read_filtering/filtered_bams/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } @@ -405,20 +454,44 @@ process { ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_mapped" } publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.fastq.gz', - enabled: params.bamfiltering_generatemappedfastq + [ + // data + path: { "${params.outdir}/read_filtering/fastq/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.bamfiltering_generatemappedfastq + ] ] } - withName: SAMTOOLS_FLAGSTAT_FILTERED { + withName: SAMTOOLS_FASTQ_UNMAPPED { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_filtered" } + ext.args = [ + '-f 4', + ].join(' ').trim() + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_unmapped" } publishDir = [ - path: { "${params.outdir}/bam_filtering/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // data + path: { "${params.outdir}/read_filtering/fastq/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: 
params.bamfiltering_generateunmappedfastq + ] + ] + } + + withName: 'CAT_FASTQ_UNMAPPED|CAT_FASTQ_MAPPED' { + tag = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + publishDir = [ + [ + // data + path: { "${params.outdir}/read_filtering/fastq/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.preprocessing_savepreprocessedreads + ] ] } @@ -459,16 +532,6 @@ process { ] } - withName: ENDORSPY { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - publishDir = [ - path: { "${params.outdir}/mapstats/endorspy" }, - mode: params.publish_dir_mode, - pattern: '*.json' - ] - } - withName: ".*MAP:FASTQ_ALIGN_BWAALN:SAMTOOLS_INDEX" { tag = { "${meta.id_index}|${meta.sample_id}_${meta.library_id}_L${meta.lane}" } ext.args = { params.fasta_largeref ? "-c" : "" } @@ -538,9 +601,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, - mode: params.publish_dir_mode, - pattern: '*.{bam}' + [ + // data + path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bam}' + ] ] } @@ -549,9 +615,12 @@ process { ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/mapping/${params.mapping_tool}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] ] } @@ -559,9 +628,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_sorted" } publishDir = [ - path: { "${params.outdir}/mapping/${params.mapping_tool}/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/mapping/${params.mapping_tool}/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } @@ -592,7 +664,6 @@ process { publishDir = [ enabled: false ] - } // @@ -656,9 +727,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ - path: { "${params.outdir}/deduplication/" }, - mode: params.publish_dir_mode, - pattern: '*.bam' + [ + // data + path: { "${params.outdir}/deduplication/${params.deduplication_tool}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] ] } @@ -667,9 +741,12 @@ process { ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ - path: { "${params.outdir}/deduplication/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/deduplication/${params.deduplication_tool}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] ] } @@ -677,9 +754,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_dedupped" } publishDir = [ - path: { "${params.outdir}/deduplication/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/deduplication/${params.deduplication_tool}/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } @@ -693,23 +773,42 @@ process { "${meta_fastqs.single_end}" == false && "${params.preprocessing_skippairmerging}" == false ? "-merged" : "" ].join(' ').trim()} publishDir = [ - path: { "${params.outdir}/host_removal/" }, - mode: params.publish_dir_mode, - pattern: '*.fq.gz' + [ + // data + path: { "${params.outdir}/read_filtering/host_removal/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fq.gz' ] + ] } // - // PRESEQ + // AUTHENTICATION // + withName: ENDORSPY { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + publishDir = [ + [ + // stats + path: { "${params.outdir}/authentication/endorspy/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.json' + ] + ] + } + withName: PRESEQ_CCURVE { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.args = "-B -s ${params.mapstats_preseq_stepsize}" ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapstats/preseq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + [ + // stats + path: { "${params.outdir}/authentication/preseq/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } @@ -726,15 +825,15 @@ process { ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/mapstats/preseq" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/authentication/preseq/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } - // - // BEDTOOLS_COVERAGE - // withName: SAMTOOLS_VIEW_GENOME { tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ @@ -747,9 +846,12 @@ process { ext.args = '-mean -nonamecheck' ext.prefix = { "${meta.sample_id}_${meta.reference}_depth" } publishDir = [ - path: { "${params.outdir}/mapstats/bedtools" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/authentication/bedtools/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } @@ -758,166 +860,261 @@ process { ext.args = '-nonamecheck' ext.prefix = { "${meta.sample_id}_${meta.reference}_breadth" } publishDir = [ - path: { "${params.outdir}/mapstats/bedtools" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + [ + // stats + path: { "${params.outdir}/authentication/bedtools/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] ] } - - // - // DAMAGE MANIPULATION - // - - withName: BEDTOOLS_MASKFASTA { - ext.prefix = { "${meta.id}.masked" } + withName: ANGSD_DOCOUNTS { + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ + "-iCounts 1", + "-r ${params.contamination_estimation_angsd_chrom_name}:${params.contamination_estimation_angsd_range_from}-${params.contamination_estimation_angsd_range_to}", + "-minMapQ ${params.contamination_estimation_angsd_mapq}", + "-minQ ${params.contamination_estimation_angsd_minq}" + ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/reference/masked_reference/" }, - mode: params.publish_dir_mode, - pattern: '*.masked.fa' + enabled: false ] } - withName: MAPDAMAGE2 { + withName: ANGSD_CONTAMINATION { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { [ - "--rescale", - // "--rescale-out=${task.ext.prefix}.bam", // This doesn't work, because the output is expected to be in the mapdamage results dir. - "--seq-length=${params.damage_manipulation_rescale_seqlength}", - params.damage_manipulation_rescale_length_3p != 0 ? "--rescale-length-3p=${params.damage_manipulation_rescale_length_3p}" : "", - params.damage_manipulation_rescale_length_5p != 0 ? "--rescale-length-5p=${params.damage_manipulation_rescale_length_5p}" : "", - { meta.strandedness } == "single" ? 
'--single-stranded' : '' - ].join(' ').trim() } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ [ - path: { "${params.outdir}/damage_manipulation/" }, + // stats + path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/"}, mode: params.publish_dir_mode, - pattern: 'result*/*.rescaled.bam', - saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam" } // Needed to save the bam directly in the output dir instead of within the mapdamage results dir. - ], + pattern: '*.txt' + ] + ] + } + + withName: PRINT_CONTAMINATION_ANGSD { + publishDir = [ [ - path: { "${params.outdir}/damage_manipulation/" }, + // stats + path: { "${params.outdir}/authentication/angsd_nuclear_contamination/stats/"}, mode: params.publish_dir_mode, - pattern: 'results_*/Stats_out_MCMC_*' + pattern: 'nuclear_contamination*' ] ] } - withName: SAMTOOLS_INDEX_DAMAGE_RESCALED { + withName: MTNUCRATIO { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}', - saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam.bai" } // ext.prefix cannot be used here, so rename for publishing instead. + enabled: false ] } - withName: PMDTOOLS_FILTER { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args2 = {[ - "${meta.damage_treatment}" == 'none' ? '--UDGminus' : "${meta.damage_treatment}" == 'half' ? '--UDGhalf' : '--UDGplus' , - "${meta.strandedness}" == 'single' ? 
'--ss' : '' - ].join(' ').trim()} - ext.args3 = { "-h" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } + withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { + tag = { "${meta.reference}|${meta.sample_id}" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*_pmdfiltered.bam' + [ + // stats + path: { "${params.outdir}/authentication/qualimap/stats/${meta.reference}/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } - withName: SAMTOOLS_INDEX_DAMAGE_FILTERED { + withName: DAMAGEPROFILER { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { params.fasta_largeref ? "-c" : "" } + ext.args = [ + "-l ${params.damagecalculation_damageprofiler_length}", + "-t ${params.damagecalculation_xaxis}", + "-yaxis_dp_max ${params.damagecalculation_yaxis}" + ].join(' ').trim() + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // stats + path: { "${params.outdir}/authentication/damageprofiler/stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] ] } - withName: SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED { + withName: CALCULATE_MAPDAMAGE2 { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } + ext.args = { [ + "--no-stats", + "-y ${params.damagecalculation_yaxis}", + params.damagecalculation_mapdamage_downsample != 0 ? "-n ${params.damagecalculation_mapdamage_downsample} --downsample-seed=1" : "", + meta.strandedness == "single" ? 
'--single-stranded' : '', + "-m ${params.damagecalculation_xaxis}" + ].join(' ').trim() } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/authentication/mapdamage2/stats/" }, + mode: params.publish_dir_mode, + pattern: 'results_*/*' + ] ] } - withName: BAMUTIL_TRIMBAM { + withName: SAMTOOLS_DEPTH_SEXDETERRMINE { + tag = { "${meta1.reference}|${meta1.sample_id}" } + ext.prefix = { "${meta2.id}_samtoolsdepth" } + ext.args = '-aa -q30 -Q30 -H' + publishDir = [ + enabled: false + ] + } + + withName: SEXDETERRMINE { + tag = { "${meta.reference}|${meta.sample_id}" } + ext.prefix = { "${meta.reference}_sexdeterrmine" } + publishDir = [ + [ + // stats + path: { "${params.outdir}/authentication/sexdeterrmine/stats/" }, + mode: params.publish_dir_mode, + pattern: '*{_sexdeterrmine}*', + ] + ] + } + + // + // DAMAGE MANIPULATION + // + withName: BEDTOOLS_MASKFASTA { + ext.prefix = { "${meta.id}.masked" } + publishDir = [ + [ + // data + path: { "${params.outdir}/references/masked_reference/data/" }, + mode: params.publish_dir_mode, + pattern: '*.masked.fa' + ] + ] + } + + withName: MAPDAMAGE2 { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - params.damage_manipulation_bamutils_softclip ? '-c' : '' - ].join(' ').trim() - ext.prefix = { params.run_pmd_filtering ? "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered_trimmed" : "${meta.sample_id}_${meta.library_id}_${meta.reference}_trimmed" } + ext.args = { [ + "--rescale", + // "--rescale-out=${task.ext.prefix}.bam", // This doesn't work, because the output is expected to be in the mapdamage results dir. + "--seq-length=${params.damage_manipulation_rescale_seqlength}", + params.damage_manipulation_rescale_length_3p != 0 ? 
"--rescale-length-3p=${params.damage_manipulation_rescale_length_3p}" : "", + params.damage_manipulation_rescale_length_5p != 0 ? "--rescale-length-5p=${params.damage_manipulation_rescale_length_5p}" : "", + meta.strandedness == "single" ? '--single-stranded' : '' // compare the value itself; a closure literal never equals a String + ].join(' ').trim() } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*_trimmed.bam' + [ + // data + path: { "${params.outdir}/damage_manipulation/mapdamage2/data/" }, + mode: params.publish_dir_mode, + pattern: 'result*/*.rescaled.bam', + saveAs: { "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam" } // Needed to save the bam directly in the output dir instead of within the mapdamage results dir. + ], + [ + // stats + path: { "${params.outdir}/damage_manipulation/mapdamage2/stats/" }, + mode: params.publish_dir_mode, + pattern: 'results_*/Stats_out_MCMC_*' + ] ] } - withName: SAMTOOLS_INDEX_DAMAGE_TRIMMED { + withName: SAMTOOLS_INDEX_DAMAGE_RESCALED { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } ext.args = { params.fasta_largeref ? "-c" : "" } publishDir = [ - path: { "${params.outdir}/damage_manipulation/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/damage_manipulation/mapdamage2/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + saveAs: { filename -> "${meta.sample_id}_${meta.library_id}_${meta.reference}_rescaled.bam.${filename.endsWith('.csi') ? 'csi' : 'bai'}" } // ext.prefix cannot be used here, so rename for publishing instead; keep the index's own extension (.csi when -c is used). 
+ ] ] } - // - // CONTAMINATION ESTIMATION - // - withName: ANGSD_DOCOUNTS { - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + withName: PMDTOOLS_FILTER { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - "-iCounts 1", - "-r ${params.contamination_estimation_angsd_chrom_name}:${params.contamination_estimation_angsd_range_from}-${params.contamination_estimation_angsd_range_to}", - "-minMapQ ${params.contamination_estimation_angsd_mapq}", - "-minQ ${params.contamination_estimation_angsd_minq}" - ].join(' ').trim() + ext.args2 = {[ + "${meta.damage_treatment}" == 'none' ? '--UDGminus' : "${meta.damage_treatment}" == 'half' ? '--UDGhalf' : '--UDGplus' , + "${meta.strandedness}" == 'single' ? '--ss' : '' + ].join(' ').trim()} + ext.args3 = { "-h" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } publishDir = [ - enabled: false + [ + // data + path: { "${params.outdir}/damage_manipulation/pmdtools/data/" }, + mode: params.publish_dir_mode, + pattern: '*_pmdfiltered.bam' + ] ] } - withName: ANGSD_CONTAMINATION { + withName: SAMTOOLS_INDEX_DAMAGE_FILTERED { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } publishDir = [ - path: { "${params.outdir}/contamination_estimation/angsd/"}, - mode: params.publish_dir_mode, - pattern: '*.txt' + [ + // data + path: { "${params.outdir}/damage_manipulation/pmdtools/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] ] } - withName: PRINT_CONTAMINATION_ANGSD { + withName: SAMTOOLS_FLAGSTAT_DAMAGE_FILTERED { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered" } publishDir = [ - path: { "${params.outdir}/contamination_estimation/angsd/"}, - mode: params.publish_dir_mode, - pattern: 'nuclear_contamination*' + [ + // stats + path: { "${params.outdir}/damage_manipulation/pmdtools/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } - // - // MT-NUCLEAR RATIO - // - withName: MTNUCRATIO { + withName: BAMUTIL_TRIMBAM { tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = [ + params.damage_manipulation_bamutils_softclip ? '-c' : '' + ].join(' ').trim() + ext.prefix = { params.run_pmd_filtering ? "${meta.sample_id}_${meta.library_id}_${meta.reference}_pmdfiltered_trimmed" : "${meta.sample_id}_${meta.library_id}_${meta.reference}_trimmed" } publishDir = [ - enabled: false + [ + // data + path: { "${params.outdir}/damage_manipulation/bamutils_trimbam/data/" }, + mode: params.publish_dir_mode, + pattern: '*_trimmed.bam' + ] ] } + withName: SAMTOOLS_INDEX_DAMAGE_TRIMMED { + tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } + ext.args = { params.fasta_largeref ? 
"-c" : "" } + publishDir = [ + [ + // data + path: { "${params.outdir}/damage_manipulation/bamutils_trimbam/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + ] + } // // METAGENOMIC SCREENING @@ -931,9 +1128,17 @@ process { ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" } publishDir = [ [ - path: { "${params.outdir}/metagenomics/complexity_filter/prinseq" }, + // data + path: { "${params.outdir}/metagenomics/prinseq/data/" }, + mode: params.publish_dir_mode, + pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz}', + enabled: params.metagenomics_complexity_savefastq + ], + [ + // stats + path: { "${params.outdir}/metagenomics/prinseq/stats/" }, mode: params.publish_dir_mode, - pattern: '*{_good_out.fastq.gz,_good_out_R1.fastq.gz,_good_out_R2.fastq.gz,log}', + pattern: '*log', enabled: params.metagenomics_complexity_savefastq ] ] @@ -944,10 +1149,20 @@ process { ext.args = { "entropymask=f entropy=${params.metagenomics_complexity_entropy}" } ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}_complexity" } publishDir = [ - path: { "${params.outdir}/metagenomics/complexity_filter/bbduk/" }, - mode: params.publish_dir_mode, - pattern: '*.{fastq.gz,log}', - enabled: params.metagenomics_complexity_savefastq + [ + // data + path: { "${params.outdir}/metagenomics/bbduk/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.metagenomics_complexity_savefastq + ], + [ + // stats + path: { "${params.outdir}/metagenomics/bbduk/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.log', + enabled: params.metagenomics_complexity_savefastq + ] ] } @@ -962,20 +1177,32 @@ process { params.metagenomics_malt_minsupportmode == "percent" ? "-supp ${params.metagenomics_malt_minsupportpercent}" : "-sup ${params.metagenomics_malt_minsupportreads}", params.metagenomics_malt_savereads ? 
"--alignments ./" : "" ].join(' ').trim() + ext.prefix = { "${meta.label}_${meta.id}-run" } publishDir = [ - path: { "${params.outdir}/metagenomics/profiling/malt/" }, - mode: params.publish_dir_mode, - pattern: '*.{rma6,log,sam.gz}' + [ + // data + path: { "${params.outdir}/metagenomics/malt/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{rma6,sam.gz}' + ], + [ + // stats + path: { "${params.outdir}/metagenomics/malt/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] ] - ext.prefix = { "${meta.label}_${meta.id}-run" } } withName: CAT_CAT_MALT { ext.prefix = { "${meta.id}_runtime_log_concatenated.log" } publishDir = [ - path: { "${params.outdir}/metagenomics/profiling/malt/" }, - mode: params.publish_dir_mode, - pattern: '*.{log}' + [ + // stats + path: { "${params.outdir}/metagenomics/malt/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] ] } @@ -985,27 +1212,48 @@ process { ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/metagenomics/profiling/kraken2/" }, - mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + [ + // data + path: { "${params.outdir}/metagenomics/kraken2/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz' + ], + [ + // stats + path: { "${params.outdir}/metagenomics/kraken2/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.txt' + ] ] } withName: KRAKENUNIQ_PRELOADEDKRAKENUNIQ { publishDir = [ - path: { "${params.outdir}/metagenomics/profiling/krakenuniq/" }, - mode: params.publish_dir_mode, - pattern: '*.{txt,fastq.gz}' + [ + // data + path: { "${params.outdir}/metagenomics/krakenuniq/data/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz' + ], + [ + // stats + path: { "${params.outdir}/metagenomics/krakenuniq/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.txt' + ] ] } withName: METAPHLAN_METAPHLAN { + ext.prefix = { 
"${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/metagenomics/profiling/metaphlan/" }, - mode: params.publish_dir_mode, - pattern: '*.{biom,txt}' + [ + // stats + path: { "${params.outdir}/metagenomics/metaphlan/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.{biom,txt}' + ] ] - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } } withName: MALTEXTRACT { @@ -1022,10 +1270,13 @@ process { { meta.strandedness } == "single" ? '--singleStranded' : '', ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" }, - mode: params.publish_dir_mode, - pattern: 'results', - saveAs: { "${meta.id}" } + [ + // stats + path: { "${params.outdir}/metagenomics/maltextract/stats/" }, + mode: params.publish_dir_mode, + pattern: 'results', + saveAs: { "${meta.id}" } + ] ] } @@ -1034,15 +1285,18 @@ process { ext.args = "-c2c Taxonomy" ext.prefix = { "${meta.id}" } publishDir = [ - path: { "${params.outdir}/metagenomics/postprocessing/megan_summaries/" }, - mode: params.publish_dir_mode, - pattern: '*.{txt.gz,megan}' + [ + // stats + path: { "${params.outdir}/metagenomics/megan_summaries/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.{txt.gz,megan}' + ] ] } withName: AMPS { publishDir = [ - path: { "${params.outdir}/metagenomics/postprocessing/maltextract/" }, + path: { "${params.outdir}/metagenomics/amps/stats/" }, mode: params.publish_dir_mode, pattern: 'results' ] @@ -1050,98 +1304,32 @@ process { } withName: TAXPASTA_MERGE { + ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } publishDir = [ - path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" }, - mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + [ + // stats + path: { "${params.outdir}/metagenomics/taxpasta/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + 
] ] - ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}_taxpasta_table.tsv" } } withName: TAXPASTA_STANDARDISE { - publishDir = [ - path: { "${params.outdir}/metagenomics/postprocessing/taxpasta/" }, - mode: params.publish_dir_mode, - pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' - ] ext.args = { "--profiler ${meta.profiler} --output ${meta.profiler}taxpasta_table.tsv" } - } - - // - // QUALIMAP - // - - withName: 'QUALIMAP_BAMQC_WITHBED|QUALIMAP_BAMQC_NOBED' { - tag = { "${meta.reference}|${meta.sample_id}" } - publishDir = [ - path: { "${params.outdir}/mapstats/qualimap/${meta.reference}/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - // - // DAMAGE CALCULATION - // - withName: DAMAGEPROFILER { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = [ - "-l ${params.damagecalculation_damageprofiler_length}", - "-t ${params.damagecalculation_xaxis}", - "-yaxis_dp_max ${params.damagecalculation_yaxis}" - ].join(' ').trim() - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } - publishDir = [ - path: { "${params.outdir}/damage_estimation/damageprofiler/" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, - ] - } - - withName: CALCULATE_MAPDAMAGE2 { - tag = { "${meta.reference}|${meta.sample_id}_${meta.library_id}" } - ext.args = { [ - "--no-stats", - "-y ${params.damagecalculation_yaxis}", - params.damagecalculation_mapdamage_downsample != 0 ? "-n ${params.damagecalculation_mapdamage_downsample} --downsample-seed=1" : "", - { meta.strandedness } == "single" ? 
'--single-stranded' : '', - "-m ${params.damagecalculation_xaxis}" - ].join(' ').trim() } - ext.prefix = { "${meta.sample_id}_${meta.library_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/damage_estimation/mapDamage2/" }, - mode: params.publish_dir_mode, - pattern: 'results_*/*' - ] - } - - // - // RUN SEXDETERRMINE - // - withName: SAMTOOLS_DEPTH_SEXDETERRMINE { - tag = { "${meta1.reference}|${meta1.sample_id}" } - ext.prefix = { "${meta2.id}_samtoolsdepth" } - ext.args = '-aa -q30 -Q30 -H' - publishDir = [ - enabled: false - ] - } - - withName: SEXDETERRMINE { - tag = { "${meta.reference}|${meta.sample_id}" } - ext.prefix = { "${meta.reference}_sexdeterrmine" } - publishDir = [ - path: { "${params.outdir}/sex_determination/" }, - mode: params.publish_dir_mode, - pattern: '*{_sexdeterrmine}*', - enabled: true + [ + // stats + path: { "${params.outdir}/metagenomics/taxpasta/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.{csv,tsv,ods,xlsx,arrow,parquet,biom}' + ] ] } // // LIBRARY MERGE // - withName: ".*MERGE_LIBRARIES:SAMTOOLS_MERGE_LIBRARIES" { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}_unsorted" } @@ -1154,9 +1342,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/raw/" }, - mode: params.publish_dir_mode, - pattern: '*.bam' + [ + // data + path: { "${params.outdir}/final_bams/raw/data/" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] ] } @@ -1165,9 +1356,12 @@ process { ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/raw/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/final_bams/raw/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] ] } @@ -1175,9 +1369,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/raw/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/final_bams/raw/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } @@ -1193,9 +1390,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, - mode: params.publish_dir_mode, - pattern: '*.bam' + [ + // data + path: { "${params.outdir}/final_bams/${params.genotyping_source}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] ] } @@ -1204,9 +1404,12 @@ process { ext.args = { params.fasta_largeref ? 
"-c" : "" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, - mode: params.publish_dir_mode, - pattern: '*.{bai,csi}' + [ + // data + path: { "${params.outdir}/final_bams/${params.genotyping_source}/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] ] } @@ -1214,16 +1417,18 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/final_bams/${params.genotyping_source}/" }, - mode: params.publish_dir_mode, - pattern: '*.flagstat' + [ + // stats + path: { "${params.outdir}/final_bams/${params.genotyping_source}/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat' + ] ] } // // GENOTYPING // - withName: SAMTOOLS_MPILEUP_PILEUPCALLER { tag = { "${meta.reference}|${meta.strandedness}" } ext.args = [ @@ -1256,9 +1461,12 @@ process { tag = { "${meta.reference}" } ext.prefix = { "pileupcaller_genotypes_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.{geno,snp,ind}', + [ + // data + path: { "${params.outdir}/genotyping/pileupcaller/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{geno,snp,ind}', + ] ] } @@ -1267,9 +1475,12 @@ process { ext.args = { "-j ${prefix}.json" } ext.prefix = { "pileupcaller_genotypes_${meta.reference}_coverage" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.{tsv}', + [ + // stats + path: { "${params.outdir}/genotyping/pileupcaller/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.tsv', + ] ] } @@ -1291,10 +1502,13 @@ process { ].join(' ').trim() ext.prefix = { "${meta.sample_id}_${meta.reference}_realigned" } publishDir = [ - enabled: params.genotyping_gatk_ug_keeprealignbam, - path: { "${params.outdir}/genotyping/IndelRealigner" }, - mode: params.publish_dir_mode, - 
pattern: '*.{bam,bai}' + [ + // data + path: { "${params.outdir}/genotyping/gatk_ug/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{bam,bai}', + enabled: params.genotyping_gatk_ug_keeprealignbam, + ] ] } @@ -1310,9 +1524,12 @@ process { ].join(' ').trim() } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.vcf.gz', + [ + // data + path: { "${params.outdir}/genotyping/gatk_ug/data/" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + ] ] } @@ -1321,9 +1538,12 @@ process { ext.args = "--tbi" //tbi indices for consistency with GATK HC ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.vcf.gz.tbi', + [ + // data + path: { "${params.outdir}/genotyping/gatk_ug/data/" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz.tbi', + ] ] } @@ -1338,9 +1558,12 @@ process { ].join(' ').trim() } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.{vcf.gz,vcf.gz.tbi}', + [ + // data + path: { "${params.outdir}/genotyping/gatk_hc/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{vcf.gz,vcf.gz.tbi}', + ] ] } @@ -1353,9 +1576,12 @@ process { ].join(' ').trim() } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.vcf.gz', + [ + // data + path: { "${params.outdir}/genotyping/freebayes/data/" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + ] ] } @@ -1364,9 +1590,12 @@ process { ext.args = "--tbi" //tbi indices for consistency with GATK HC ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.vcf.gz.tbi', + [ 
+ // data + path: { "${params.outdir}/genotyping/freebayes/data/" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz.tbi', + ] ] } @@ -1374,9 +1603,12 @@ process { tag = { "${meta.reference}|${meta.sample_id}" } ext.prefix = { "${meta.sample_id}_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.txt', + [ + // stats + path: { "${params.outdir}/genotyping/bcftools/stats/" }, + mode: params.publish_dir_mode, + pattern: '*.txt', + ] ] } @@ -1393,9 +1625,12 @@ process { } ext.prefix = { "angsd_${meta.reference}" } publishDir = [ - path: { "${params.outdir}/genotyping/" }, - mode: params.publish_dir_mode, - pattern: '*.{glf,beagle}.gz', + [ + // data + path: { "${params.outdir}/genotyping/angsd/data/" }, + mode: params.publish_dir_mode, + pattern: '*.{glf,beagle}.gz', + ] ] } } From 4e52dc3327e928e315df7fed1c31a67ad7c1f155 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 14 Feb 2025 11:03:13 +0100 Subject: [PATCH 2/7] ensure single_ref gunzip gets correct configuration --- subworkflows/local/reference_indexing_single.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/reference_indexing_single.nf b/subworkflows/local/reference_indexing_single.nf index 41feced1..e81f8080 100644 --- a/subworkflows/local/reference_indexing_single.nf +++ b/subworkflows/local/reference_indexing_single.nf @@ -3,7 +3,7 @@ // include { grabUngzippedExtension } from '../../subworkflows/local/utils_nfcore_eager_pipeline/main' -include { GUNZIP } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' include { BWA_INDEX } from '../../modules/nf-core/bwa/index/main' include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' include { SAMTOOLS_FAIDX } from '../../modules/nf-core/samtools/faidx/main' @@ -27,9 +27,9 @@ workflow REFERENCE_INDEXING_SINGLE { // Detect if fasta is
gzipped or not, unzip if necessary, and generate meta ID by sanitizing file if ( fasta.extension == 'gz' ) { ch_gz_ref = Channel.fromPath(fasta).map{[[], it]} - GUNZIP ( ch_gz_ref ) - ch_ungz_ref = GUNZIP.out.gunzip.map{[[id: clean_name], it[1] ]} - ch_versions = ch_versions.mix( GUNZIP.out.versions.first()) + GUNZIP_FASTA ( ch_gz_ref ) + ch_ungz_ref = GUNZIP_FASTA.out.gunzip.map{[[id: clean_name], it[1] ]} + ch_versions = ch_versions.mix( GUNZIP_FASTA.out.versions.first()) } else { ch_ungz_ref = Channel.fromPath(fasta).map{[[id: clean_name], it ]} } From f76eff0166edfb79967dd81b2020d9c1b758f505 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 14 Feb 2025 11:44:46 +0100 Subject: [PATCH 3/7] give untar unique module name. Deactivate publishdir --- conf/modules.config | 6 ++++++ subworkflows/local/metagenomics_profiling.nf | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b5927862..70aef263 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1206,6 +1206,12 @@ process { ] } + withName: UNTAR_METAGENOMICS { + publishDir = [ + enabled: false + ] + } + withName: KRAKEN2_KRAKEN2 { ext.args = [ params.metagenomics_kraken2_saveminimizers ? 
"--report-minimizer-data" : "" diff --git a/subworkflows/local/metagenomics_profiling.nf b/subworkflows/local/metagenomics_profiling.nf index f21cb559..149fc6d3 100644 --- a/subworkflows/local/metagenomics_profiling.nf +++ b/subworkflows/local/metagenomics_profiling.nf @@ -9,7 +9,7 @@ include { KRAKEN2_KRAKEN2 } from '../../modules/nf-core/kraken2/k include { KRAKENUNIQ_PRELOADEDKRAKENUNIQ } from '../../modules/nf-core/krakenuniq/preloadedkrakenuniq/main' include { METAPHLAN_METAPHLAN } from '../../modules/nf-core/metaphlan/metaphlan/main' include { CAT_CAT as CAT_CAT_MALT } from '../../modules/nf-core/cat/cat/main' -include { UNTAR } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_METAGENOMICS } from '../../modules/nf-core/untar/main' workflow METAGENOMICS_PROFILING { @@ -35,8 +35,8 @@ workflow METAGENOMICS_PROFILING { // untar the database ch_untar_input = ch_database.untar.map{ [[], it] } - UNTAR( ch_untar_input ) - ch_untar_output = UNTAR.out.untar.map{ it[1] } + UNTAR_METAGENOMICS( ch_untar_input ) + ch_untar_output = UNTAR_METAGENOMICS.out.untar.map{ it[1] } // back to the original database channel... 
ch_database = ch_database.base.mix(ch_untar_output) From 1e8fddab21cf3c84dcbd4a8e2fd94a9e9a9f663d Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 14 Feb 2025 11:45:03 +0100 Subject: [PATCH 4/7] correct importing of metagenomics SWF --- workflows/eager.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index b01f769e..9dbf9b54 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -27,7 +27,7 @@ include { MAP } from '../subwork include { FILTER_BAM } from '../subworkflows/local/bamfiltering.nf' include { DEDUPLICATE } from '../subworkflows/local/deduplicate' include { MANIPULATE_DAMAGE } from '../subworkflows/local/manipulate_damage' -include { METAGENOMICS_COMPLEXITYFILTER } from '../subworkflows/local/metagenomics_complexityfilter' +include { METAGENOMICS } from '../subworkflows/local/metagenomics' include { ESTIMATE_CONTAMINATION } from '../subworkflows/local/estimate_contamination' include { CALCULATE_DAMAGE } from '../subworkflows/local/calculate_damage' include { RUN_SEXDETERRMINE } from '../subworkflows/local/run_sex_determination' From 0c24ef3b3b197c422d56c863341fd1f26ec45a43 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 14 Feb 2025 12:50:33 +0100 Subject: [PATCH 5/7] fix parameter names in test_nothing profile --- conf/test_nothing.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conf/test_nothing.config b/conf/test_nothing.config index 97607484..a16745b6 100644 --- a/conf/test_nothing.config +++ b/conf/test_nothing.config @@ -33,13 +33,13 @@ params { skip_preprocessing = true skip_deduplication = true skip_qualimap = true - skip_damage_calculation = true + skip_damagecalculation = true mapstats_skip_preseq = true run_fastq_sharding = false run_bamfiltering = false run_bedtools_coverage = false - run_metagenomicscreening = false + run_metagenomics = false run_contamination_estimation_angsd = false run_mtnucratio = false 
run_mapdamage_rescaling = false From c353212463f2a6262b712a48bac886d1670239c9 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 14 Feb 2025 12:59:33 +0100 Subject: [PATCH 6/7] add todos --- conf/modules.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 70aef263..ad2d0637 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1167,6 +1167,7 @@ process { } withName: MALT_RUN { + // TODO needs a tag ext.args = [ "-m ${params.metagenomics_malt_mode}", "-at ${params.metagenomics_malt_alignmentmode}", @@ -1195,6 +1196,7 @@ process { } withName: CAT_CAT_MALT { + // TODO needs a tag ext.prefix = { "${meta.id}_runtime_log_concatenated.log" } publishDir = [ [ @@ -1263,6 +1265,7 @@ process { } withName: MALTEXTRACT { + // TODO needs a tag ext.args = [ "-f ${params.metagenomics_maltextract_filter}", "-a ${params.metagenomics_maltextract_toppercent}", From 4a94c2fcedb2319c89d331cf830f39e385a7a731 Mon Sep 17 00:00:00 2001 From: Thiseas Christos Lamnidis Date: Fri, 21 Feb 2025 10:41:36 +0100 Subject: [PATCH 7/7] fix preseq lc_extrap call. 
fixes test_nothing --- workflows/eager.nf | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/workflows/eager.nf b/workflows/eager.nf index 9dbf9b54..dd98a790 100644 --- a/workflows/eager.nf +++ b/workflows/eager.nf @@ -432,15 +432,12 @@ workflow EAGER { ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_CCURVE.out.c_curve.collect { it[1] }.ifEmpty([])) ch_versions = ch_versions.mix(PRESEQ_CCURVE.out.versions) } - else { - (!params.mapstats_skip_preseq && params.mapstats_preseq_mode == 'lc_extrap').call { + else if (!params.mapstats_skip_preseq && params.mapstats_preseq_mode == 'lc_extrap') { PRESEQ_LCEXTRAP(ch_reads_for_deduplication.map { [it[0], it[1]] }) ch_multiqc_files = ch_multiqc_files.mix(PRESEQ_LCEXTRAP.out.lc_extrap.collect { it[1] }.ifEmpty([])) ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions) - } } - // // MODULE: Bedtools coverage //