-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMICROSCOPE.config
145 lines (119 loc) · 6.77 KB
/
MICROSCOPE.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
profiles {
// Profile for running eager on MICROSCOPE data. Includes the parameters for the runs.
microscope {
    // Leave work files in place (no cleanup) so that a run can be resumed.
    cleanup = false

    // Executor queue size for this profile: 12.
    executor.queueSize = 12

    params {
        // --- Adapter removal ---
        clip_adapters_list = "/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/assets/adapter_list.txt"

        // --- Mapping reference and its indexes ---
        fasta       = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa'
        fasta_index = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa.fai'
        bwa_index   = '/mnt/archgen/Reference_Genomes/Human/hs37d5/'
        seq_dict    = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.dict'

        // --- Mapping ---
        bwaalnn = 0.01

        // --- BAM filtering ---
        // Filter out unmapped reads so that the MultiQC barplots are not
        // dominated by them.
        run_bam_filtering = true
        // Keep the unmapped reads as a separate fastq file; preferred format
        // for possible future pathogen screening.
        bam_unmapped_type = 'fastq'
        // Keep MapQ 30+ only. Together with snpcapture_bed this is needed for
        // the poseidon "coverage on target SNPs" field.
        // Consequence: reads mapped with MapQ below 30 are lost after
        // filtering; they are in neither the fastq NOR the filtered bam.
        bam_mapping_quality_threshold = 30

        // --- On-target coverage and capture efficiency metrics ---
        snpcapture_bed = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

        // --- mtDNA to nuclear ratio ---
        run_mtnucratio    = true
        mtnucratio_header = "MT"

        // --- BAM trimming ---
        run_trim_bam = true
        // dsDNA half-UDG libraries: trim 2 bp off either side.
        bamutils_clip_double_stranded_half_udg_left  = 2
        bamutils_clip_double_stranded_half_udg_right = 2
        // dsDNA non-UDG libraries: trim 7 bp off either side.
        bamutils_clip_double_stranded_none_udg_left  = 7
        bamutils_clip_double_stranded_none_udg_right = 7
        // ssDNA libraries: no trimming, since --singleStrandMode removes
        // damage artefacts.
        bamutils_clip_single_stranded_half_udg_left  = 0
        bamutils_clip_single_stranded_half_udg_right = 0
        bamutils_clip_single_stranded_none_udg_left  = 0
        bamutils_clip_single_stranded_none_udg_right = 0

        // --- Genotyping ---
        // Genotype from the trimmed bams.
        genotyping_source             = 'trimmed'
        run_genotyping                = true
        genotyping_tool               = 'pileupcaller'
        pileupcaller_min_map_quality  = 30
        pileupcaller_min_base_quality = 30
        pileupcaller_bedfile          = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'
        pileupcaller_snpfile          = '/mnt/archgen/public_data/Datashare_Boston_Jena_June2018.backup/1240K.snp'

        // --- Sex determination ---
        run_sexdeterrmine     = true
        sexdeterrmine_bedfile = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

        // --- Nuclear contamination ---
        run_nuclear_contamination = true
        contamination_chrom_name  = 'X'

        // --- 1240k coverage/depth calculation ---
        run_bedtools_coverage = true
        anno_file             = '/mnt/archgen/Reference_Genomes/Human/hs37d5/SNPCapBEDs/1240K.pos.list_hs37d5.0based.bed'

        // --- MultiQC ---
        // Custom MultiQC config file with an increased max_table_rows value.
        multiqc_config = '/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/MICROSCOPE_multiqc_config.yaml'
    }

    // Per-process resource overrides.
    process {
        maxRetries = 2

        withName: 'markduplicates' {
            // Author's rationale: when MarkDuplicates is short on memory its
            // runtime balloons, because it keeps waiting for the garbage
            // collector to free memory before it can continue. Giving it much
            // more memory should cut the runtime from multiple hours to a few
            // minutes.
            // Memory by attempt: 16 GB by default, 24 GB on attempt 2,
            // 32 GB on attempt 3.
            memory = { task.attempt == 3 ? 32.GB : (task.attempt == 2 ? 24.GB : 16.GB) }
        }

        withName: 'bwa' {
            // More cores for bwa to reduce runtime.
            cpus = 16
            // Memory by attempt: 16 GB by default, 24 GB on attempt 2,
            // 32 GB on attempt 3.
            memory = { task.attempt == 3 ? 32.GB : (task.attempt == 2 ? 24.GB : 16.GB) }
        }
    }
}
// Specify this profile INSTEAD of the microscope profile to prepare data for ENA upload.
ena_processing {
    // Leave work files in place (no cleanup) so that a run can be resumed.
    cleanup = false

    // Executor queue size for this profile: 12.
    executor.queueSize = 12

    params {
        // --- Skip any statistic generation ---
        skip_preseq             = true
        skip_damage_calculation = true
        skip_qualimap           = true
        skip_deduplication      = true

        // --- Adapter removal ---
        // No read collapsing for paired-end data.
        skip_collapse = true
        // Must be 1 rather than 0: with 0, empty reads are left in, which
        // breaks downstream processing.
        clip_readlength = 1
        // No base quality filtering.
        clip_min_read_quality = 0
        clip_adapters_list = "/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/assets/adapter_list.txt"

        // --- Mapping reference and its indexes ---
        fasta       = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa'
        fasta_index = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.fa.fai'
        bwa_index   = '/mnt/archgen/Reference_Genomes/Human/hs37d5/'
        seq_dict    = '/mnt/archgen/Reference_Genomes/Human/hs37d5/hs37d5.dict'

        // --- Mapping ---
        bwaalnn = 0.01

        // --- MultiQC ---
        // Custom MultiQC config file with an increased max_table_rows value.
        multiqc_config = '/r1/people/thiseas_christos_lamnidis/Software/github/Schiffels-Popgen/MICROSCOPE-processing-pipeline/MICROSCOPE_multiqc_config.yaml'
    }
}
// Profile for microscope_automated_analysis
automated_analysis {
    process {
        withName: 'kinship_read' {
            // Per the original author: exit code 11 means no individuals pass
            // coverage, and exit code 1 means a single individual passes
            // coverage. Only exit status 11 is ignored here; every other
            // exit status (1 included) selects the 'retry' strategy.
            errorStrategy = {
                task.exitStatus in [11] ? 'ignore' : 'retry'
            }
        }

        // Disabled override for microscope_pca, kept for reference.
        // NOTE(review): the original note read "Exit code is 1 the PCA does
        // not rerunning" -- presumably exit code 1 means the PCA does not
        // need rerunning; confirm before re-enabling.
        // withName: microscope_pca {
        //     errorStrategy = { task.exitStatus in [1] ? 'ignore' : 'retry' }
        // }
    }
}
}