File tree Expand file tree Collapse file tree 9 files changed +84
-3
lines changed Expand file tree Collapse file tree 9 files changed +84
-3
lines changed Original file line number Diff line number Diff line change 26
26
run : conda config --show-sources
27
27
- name : Run tests
28
28
run : |
29
+ export NXF_VER=22.04.5
29
30
make
Original file line number Diff line number Diff line change 29
29
bash test/scripts/run_test_11.sh
30
30
bash test/scripts/run_test_12.sh
31
31
bash test/scripts/run_test_13.sh
32
+ bash test/scripts/run_test_14.sh
Original file line number Diff line number Diff line change @@ -268,6 +268,25 @@ These annotations are provided by VAFator (https://github.com/TRON-Bioinformatic
268
268
269
269
No technical annotations are performed if the parameter ` --input_bams ` is not passed.
270
270
271
+ ## Phasing with WhatsHap
272
+
273
+ The phase of the mutations can be inferred from the reads pileup.
274
+ This information is relevant to determine the impact of nearby mutations.
275
+ WhatsHap adds the phasing information when possible to the VCF following the VCF specification for that purpose.
276
+ Genotypes with phase information use the ` | ` instead of the ` / ` and ` 0|1 ` and ` 1|0 ` represent the two different phases.
277
+ A homozygous mutations in phase is represented as ` 1|1 ` .
278
+ Furthermore, because phasing may be incomplete, to define the different phased blocks the INFO/PS annotation used.
279
+ All mutations belonging to the same phase block share the same PS number.
280
+
281
+ BAM files need to be provided to perform phasing with ` --input_bams ` and the phasing sample has to be determined with
282
+ ` --phasing_sample ` .
283
+
284
+ This approach has some limitations:
285
+ - We only support diploid genotyped VCF files. WhatsHap does support polyploid samples. But the particular case of
286
+ somatic mutations is not supported.
287
+ - The sample name chosen to perform the phasing has to be the same across all VCFs.
288
+ Not to be mistaken with the patient name. Eg: only ` normal ` samples across all patients can be used for phasing
289
+
271
290
## Functional annotations
272
291
273
292
The functional annotations provide a biological context for every variant. Such as the overlapping genes or the effect
Original file line number Diff line number Diff line change @@ -6,10 +6,13 @@ include { FILTER_VCF } from './modules/01_filter'
6
6
include { BCFTOOLS_NORM ; VT_DECOMPOSE_COMPLEX ; REMOVE_DUPLICATES } from ' ./modules/02_normalization'
7
7
include { SUMMARY_VCF ; SUMMARY_VCF as SUMMARY_VCF_2 } from ' ./modules/03_summary'
8
8
include { VAFATOR ; MULTIALLELIC_FILTER } from ' ./modules/04_vafator'
9
- include { VARIANT_ANNOTATION_SNPEFF ; VARIANT_ANNOTATION_BCFTOOLS } from ' ./modules/05_variant_annotation'
9
+ include { WHATSHAP } from ' ./modules/05_phasing'
10
+ include { VARIANT_ANNOTATION_SNPEFF ; VARIANT_ANNOTATION_BCFTOOLS } from ' ./modules/06_variant_annotation'
11
+
10
12
11
13
params. help= false
12
14
params. input_vcfs = false
15
+ params. input_bams = false
13
16
params. input_vcf = false
14
17
15
18
// optional VAFator inputs
@@ -32,6 +35,7 @@ params.skip_multiallelic_filter = false
32
35
// SnpEff input
33
36
params. snpeff_organism = false
34
37
params. snpeff_datadir = false
38
+ params. phasing_sample = false
35
39
36
40
37
41
if (params. help) {
@@ -136,6 +140,10 @@ workflow {
136
140
final_vcfs = MULTIALLELIC_FILTER (final_vcfs)
137
141
final_vcfs = MULTIALLELIC_FILTER . out. filtered_vcf
138
142
}
143
+ if (params. phasing_sample) {
144
+ WHATSHAP (final_vcfs. join(input_bams. groupTuple()))
145
+ final_vcfs = WHATSHAP . out. phased_vcf
146
+ }
139
147
}
140
148
141
149
if (params. snpeff_organism) {
Original file line number Diff line number Diff line change
1
+ params. phasing_sample = false
2
+ params. cpus = 1
3
+ params. memory = ' 3g'
4
+
5
+
6
+ process WHATSHAP {
7
+ cpus params. cpus
8
+ memory params. memory
9
+ tag " ${ patient_name} "
10
+ publishDir " ${ params.output} /${ patient_name} " , mode: " copy"
11
+
12
+ conda (params. enable_conda ? " bioconda::whatshap=1.4" : null )
13
+
14
+ input:
15
+ tuple val(patient_name), file(vcf), val(bams)
16
+
17
+ output:
18
+ tuple val(patient_name), file(" ${ patient_name} .phased.vcf" ), emit: phased_vcf
19
+
20
+ script:
21
+ phasing_bams = bams. toList(). stream()
22
+ .filter{ b -> b. split(' :' )[0 ] == " ${ params.phasing_sample} " }
23
+ .collect{ b -> b. split(" :" )[1 ] }
24
+ .join(" " ) ;
25
+ """
26
+ whatshap phase \
27
+ --indel \
28
+ -o ${ patient_name} .phased.vcf \
29
+ --reference=${ params.reference} \
30
+ ${ vcf} \
31
+ ${ phasing_bams}
32
+ """
33
+ }
Original file line number Diff line number Diff line change @@ -49,6 +49,7 @@ process VARIANT_ANNOTATION_BCFTOOLS {
49
49
--fasta-ref ${ params.reference} \\
50
50
--gff-annot ${ params.gff} ${ vcf} \\
51
51
--output-type v \\
52
- --output ${ name} .annotated.vcf
52
+ --output ${ name} .annotated.vcf \\
53
+ --phase a
53
54
"""
54
55
}
Original file line number Diff line number Diff line change 27
27
// Capture exit codes from upstream processes when piping
28
28
process. shell = [' /bin/bash' , ' -euo' , ' pipefail' ]
29
29
30
- VERSION = ' 2.5 .0'
30
+ VERSION = ' 3.0 .0'
31
31
32
32
manifest {
33
33
name = ' TRON-Bioinformatics/tronflow-vcf-postprocessing'
@@ -87,6 +87,7 @@ Optional input:
87
87
* --snpeff_memory: for some samples SnpEff may require to use more memory (default: 3g)
88
88
* --mapping_quality: VAFator minimum mapping quality (default: 0)
89
89
* --base_call_quality: VAFator minimum base call quality (default: 0)
90
+ * --phasing-sample: enables phasing with whatshap and defines the sample BAM to phase the VCF (BAMs need to be provided)
90
91
91
92
Output:
92
93
* Normalized VCF file
Original file line number Diff line number Diff line change
1
+ single_sample test/data/test_single_sample.vcf
Original file line number Diff line number Diff line change
1
+ #! /bin/bash
2
+
3
+
4
+ source test/scripts/assert.sh
5
+ output_folder=test/output/test14
6
+ echo -e " tumor_normal\tprimary:" ` pwd` " /test/data/TESTX_S1_L001.bam" > test/data/test_bams.txt
7
+ echo -e " tumor_normal\tnormal:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
8
+ echo -e " single_sample\ttumor:" ` pwd` " /test/data/TESTX_S1_L001.bam" >> test/data/test_bams.txt
9
+ echo -e " single_sample\ttumor:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
10
+ echo -e " single_sample\tnormal:" ` pwd` " /test/data/TESTX_S1_L001.bam" >> test/data/test_bams.txt
11
+ echo -e " single_sample\tnormal:" ` pwd` " /test/data/TESTX_S1_L002.bam" >> test/data/test_bams.txt
12
+
13
+ nextflow main.nf -profile test,conda --output $output_folder --input_bams test/data/test_bams.txt \
14
+ --phasing_sample normal --input_vcfs test/data/test_input_single_sample.txt
15
+
16
+ test -s $output_folder /single_sample/single_sample.phased.vcf || { echo " Missing test 10 output file!" ; exit 1; }
You can’t perform that action at this time.
0 commit comments