---
# multiqc_config_overall.yaml
# Top of the page info: report title, subtitle and introductory comment
title: "Overall Run MultiQC Report"
subtitle: "Run QC statistics and plots"
# Folded scalar (`>`): the indented lines below are joined into a single
# paragraph. They must stay indented deeper than the key, otherwise the
# value is empty and the text is no longer part of `report_comment`.
report_comment: >
  This report contains viral sequencing metrics for all samples ran in the pipeline.
  Individual sample reports are also available to provide more detailed information on each sample
# Report-wide switches
disable_version_detection: true
show_analysis_paths: false
# Skip the General Statistics table
skip_generalstats: true
# Data output format and table row limit
data_format: yaml
max_table_rows: 10000
# Only these supported modules/tools are picked up for the report
run_modules:
  - custom_content
  - samtools
  - qualimap
  - bcftools
  - nanostat
  - snpeff
# Order in which module sections are listed in the report.
# The nanostat entry is a mapping (not a bare name) so that it can carry its
# own display name and a file filter; `name` and `path_filters` must stay
# nested under `nanostat`, and the remaining modules must stay siblings of it.
module_order:
  - nanostat:
      name: Nanostat filtered fastq statistics
      path_filters:
        - "*.nanostat.txt"
  - samtools
  - qualimap
  - bcftools
  - snpeff
# Per-section ordering overrides, keyed by section id.
# NOTE(review): the large negative value presumably pushes the software
# versions section to the end of the report - confirm against the MultiQC docs.
report_section_order:
  software_versions:
    order: -1000
# Custom data here
# Each key under `custom_data` defines one custom section; the `sp` block of
# this file lists a file pattern under each of the same names.
custom_data:
  # QC CSV: one table row per sample
  qc_csv_table:
    section_name: "Overall Metrics Table"
    description: >
      This table shows the overall quality control metrics for all samples in the run
    plot_type: "table"
    # Table-level options; `format` here is the number format for the table,
    # which individual headers below override where needed
    pconfig:
      id: "qc_csv_table"
      table_title: "Overall QC Table"
      namespace: "Overall QC Table"
      col1_header: "Sample"
      only_defined_headers: true
      format: "{:,.0f}"
    # Column definitions, keyed by the column name in the QC CSV
    headers:
      sample:
        scale: false
      qc_pass:
        title: "QC Pass"
        description: "Overall status of the sample"
        # Any value other than PASS is flagged as a warning
        cond_formatting_rules:
          warn:
            - s_ne: "PASS"
      run_status:
        title: "Run Status"
        description: "Overall status of the run taking negative control samples into account"
        # Any value other than PASS is flagged as a warning
        cond_formatting_rules:
          warn:
            - s_ne: "PASS"
      num_aligned_reads:
        title: "# Aligned Reads"
        description: "Number of aligned reads used for variant calling and consensus generation"
        hidden: false
      genome_completeness:
        title: "Genome Completeness"
        description: "Float value for how many bases in the genome were called with 1.0 being 100%"
        scale: "RdYlGn"
        min: 0
        max: 1
        format: "{:,.4f}"
      median_sequencing_depth:
        title: "Median Depth"
        description: "Median sequencing depth of the sample"
      total_variants:
        title: "Total Variants"
        description: "Total number of variant sites in the sample"
        scale: "Greens"
      num_snps:
        title: "# SNPs"
        description: "Number of individual SNPs in the sample"
        min: 0
        scale: "PuBu"
      num_deletions:
        title: "# Deletions"
        description: "The total number of deleted bases in the sample"
        min: 0
        scale: "Purples"
      num_deletion_sites:
        title: "# Deletion Sites"
        description: "The number of sites in the genome containing a deletion"
        min: 0
        scale: "Purples"
      num_insertions:
        title: "# Insertions"
        description: "The total number of inserted bases in the sample"
        min: 0
        scale: "Oranges"
      num_insertion_sites:
        title: "# Insertion Sites"
        description: "The number of sites in the genome containing an insertion"
        min: 0
        scale: "Oranges"
      possible_frameshift_variants:
        title: "Possible Frameshift Variants"
        description: "Variants detected by SnpEFF as containing a frameshift or if SnpEFF was not run, indels that fail a divisible by 3 check"
        # Note that the order here must be kept or the no_data will be yellow
        cond_formatting_rules:
          pass:
            - s_eq: "none"
          frameshift:
            - s_ne: "none"
          no_data:
            - s_eq: "NA"
        # NOTE(review): the nesting of these entries was reconstructed. `1` and
        # `2` are kept as empty keys beside the colour entry of each list item;
        # confirm against the deployed config before relying on it.
        cond_formatting_colours:
          - 1:
            frameshift: "#f0ad4e"  # Yellow Warn colour
          - 2:
            no_data: "#000000"  # Black
      neg_control_info:
        title: "Neg Control Status"
        description: "If the sample is a negative control gives its status as PASS/WARN"
  # Amplicons section
  # Both amplicon plots share the parent id `amplicons_qc`; the parent name and
  # description are given on the first one only.
  amplicon_depth:
    parent_id: amplicons_qc
    parent_name: "Amplicon Summary"
    parent_description: >
      Custom amplicon plots to show how deep each amplicon is sequenced and how complete each amplicon is
    id: "amplicon_depth"
    file_format: "tsv"
    section_name: "Amplicon Depth Lineplot"
    description: >
      Calculated from the primertrimmed sorted bam file using bedtools coverage
      looking for reads that overlap 85% of the given amplicon region
    plot_type: "linegraph"
    pconfig:
      id: "amplicon_depth"
      title: "Amplicon Depth"
      xlab: "Amplicon ID"
      ylab: "Read Depth"
      # Log-scale toggle is offered but not active by default
      logswitch: true
      logswitch_active: false
      categories: true
      ymin: 0
  amplicon_completeness:
    parent_id: amplicons_qc
    file_format: "csv"
    section_name: "Amplicon Completeness"
    description: >
      Heatmap showing how complete each amplicon is based on the consensus sequence.
      Amplicon completeness is calculated using the number of Ns in the amplicon compared to the amplicon length.
      A 1.0 means the amplicon is fully sequenced while a 0 means that there were no bases called in the amplicon.
      Note that as amplicons overlap, a fully dropped amplicon could still have some completeness from its two neighbours
    id: "amplicon_completeness"
    plot_type: "heatmap"
    pconfig:
      id: "amplicon_completeness"
      title: "Amplicon Completeness"
      xTitle: "Amplicon ID"
      xcats_samples: false
      ycats_samples: true
      square: false
      min: 0
      max: 1
      # Colour stops as [value, hex colour] pairs covering 0 to 1 in steps of 0.1
      colstops:
        - [0, "#a50026"]
        - [0.1, "#d73027"]
        - [0.2, "#f46d43"]
        - [0.3, "#fdae61"]
        - [0.4, "#fee08b"]
        - [0.5, "#ffffbf"]
        - [0.6, "#d9ef8b"]
        - [0.7, "#a6d96a"]
        - [0.8, "#66bd63"]
        - [0.9, "#1a9850"]
        - [1, "#006837"]
# Extra file-name extensions to clean from names
extra_fn_clean_exts:
  - ".nanostat"
  - ".ann"
# Search patterns: one file-name pattern (`fn`) per custom section.
# The keys match the section names defined under `custom_data`; each `fn`
# must stay nested under its own key, otherwise `fn` becomes a repeated
# top-level key.
sp:
  qc_csv_table:
    fn: "*.qc.csv"
  amplicon_depth:
    fn: "*_ampdepth.tsv"
  amplicon_completeness:
    fn: "merged_amplicon_completeness.csv"