Skip to content

Commit

Permalink
Merge pull request #62 from cidgoh/multiqc_config
Browse files Browse the repository at this point in the history
add customized multiqc config file
  • Loading branch information
anwarMZ authored Jun 25, 2024
2 parents 206152f + 5641947 commit dd06226
Show file tree
Hide file tree
Showing 28 changed files with 710 additions and 366 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)
[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)

# BACPAQ

## Introduction

**`bacpaq`** is a bioinformatics best-practice pipeline for bacterial genomic analysis of short-read (Illumina) and long-read (Oxford Nanopore) sequencing data. Currently, `bacpaq` supports WGS-based analyses; however, we plan to integrate microbiome (amplicon and shotgun metagenomics) analyses in the future.
Expand Down
8 changes: 4 additions & 4 deletions assets/methods_description_template.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
id: "nf-core-bacpaq-methods-description"
id: "cidgoh-bacpaq-methods-description"
description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
section_name: "nf-core/bacpaq Methods Description"
section_href: "https://github.com/nf-core/bacpaq"
section_name: "cidgoh/bacpaq Methods Description"
section_href: "https://github.com/cidgoh/bacpaq"
plot_type: "html"
## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
## You can inject any metadata from the Nextflow '${workflow}' object
data: |
<h4>Methods</h4>
<p>Data was processed using nf-core/bacpaq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>), utilising reproducible software environments from the Bioconda (<a href="https://doi.org/10.1038/s41592-018-0046-7">Grüning <em>et al.</em>, 2018</a>) and Biocontainers (<a href="https://doi.org/10.1093/bioinformatics/btx192">da Veiga Leprevost <em>et al.</em>, 2017</a>) projects.</p>
<p>Data was processed using cidgoh/bacpaq v${workflow.manifest.version} ${doi_text} built using the nf-core template of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>), utilising reproducible software environments from the Bioconda (<a href="https://doi.org/10.1038/s41592-018-0046-7">Grüning <em>et al.</em>, 2018</a>) and Biocontainers (<a href="https://doi.org/10.1093/bioinformatics/btx192">da Veiga Leprevost <em>et al.</em>, 2017</a>) projects.</p>
<p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
<pre><code>${workflow.commandLine}</code></pre>
<p>${tool_citations}</p>
Expand Down
223 changes: 218 additions & 5 deletions assets/multiqc_config.yml
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,15 +1,228 @@
report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/bacpaq/tree/dev" target="_blank">nf-core/bacpaq</a>
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://nf-co.re/bacpaq/dev/docs/output" target="_blank">documentation</a>.
bacpaq is a bioinformatics best-practice pipeline for bacterial genomic analysis for short-reads (Illumina) and long-reads (Oxford Nanopore) sequencing data. Currently bacpaq supports WGS-based analyses, however, we plan to integrate Microbiome (Amplicon and Shotgun Metagenomics) analyses in future.
# Ordering of report sections: each key is a section id and "order" is its
# sort weight.
# NOTE(review): both "nf-core-bacpaq-*" and "cidgoh-bacpaq-*" ids appear below.
# This looks like the before/after sides of a rename shown together; confirm
# that only the "cidgoh-bacpaq-*" ids remain in the actual config file.
report_section_order:
"nf-core-bacpaq-methods-description":
"cidgoh-bacpaq-methods-description":
order: -1000
software_versions:
order: -1001
"nf-core-bacpaq-summary":
"cidgoh-bacpaq-summary":
order: -1002

# Export the report plots as files alongside the report.
export_plots: true

# Turn off MultiQC's own tool-version detection.
# NOTE(review): presumably because versions are reported through the
# software_versions section ordered above; confirm.
disable_version_detection: true
# File format used for the exported data files.
data_format: "yaml"

# Modules that MultiQC should run for this pipeline.
# Each module is listed exactly once: the separate "raw" and "after
# preprocessing" FastQC sections come from the two "fastqc" entries under
# top_modules, not from repeating the module name here.
run_modules:
  - fastqc
  - fastp
  - trimmomatic
  - porechop
  - kraken
  - bracken
  - quast
  - prokka
  - bakta
  - busco

# Module order
# Report sections, listed in the order they should appear. A module can be
# listed more than once (fastqc, kraken) to get separate sections; the
# path_filters / path_filters_exclude globs decide which files feed each one.
top_modules:
  - "fastqc":
      name: "FastQC: raw reads"
      path_filters_exclude:
        - "*trimmed*"
  - "fastp":
      name: "FastP"
      info: "Read preprocessing."
      path_filters:
        - "*fastp.json"
  - "trimmomatic":
      name: "Trimmomatic"
      info: "Adapter trimming for Illumina reads."
      path_filters:
        - "*trimmomatic.log"
  - "porechop":
      name: "Porechop"
      info: "Adapter trimming for Oxford Nanopore reads."
      path_filters:
        - "*porechop.log"
  - "fastqc":
      name: "FastQC: after preprocessing"
      anchor: "FastQC_trimmed"
      info: "After trimming and, if requested, contamination removal."
      path_filters:
        - "*trimmed*"
  - "kraken":
      name: "Kraken2"
      anchor: "Kraken2"
      target: "Kraken2"
      # Kraken 2 publication (Wood et al., 2019). The previous value was the
      # Centrifuge paper's DOI, duplicated from the entry below.
      doi: "10.1186/s13059-019-1891-0"
      path_filters:
        - "*.kraken2.report.txt"
      top_n: 10
  # Centrifuge reports are parsed by the kraken module.
  - "kraken":
      name: "Centrifuge"
      anchor: "centrifuge"
      target: "Centrifuge"
      doi: "10.1101/gr.210641.116"
      # The trailing fragment "Note: Figure title" was dropped; the complete
      # note about the plot title lives in "extra".
      info: "is a very rapid and memory-efficient system for the classification of DNA sequences from microbial samples. The system uses a novel indexing scheme based on the Burrows-Wheeler transform (BWT) and the Ferragina-Manzini (FM) index."
      extra: "ℹ️: plot title will say Kraken2 due to Centrifuge producing the same output format as Kraken. If activated, see the actual Kraken2 results in the section above."
      path_filters:
        - "*.centrifuge_kreport.txt"
  # NOTE(review): contents / contents_re / fn / num_lines in the entries below
  # look like search-pattern keys (as used under "sp"); confirm that MultiQC
  # honours them when they are given inside a top_modules entry.
  - "bracken":
      name: "bracken"
      anchor: "bracken"
      target: "bracken"
      info: "Estimates the abundance of species in metagenomic samples."
      contents_re: '^(\d{1,3}\.\d{1,2})\t(\d+)\t(\d+)\t((\d+)\t(\d+)\t)?([URDKPCOFGS-]\d{0,2})\t(\d+)(\s+)root'
      num_lines: 1
      path_filters:
        - "*.bracken.report.tsv"
  - "quast":
      name: "QUAST: assembly"
      info: "Assembly statistics of binned assemblies."
      path_filters_exclude:
        - "*rawassemblies.tsv"
      contents: "Assembly\t"
      num_lines: 2

  - "prokka":
      name: "Prokka"
      info: "Prokka is a software tool for the rapid annotation of prokaryotic genomes."
      contents: "contigs:"
      fn: "*.txt"

  - "bakta":
      name: "Bakta"
      info: "Bakta is a software tool for the rapid annotation of prokaryotic genomes."
      contents: "Bakta:"
      fn: "*.txt"

  - "busco":
      info: "assesses genome assembly and annotation completeness with Benchmarking Universal Single-Copy Orthologs. In case BUSCO's automated lineage selection was used, only generic results for the selected domain are shown and only for genome bins and kept, unbinned contigs for which the BUSCO analysis was successful, i.e. not for contigs for which no BUSCO genes could be found. Bins for which a specific virus lineage was selected are also not shown."
      fn: "short_summary*"
      contents: "BUSCO version is:"
      num_lines: 1

# Custom search patterns: which file names each module should pick up.
sp:
  kraken:
    # Matches both report names used by the path filters in top_modules,
    # e.g. "<sample>.kraken2.report.txt" and "<sample>.centrifuge_kreport.txt".
    # The alternation has to be a group: "[kraken2|centrifuge]" is a character
    # class that matches any ONE of those characters, so almost any file
    # ending in "report?txt" would have been picked up.
    fn_re: '.*(kraken2|centrifuge).*report\.txt'
  quast:
    fn_re: "report.*.tsv"

# Sample-name clean-up
# Extensions stripped from input file names when deriving sample names.
fn_clean_exts:
  - '.gz'
  - '.fastq'

# One further clean-up rule, written as a regular expression rather than a
# literal extension.
extra_fn_clean_exts:
  - type: 'regex'
    pattern: '_T{1,2}.*'

## Report branding
# NOTE(review): no "custom_logo" image path is set anywhere in this file;
# confirm the two logo settings below have an effect without it.
custom_logo_url: 'https://github.com/cidgoh/bacpaq/'
custom_logo_title: 'cidgoh/bacpaq'

## Title displayed on the report
title: 'BACPAQ'

## Module-specific options
prokka_fn_snames: true

## General Stats customisation
# Visibility of General Statistics columns, grouped by report section.
# A nested mapping toggles individual columns; a bare boolean applies to the
# whole section.
table_columns_visible:
  "FastQC: raw reads":
    avg_sequence_length: true
  "FastQC: after preprocessing":
    avg_sequence_length: true
  "fastp":
    pct_duplication: false
    after_filtering_q30_rate: false
    after_filtering_q30_bases: false
    # Was 3300, the placement weight for this column in
    # table_columns_placement. Visibility expects a boolean; a non-zero number
    # is presumably treated as truthy, so "true" keeps the column shown.
    filtering_result_passed_filter_reads: true
    after_filtering_gc_content: false
    pct_surviving: true
    pct_adapter: true
  "Kraken2": true
  "Centrifuge": true
  "QUAST: assembly":
    N75: true
    L50: true
    L75: true
    "Largest contig": true
    "Total length": true
    N50: true
  "Prokka": true
  "Bakta": true

# Numeric placement weights for General Statistics columns, grouped by report
# section. Each section uses its own range (1000s, 2000s, 3000s, ...) so its
# columns stay together.
# NOTE(review): Prokka and Bakta use identical weights (20000-24000); confirm
# that the resulting relative order of their columns is the intended one.
table_columns_placement:
  'FastQC: raw reads':
    percent_duplicates: 1000
    percent_gc: 1100
    avg_sequence_length: 1200
    median_sequence_length: 1300
    total_sequences: 1400
    percent_fails: 1500
  'FastQC: after preprocessing':
    percent_duplicates: 2000
    percent_gc: 2100
    avg_sequence_length: 2200
    median_sequence_length: 2300
    total_sequences: 2400
    percent_fails: 2500
  'fastp':
    pct_duplication: 3000
    after_filtering_q30_rate: 3100
    after_filtering_q30_bases: 3200
    filtering_result_passed_filter_reads: 3300
    after_filtering_gc_content: 3400
    pct_surviving: 3500
    pct_adapter: 3600
  'Kraken2':
    '% root': 8000
    '% Top 5': 8100
    '% Unclassified': 8200
  'Centrifuge':
    '% root': 9000
    '% Top 5': 9100
    '% Unclassified': 9200
  'QUAST: assembly':
    'N50': 10000
    'Total length': 11000
  'Prokka':
    contigs: 20000
    bases: 21000
    CDS: 22000
    organism: 23000
    plasmid: 24000
  'Bakta':
    contigs: 20000
    bases: 21000
    CDS: 22000
    organism: 23000
    plasmid: 24000

# Display names for the FastQC columns. The "(raw)" / "(processed)" suffix
# tells apart the two FastQC sections, which otherwise share column titles.
table_columns_name:
  'FastQC: raw reads':
    percent_duplicates: '% Dups (raw)'
    percent_gc: '% GC (raw)'
    avg_sequence_length: 'Avg. length (raw)'
    median_sequence_length: 'Median length (raw)'
    total_sequences: 'M Seqs (raw)'
    percent_fails: '% Fails (raw)'
  'FastQC: after preprocessing':
    percent_duplicates: '% Dups (processed)'
    percent_gc: '% GC (processed)'
    avg_sequence_length: 'Avg. length (processed)'
    median_sequence_length: 'Median length (processed)'
    total_sequences: 'M Seqs (processed)'
    percent_fails: '% Fails (processed)'

# Header overrides for the General Statistics table: both columns below are
# marked hidden.
# NOTE(review): table_columns_visible sets "Total length" and N50 to True for
# "QUAST: assembly"; confirm which of the two settings is meant to win.
custom_table_header_config:
  general_stats_table:
    'Total length':
      hidden: true
    'N50':
      hidden: true
Loading

0 comments on commit dd06226

Please sign in to comment.