snakemake-workflows
diff --git a/‎.github/workflows/main.yml
Lines changed: 4 additions & 4 deletions b/‎.github/workflows/main.yml
Lines changed: 4 additions & 4 deletions
diff --git a/‎.snakemake-workflow-catalog.yml
Lines changed: 7 additions & 7 deletions b/‎.snakemake-workflow-catalog.yml
Lines changed: 7 additions & 7 deletions
diff --git a/‎.template/config/config.yaml.tmpl.tmpl
Lines changed: 0 additions & 1 deletion b/‎.template/config/config.yaml.tmpl.tmpl
Lines changed: 0 additions & 1 deletion
diff --git a/‎.template/workflow/Snakefile.tmpl.tmpl
Lines changed: 0 additions & 10 deletions b/‎.template/workflow/Snakefile.tmpl.tmpl
Lines changed: 0 additions & 10 deletions
diff --git a/‎.test/config/config.yml
Lines changed: 2 additions & 20 deletions b/‎.test/config/config.yml
Lines changed: 2 additions & 20 deletions
diff --git a/‎.test/config/multiqc_config.yml
Lines changed: 0 additions & 2 deletions b/‎.test/config/multiqc_config.yml
Lines changed: 0 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 27 additions & 40 deletions b/‎README.md
Lines changed: 27 additions & 40 deletions
diff --git a/‎config/README.md
Lines changed: 17 additions & 36 deletions b/‎config/README.md
Lines changed: 17 additions & 36 deletions
diff --git a/‎config/config.yml
Lines changed: 2 additions & 20 deletions b/‎config/config.yml
Lines changed: 2 additions & 20 deletions
diff --git a/‎config/multiqc_config.yml
Lines changed: 0 additions & 2 deletions b/‎config/multiqc_config.yml
Lines changed: 0 additions & 2 deletions
@@ -29,7 +29,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Lint workflow
-        uses: snakemake/snakemake-github-action@v1.25.1
+        uses: snakemake/snakemake-github-action@v2
         with:
           directory: .
           snakefile: workflow/Snakefile
@@ -45,14 +45,14 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Test workflow
-        uses: snakemake/snakemake-github-action@v1.25.1
+        uses: snakemake/snakemake-github-action@v2
         with:
           directory: .test
           snakefile: workflow/Snakefile
-          args: "--use-conda --show-failed-logs --cores 3 --conda-cleanup-pkgs cache"
+          args: "--sdm conda --show-failed-logs --cores 2 --conda-cleanup-pkgs cache"
 
       - name: Test report
-        uses: snakemake/snakemake-github-action@v1.25.1
+        uses: snakemake/snakemake-github-action@v2
         with:
           directory: .test
           snakefile: workflow/Snakefile
 
@@ -1,11 +1,11 @@
 # configuration of display in snakemake workflow catalog: https://snakemake.github.io/snakemake-workflow-catalog
 
 usage:
-  mandatory-flags: # optional definition of additional flags
-    desc: # describe your flags here in a few sentences (they will be inserted below the example commands)
+  mandatory-flags:
+    desc: # describe your flags here in a few sentences
     flags: # put your flags here
-  software-stack-deployment: # definition of software deployment method (at least one of conda, singularity, or singularity+conda)
-    conda: true # whether pipeline works with --use-conda
-    singularity: false # whether pipeline works with --use-singularity
-    singularity+conda: false # whether pipeline works with --use-singularity --use-conda
-  report: true # add this to confirm that the workflow allows to use 'snakemake --report report.zip' to generate a report containing all results and explanations
+  software-stack-deployment:
+    conda: true # whether pipeline works with '--sdm conda'
+    apptainer: true # whether pipeline works with '--sdm apptainer/singularity'
+    apptainer+conda: true # whether pipeline works with '--sdm conda apptainer/singularity'
+  report: true # whether creation of reports using 'snakemake --report report.zip' is supported
@@ -1,27 +1,9 @@
 samplesheet: "config/samples.tsv"
 
 get_genome:
-  database: "ncbi"
-  assembly: "GCF_000006785.2"
-  fasta: Null
-  gff: Null
-  gff_source_type:
-    [
-      "RefSeq": "gene",
-      "RefSeq": "pseudogene",
-      "RefSeq": "CDS",
-      "Protein Homology": "CDS",
-    ]
+  ncbi_ftp: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.fna.gz
 
 simulate_reads:
   read_length: 100
   read_number: 100000
-  random_freq: 0.01
-
-cutadapt:
-  threep_adapter: "-a ATCGTAGATCGG"
-  fivep_adapter: "-A GATGGCGATAGG"
-  default: ["-q 10 ", "-m 25 ", "-M 100", "--overlap=5"]
-
-multiqc:
-  config: "config/multiqc_config.yml"
+  random_freq: 0.01
@@ -1,10 +1,9 @@
 # Snakemake workflow: `<name>`
 
 [![Snakemake](https://img.shields.io/badge/snakemake-≥8.0.0-brightgreen.svg)](https://snakemake.github.io)
-[![GitHub actions status](https://github.com/MPUSP/snakemake-workflow-template/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/MPUSP/snakemake-workflow-template/actions/workflows/main.yml)
+[![GitHub actions status](https://github.com/snakemake-workflows/snakemake-workflow-template/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/snakemake-workflows/snakemake-workflow-template/actions/workflows/main.yml)
 [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)
-[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1D355C.svg?labelColor=000000)](https://sylabs.io/docs/)
-[![workflow catalog](https://img.shields.io/badge/Snakemake%20workflow%20catalog-darkgreen)](https://snakemake.github.io/snakemake-workflow-catalog)
+[![workflow catalog](https://img.shields.io/badge/Snakemake%20workflow%20catalog-darkgreen)](https://snakemake.github.io/snakemake-workflow-catalog/docs/workflows/<owner>/<repo>)
 
 A Snakemake workflow for `<description>`
 
@@ -21,7 +20,7 @@ A Snakemake workflow for `<description>`
 
 ## Usage
 
-The usage of this workflow is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/?usage=<owner>%2F<repo>).
+The usage of this workflow is described in the [Snakemake Workflow Catalog](https://snakemake.github.io/snakemake-workflow-catalog/docs/workflows/<owner>/<repo>).
 
 If you use this workflow in a paper, don't forget to give credits to the authors by citing the URL of this repository or its DOI.
 
@@ -30,11 +29,10 @@ If you use this workflow in a paper, don't forget to give credits to the authors
 This workflow is a best-practice workflow for `<detailed description>`.
 The workflow is built using [snakemake](https://snakemake.readthedocs.io/en/stable/) and consists of the following steps:
 
-1. Parse sample sheet containing sample meta data (`python`)
+1. Download genome reference from NCBI
 2. Simulate short read sequencing data on the fly (`dwgsim`)
 3. Check quality of input read data (`FastQC`)
-4. Trim adapters from input data (`cutadapt`)
-5. Collect statistics from tool output (`MultiQC`)
+4. Collect statistics from tool output (`MultiQC`)
 
 ## Running the workflow
 
@@ -47,7 +45,6 @@ This template workflow creates artificial sequencing data in `*.fastq.gz` format
 | sample1 | wild_type | 1         | sample1.bwa.read1.fastq.gz | sample1.bwa.read2.fastq.gz |
 | sample2 | wild_type | 2         | sample2.bwa.read1.fastq.gz | sample2.bwa.read2.fastq.gz |
 
-
 ### Execution
 
 To run the workflow from command line, change the working directory.
@@ -57,49 +54,39 @@ cd path/to/snakemake-workflow-name
 ```
 
 Adjust options in the default config file `config/config.yml`.
-Before running the entire workflow, you can perform a dry run using:
+Before running the complete workflow, you can perform a dry run using:
 
 ```bash
 snakemake --dry-run
 ```
 
-To run the complete workflow with test files using **conda**, execute the following command. The definition of the number of compute cores is mandatory.
+To run the workflow with test files using **conda**:
 
 ```bash
-snakemake --cores 3 --sdm conda --directory .test
+snakemake --cores 2 --sdm conda --directory .test
 ```
 
-To run the workflow with **singularity** / **apptainer**, add a link to a container registry in the `Snakefile`, for example:
+To run the workflow with **apptainer** / **singularity**, add a link to a container registry in the `Snakefile`, for example:
 `container: "oras://ghcr.io/<user>/<repository>:<version>"` for Github's container registry. Run the workflow with:
 
 ```bash
-snakemake --cores 3 --sdm conda apptainer --directory .test
+snakemake --cores 2 --sdm conda apptainer --directory .test
 ```
 
 ### Parameters
 
 This table lists all parameters that can be used to run the workflow.
 
-| parameter          | type | details                                 | default                                       |
-| ------------------ | ---- | --------------------------------------- | --------------------------------------------- |
-| **samplesheet**    |      |                                         |                                               |
-| path               | str  | path to samplesheet, mandatory          | "config/samples.tsv"                          |
-| **get_genome**     |      |                                         |                                               |
-| database           | str  | one of `manual`, `ncbi`                 | `ncbi`                                        |
-| assembly           | str  | RefSeq ID                               | `GCF_000006785.2`                             |
-| fasta              | str  | optional path to fasta file             | Null                                          |
-| gff                | str  | optional path to gff file               | Null                                          |
-| gff_source_type    | str  | list of name/value pairs for GFF source | see config file                               |
-| **simulate_reads** |      |                                         |                                               |
-| read_length        | num  | length of target reads in bp            | 100                                           |
-| read_number        | num  | number of total reads to be simulated   | 100000                                        |
-| random_freq        | num  | frequency of random read sequences      | 0.01                                          |
-| **cutadapt**       |      |                                         |                                               |
-| threep_adapter     | str  | sequence of the 3' adapter              | `-a ATCGTAGATCGG`                             |
-| fivep_adapter      | str  | sequence of the 5' adapter              | `-A GATGGCGATAGG`                             |
-| default            | str  | additional options passed to `cutadapt` | [`-q 10 `, `-m 25 `, `-M 100`, `--overlap=5`] |
-| **multiqc**        |      |                                         |                                               |
-| config             | str  | path to multiQC config                  | `config/multiqc_config.yml`                   |
+| parameter          | type | details                               | default                        |
+| ------------------ | ---- | ------------------------------------- | ------------------------------ |
+| **samplesheet**    |      |                                       |                                |
+| path               | str  | path to samplesheet, mandatory        | "config/samples.tsv"           |
+| **get_genome**     |      |                                       |                                |
+| ncbi_ftp           | str  | link to a genome on NCBI's FTP server | link to _S. cerevisiae_ genome |
+| **simulate_reads** |      |                                       |                                |
+| read_length        | num  | length of target reads in bp          | 100                            |
+| read_number        | num  | number of total reads to be simulated | 100000                         |
+| random_freq        | num  | frequency of random read sequences    | 0.01                           |
 
 ## Authors
 
@@ -110,13 +97,13 @@ This table lists all parameters that can be used to run the workflow.
 
 ## References
 
-> Köster, J., Mölder, F., Jablonski, K. P., Letcher, B., Hall, M. B., Tomkins-Tinch, C. H., Sochat, V., Forster, J., Lee, S., Twardziok, S. O., Kanitz, A., Wilm, A., Holtgrewe, M., Rahmann, S., & Nahnsen, S. *Sustainable data analysis with Snakemake*. F1000Research, 10:33, 10, 33, **2021**. https://doi.org/10.12688/f1000research.29032.2.
+> Köster, J., Mölder, F., Jablonski, K. P., Letcher, B., Hall, M. B., Tomkins-Tinch, C. H., Sochat, V., Forster, J., Lee, S., Twardziok, S. O., Kanitz, A., Wilm, A., Holtgrewe, M., Rahmann, S., & Nahnsen, S. _Sustainable data analysis with Snakemake_. F1000Research, 10:33, 10, 33, **2021**. https://doi.org/10.12688/f1000research.29032.2.
 
 ## TODO
 
-* Replace `<owner>` and `<repo>` everywhere in the template (also under .github/workflows) with the correct `<repo>` name and owning user or organization.
-* Replace `<name>` with the workflow name (can be the same as `<repo>`).
-* Replace `<description>` with a description of what the workflow does.
-* Update the workflow description, parameters, running options, authors and references in the `README.md`
-* Update the `README.md` badges. Add or remove badges for `conda`/`singularity`/`apptainer` usage depending on the workflow's capability
-* The workflow will occur in the snakemake-workflow-catalog once it has been made public. Then the link under "Usage" will point to the usage instructions if `<owner>` and `<repo>` were correctly set.
+- Replace `<owner>` and `<repo>` everywhere in the template with the correct user name/organization, and the repository name. The workflow will be automatically added to the [snakemake workflow catalog](https://snakemake.github.io/snakemake-workflow-catalog/index.html) once it is publicly available on GitHub.
+- Replace `<name>` with the workflow name (can be the same as `<repo>`).
+- Replace `<description>` with a description of what the workflow does.
+- Update the [workflow overview](#workflow-overview), and [running instructions](#running-the-workflow) including parameters, deployment, authors and references
+- Update the `README.md` badges. Add or remove badges for `conda`/`singularity`/`apptainer` usage depending on the workflow's [deployment options](#execution)
+- Do not forget to also adjust the configuration-specific `config/README.md` file
@@ -3,11 +3,10 @@
 This workflow is a best-practice workflow for `<detailed description>`.
 The workflow is built using [snakemake](https://snakemake.readthedocs.io/en/stable/) and consists of the following steps:
 
-1. Parse sample sheet containing sample meta data (`python`)
+1. Download genome reference from NCBI
 2. Simulate short read sequencing data on the fly (`dwgsim`)
 3. Check quality of input read data (`FastQC`)
-4. Trim adapters from input data (`cutadapt`)
-5. Collect statistics from tool output (`MultiQC`)
+4. Collect statistics from tool output (`MultiQC`)
 
 ## Running the workflow
 
@@ -20,7 +19,6 @@ This template workflow creates artificial sequencing data in `*.fastq.gz` format
 | sample1 | wild_type | 1         | sample1.bwa.read1.fastq.gz | sample1.bwa.read2.fastq.gz |
 | sample2 | wild_type | 2         | sample2.bwa.read1.fastq.gz | sample2.bwa.read2.fastq.gz |
 
-
 ### Execution
 
 To run the workflow from command line, change the working directory.
@@ -30,53 +28,36 @@ cd path/to/snakemake-workflow-name
 ```
 
 Adjust options in the default config file `config/config.yml`.
-Before running the entire workflow, you can perform a dry run using:
+Before running the complete workflow, you can perform a dry run using:
 
 ```bash
 snakemake --dry-run
 ```
 
-To run the complete workflow with test files using **conda**, execute the following command. The definition of the number of compute cores is mandatory.
+To run the workflow with test files using **conda**:
 
 ```bash
-snakemake --cores 3 --sdm conda --directory .test
+snakemake --cores 2 --sdm conda --directory .test
 ```
 
-To run the workflow with **singularity** / **apptainer**, add a link to a container registry in the `Snakefile`, for example:
+To run the workflow with **apptainer** / **singularity**, add a link to a container registry in the `Snakefile`, for example:
 `container: "oras://ghcr.io/<user>/<repository>:<version>"` for Github's container registry. Run the workflow with:
 
 ```bash
-snakemake --cores 3 --sdm conda apptainer --directory .test
+snakemake --cores 2 --sdm conda apptainer --directory .test
 ```
 
 ### Parameters
 
 This table lists all parameters that can be used to run the workflow.
 
-| parameter          | type | details                                 | default                                       |
-| ------------------ | ---- | --------------------------------------- | --------------------------------------------- |
-| **samplesheet**    |      |                                         |                                               |
-| path               | str  | path to samplesheet, mandatory          | "config/samples.tsv"                          |
-| **get_genome**     |      |                                         |                                               |
-| database           | str  | one of `manual`, `ncbi`                 | `ncbi`                                        |
-| assembly           | str  | RefSeq ID                               | `GCF_000006785.2`                             |
-| fasta              | str  | optional path to fasta file             | Null                                          |
-| gff                | str  | optional path to gff file               | Null                                          |
-| gff_source_type    | str  | list of name/value pairs for GFF source | see config file                               |
-| **simulate_reads** |      |                                         |                                               |
-| read_length        | num  | length of target reads in bp            | 100                                           |
-| read_number        | num  | number of total reads to be simulated   | 100000                                        |
-| random_freq        | num  | frequency of random read sequences      | 0.01                                          |
-| **cutadapt**       |      |                                         |                                               |
-| threep_adapter     | str  | sequence of the 3' adapter              | `-a ATCGTAGATCGG`                             |
-| fivep_adapter      | str  | sequence of the 5' adapter              | `-A GATGGCGATAGG`                             |
-| default            | str  | additional options passed to `cutadapt` | [`-q 10 `, `-m 25 `, `-M 100`, `--overlap=5`] |
-| **multiqc**        |      |                                         |                                               |
-| config             | str  | path to multiQC config                  | `config/multiqc_config.yml`                   |
-
-## TODO
-
-* Replace `<owner>` and `<repo>` everywhere in the template (also under .github/workflows) with the correct `<repo>` name and owning user or organization.
-* Replace `<name>` with the workflow name (can be the same as `<repo>`).
-* Replace `<description>` with a description of what the workflow does.
-* Update the workflow parameters and running options
+| parameter          | type | details                               | default                        |
+| ------------------ | ---- | ------------------------------------- | ------------------------------ |
+| **samplesheet**    |      |                                       |                                |
+| path               | str  | path to samplesheet, mandatory        | "config/samples.tsv"           |
+| **get_genome**     |      |                                       |                                |
+| ncbi_ftp           | str  | link to a genome on NCBI's FTP server | link to _S. cerevisiae_ genome |
+| **simulate_reads** |      |                                       |                                |
+| read_length        | num  | length of target reads in bp          | 100                            |
+| read_number        | num  | number of total reads to be simulated | 100000                         |
+| random_freq        | num  | frequency of random read sequences    | 0.01                           |
@@ -1,27 +1,9 @@
 samplesheet: ".test/config/samples.tsv"
 
 get_genome:
-  database: "ncbi"
-  assembly: "GCF_000006785.2"
-  fasta: Null
-  gff: Null
-  gff_source_type:
-    [
-      "RefSeq": "gene",
-      "RefSeq": "pseudogene",
-      "RefSeq": "CDS",
-      "Protein Homology": "CDS",
-    ]
+  ncbi_ftp: https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/000/146/045/GCF_000146045.2_R64/GCF_000146045.2_R64_genomic.fna.gz
 
 simulate_reads:
   read_length: 100
   read_number: 100000
-  random_freq: 0.01
-
-cutadapt:
-  threep_adapter: "-a ATCGTAGATCGG"
-  fivep_adapter: "-A GATGGCGATAGG"
-  default: ["-q 10 ", "-m 25 ", "-M 100", "--overlap=5"]
-
-multiqc:
-  config: "config/multiqc_config.yml"
+  random_freq: 0.01