Skip to content

Yleaf module added @3.3.0 #8210

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions modules/nf-core/yleaf/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the YLEAF module.
# conda-forge is listed before bioconda so that it takes priority when solving,
# which is the channel order nf-core modules use.
channels:
  - conda-forge
  - bioconda
dependencies:
  - python=3.9
  - numpy=1.24.3
  # NOTE(review): pandas 1.0.0 predates both Python 3.9 and numpy 1.24 -
  # confirm that this pin actually solves and imports cleanly.
  - pandas=1.0.0
  - bcftools=1.18
  # Edit this below only once yleaf 3.3.0 has been released to bioconda.
  # - yleaf=3.3.0
  # Otherwise, install from my local branch with pip:
  # FIXME(review): the editable install below points at an absolute path on a
  # local filesystem, so this environment cannot be built anywhere else.
  - pip
  - pip:
      - -e /home/a/Yleaf
95 changes: 95 additions & 0 deletions modules/nf-core/yleaf/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// Runs Yleaf on one sample's VCF and collects the haplogroup prediction, the run log(s) and,
// when one is produced, the haplogroup tree image.
// NOTE: the output globs below are rooted at `${meta.id}`, while the command writes to
// `task.ext.prefix` (falling back to `${meta.id}`); overriding the prefix will therefore leave
// the declared outputs unmatched.
process YLEAF {
    tag "$meta.id"
    label 'process_medium'

    // TODO AH: Switch environment.yml to the bioconda package once yleaf 3.3.0 is released there
    conda "${moduleDir}/environment.yml"

    input:
    // Per-sample VCF together with its meta map
    tuple val(meta), path(input_file)
    // Reference FASTA files, staged under fixed names
    path reference_fasta, stageAs: "reference.fa"
    path y_reference_fasta, stageAs: "y_reference.fa"
    // Tuning values; a null value falls back to the default applied in the script block
    val reference_genome
    val reads_threshold
    val quality_thresh
    val base_majority
    val prediction_quality
    val draw_haplogroups
    val collapsed_draw_mode
    val ancient_dna
    val private_mutations
    val minor_allele_frequency

    output:
    tuple val(meta), path("${meta.id}/hg_prediction.hg"), emit: haplogroup
    tuple val(meta), path("${meta.id}/*.log"), emit: log
    // Optional: the tree is only requested when `--draw_haplogroups` is passed, so a run
    // without it must not fail on a missing PDF.
    tuple val(meta), path("${meta.id}/hg_tree_image.pdf"), emit: tree, optional: true
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    def ref_genome = reference_genome ?: "hg38"
// These defaults correspond to the defaults used in the Yleaf package
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Small comment: if the tool itself has its own defaults, then you don't need to define all of these. Input vals should ONLY be used for mandatory inputs where there is no default. [1]

Footnotes

  1. https://nf-co.re/docs/guidelines/components/modules#non-file-mandatory-command-arguments

// Fallback helper for the numeric options. Groovy's Elvis operator treats 0 and 0.0 as false,
// so `value ?: fallback` would silently replace an explicit zero with the fallback. This keeps
// an explicit zero while still falling back for null or empty values.
def orDefault = { value, fallback -> (value || value == 0) ? value : fallback }
def reads_thresh = orDefault(reads_threshold, 10)
def qual_thresh = orDefault(quality_thresh, 20)
def base_maj = orDefault(base_majority, 90)
def pred_qual = orDefault(prediction_quality, 0.95)
def maf = orDefault(minor_allele_frequency, 0.01)
// Boolean switches: null and false both mean "flag off", so Elvis is sufficient here.
def draw_hg = draw_haplogroups ?: false
def collapsed_mode = collapsed_draw_mode ?: false
def ancient = ancient_dna ?: false
def private_mut = private_mutations ?: false

// The command creates the output directory, echoes the inputs into the task log, runs Yleaf on
// the VCF and records the tool version in versions.yml.
// FIXME(review): `/home/a/Yleaf` on PYTHONPATH is an absolute path on a local filesystem; it
// needs to go once Yleaf is installed from a published package.
// NOTE(review): `--reads_treshold` is spelled without the second "h"; presumably this matches
// Yleaf's own option name - confirm against `Yleaf --help` before "fixing" it.
// NOTE: `--force` and `--reanalyze` are always passed, independently of task.ext.args.
"""
mkdir -p ${prefix}

echo "Input file: ${input_file}"
echo "Reference genome: ${ref_genome}"

PYTHONPATH=\$PWD:/home/a/Yleaf Yleaf \\
    --vcffile ${input_file} \\
    --output ${prefix} \\
    --reference_genome ${ref_genome} \\
    --full_genome_reference ${reference_fasta} \\
    --y_chromosome_reference ${y_reference_fasta} \\
    --threads ${task.cpus} \\
    --force \\
    --reanalyze \\
    --reads_treshold ${reads_thresh} \\
    --quality_thresh ${qual_thresh} \\
    --base_majority ${base_maj} \\
    --prediction_quality ${pred_qual} \\
    ${draw_hg ? '--draw_haplogroups' : ''} \\
    ${collapsed_mode ? '--collapsed_draw_mode' : ''} \\
    ${ancient ? '--ancient_DNA' : ''} \\
    ${private_mut ? '--private_mutations' : ''} \\
    --minor_allele_frequency ${maf} \\
    ${args}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    yleaf: \$(Yleaf --help | grep "version" | sed 's/.*version //g' | sed 's/).*//g')
END_VERSIONS
"""

stub:
    // Stub run: creates empty placeholders for the log, haplogroup and tree outputs under the
    // prefix directory and writes a fixed version string, without invoking Yleaf.
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    mkdir -p ${prefix}

    touch ${prefix}/run.log
    touch ${prefix}/hg_prediction.hg
    touch ${prefix}/hg_tree_image.pdf

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        yleaf: "3.3.0"
    END_VERSIONS
    """
}
65 changes: 65 additions & 0 deletions modules/nf-core/yleaf/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for YLEAF. The input and output entries mirror, in order, the
# channels declared in main.nf.
name: "yleaf"
description: Software for human Y-chromosomal haplogroup inference from next generation sequencing data
keywords:
  - genomics
  - y-chromosome
  - haplogroup
  - prediction
  - next-generation-sequencing
tools:
  - "yleaf":
      description: "Yleaf is software for human Y-chromosomal haplogroup inference from next generation sequencing data"
      homepage: "https://github.com/genid/Yleaf"
      documentation: "https://academic.oup.com/mbe/article/35/5/1291/4922696"
      tool_dev_url: "https://github.com/genid/Yleaf"
      doi: "10.1093/molbev/msy032"
      licence: ['GPL-3.0']
      identifier: "biotools:yleaf"

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - input_file:
        type: file
        description: Input VCF file, passed to Yleaf as `--vcffile`
        pattern: "*.{vcf,vcf.gz}"
        ontologies:
          - edam: "http://edamontology.org/format_3016"  # VCF
  - - reference_fasta:
        type: file
        description: Full reference genome FASTA, passed as `--full_genome_reference`
        pattern: "*.{fa,fasta,fna}"
        ontologies:
          - edam: "http://edamontology.org/format_1929"  # FASTA
  - - y_reference_fasta:
        type: file
        description: Y-chromosome reference FASTA, passed as `--y_chromosome_reference`
        pattern: "*.{fa,fasta,fna}"
        ontologies:
          - edam: "http://edamontology.org/format_1929"  # FASTA
  - - reference_genome:
        type: string
        description: Reference genome build passed as `--reference_genome`; `hg38` is used when unset
  - - reads_threshold:
        type: integer
        description: Value passed to Yleaf's `--reads_treshold` option; 10 is used when unset
  - - quality_thresh:
        type: integer
        description: Value passed as `--quality_thresh`; 20 is used when unset
  - - base_majority:
        type: integer
        description: Value passed as `--base_majority`; 90 is used when unset
  - - prediction_quality:
        type: float
        description: Value passed as `--prediction_quality`; 0.95 is used when unset
  - - draw_haplogroups:
        type: boolean
        description: When true, `--draw_haplogroups` is added to the command
  - - collapsed_draw_mode:
        type: boolean
        description: When true, `--collapsed_draw_mode` is added to the command
  - - ancient_dna:
        type: boolean
        description: When true, `--ancient_DNA` is added to the command
  - - private_mutations:
        type: boolean
        description: When true, `--private_mutations` is added to the command
  - - minor_allele_frequency:
        type: float
        description: Value passed as `--minor_allele_frequency`; 0.01 is used when unset

output:
  - haplogroup:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "${meta.id}/hg_prediction.hg":
          type: file
          description: Predicted Y-chromosome haplogroup output
          pattern: "hg_prediction.hg"
  - log:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "${meta.id}/*.log":
          type: file
          description: Log file generated by Yleaf
          pattern: "*.log"
  - tree:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      - "${meta.id}/hg_tree_image.pdf":
          type: file
          description: Haplogroup tree image, requested with `draw_haplogroups`
          pattern: "hg_tree_image.pdf"
  - versions:
      - "versions.yml":
          type: file
          description: File containing software versions
          pattern: "versions.yml"

authors:
  - "@trianglegrrl"
maintainers:
  - "@trianglegrrl"
16 changes: 16 additions & 0 deletions modules/nf-core/yleaf/tests/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the YLEAF module tests.
# conda-forge is listed before bioconda so that it takes priority when solving,
# which is the channel order nf-core modules use.
channels:
  - conda-forge
  - bioconda
dependencies:
  - python=3.9
  - numpy=1.24.3
  # NOTE(review): pandas 1.0.0 predates both Python 3.9 and numpy 1.24 -
  # confirm that this pin actually solves and imports cleanly.
  - pandas=1.0.0
  - bcftools=1.18
  # Edit this below only once yleaf 3.3.0 has been released to bioconda.
  # - yleaf=3.3.0
  # Otherwise, install from my local branch with pip:
  # FIXME(review): the editable install below points at an absolute path on a
  # local filesystem, so this environment cannot be built anywhere else.
  - pip
  - pip:
      - -e /home/a/Yleaf
90 changes: 90 additions & 0 deletions modules/nf-core/yleaf/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// nf-test suite for the YLEAF process defined in ../main.nf.
nextflow_process {

name "Test Process YLEAF"
script "../main.nf"
process "YLEAF"

tag "modules"
tag "modules_nfcore"
tag "yleaf"

// FIXME(review): the three fixtures below are absolute paths on a local filesystem, so the
// suite cannot run on any other machine (e.g. CI). Presumably they need to be replaced with
// files from a shared test-data location - confirm which VCF/FASTA fixtures are suitable.
def VCF_FILE = "/20tb/2025-01-project-drive/2024-12-19-yhaplo/ancient0003.chrY.vcf.gz"
def REF_GENOME_FULL = "/references/reference_genomes/hg38.analysisSet.fa"
def REF_GENOME_Y = "/references/reference_genomes/hg38.chrY.analysisSet.fa"

// Runs YLEAF with every optional value set explicitly, including haplogroup drawing.
test("real_data - vcf - with all parameters") {
    when {
        process {
            """
            input[0] = [
                [ id:'ancient0003' ],
                file("${VCF_FILE}", checkIfExists: true)
            ]
            input[1] = file("${REF_GENOME_FULL}", checkIfExists: true)
            input[2] = file("${REF_GENOME_Y}", checkIfExists: true)
            input[3] = "hg38"
            input[4] = 10
            input[5] = 20
            input[6] = 90
            input[7] = 0.95
            input[8] = true
            input[9] = false
            input[10] = true
            input[11] = true
            input[12] = 0.01
            """
        }
        // NOTE(review): this bare string sits outside the `process` block, so it appears to be
        // a no-op expression rather than a config override - confirm, and move it to a test
        // nextflow.config if ext.args is really needed.
        """
        process.ext.args = "--force --reanalyze"
        """
    }

    then {
        assertAll(
            { assert process.success },
            // Rely on Groovy truth: an empty output channel fails the assertion, whereas
            // `!= null` passes even when nothing was emitted.
            { assert process.out.versions },
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
{ assert process.out.versions != null },
{ assert process.out.versions },

Also all of these can go into a snapshot() function, so they are more explicitly recorded - see other modules for examples

// Groovy truth fails on an empty output channel; `!= null` would pass even with no files.
{ assert process.out.haplogroup },
{ assert process.out.tree },
{ assert process.out.log }
Comment on lines +45 to +48
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this checking that the files exist because their contents are variable? If so, we have other patterns for this: https://nf-co.re/docs/contributing/nf-test/assertions

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact you don't need the != bit at all

)
}
}

// Runs YLEAF with only the reference genome build set; every other optional value is null so
// that the defaults applied in main.nf are exercised.
test("real_data - vcf - minimal parameters") {
    when {
        process {
            """
            input[0] = [
                [ id:'ancient0003' ],
                file("${VCF_FILE}", checkIfExists: true)
            ]
            input[1] = file("${REF_GENOME_FULL}", checkIfExists: true)
            input[2] = file("${REF_GENOME_Y}", checkIfExists: true)
            input[3] = "hg38"
            input[4] = null
            input[5] = null
            input[6] = null
            input[7] = null
            input[8] = null
            input[9] = null
            input[10] = null
            input[11] = null
            input[12] = null
            """
        }
        // NOTE(review): this bare string sits outside the `process` block, so it appears to be
        // a no-op expression rather than a config override - confirm, and move it to a test
        // nextflow.config if ext.args is really needed.
        """
        process.ext.args = "--force --reanalyze"
        """
    }

    then {
        assertAll(
            { assert process.success },
            // Rely on Groovy truth: an empty output channel fails the assertion, whereas
            // `!= null` passes even when nothing was emitted.
            { assert process.out.versions },
            { assert process.out.haplogroup },
            { assert process.out.log }
            // `tree` is deliberately not asserted: draw_haplogroups is null here, so
            // presumably no tree image is requested - confirm against Yleaf's behaviour.
        )
    }
}
}
99 changes: 99 additions & 0 deletions modules/nf-core/yleaf/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{
"vcf_with_references - stub": {
"content": [
{
"0": [
[
{
"id": "test_sample"
},
"hg_prediction.hg:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
[
{
"id": "test_sample"
},
"run.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"2": [
[
{
"id": "test_sample"
},
"hg_tree_image.pdf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"3": [
[
{
"id": "test_sample"
},
"test_sample.filtered.out:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"4": [
[
{
"id": "test_sample"
},
"/home/a/nf-core/modules/.nf-test/tests/b999677892a41ca0bc784311e59f5db8/work/e3/a6c0c3f9c52a0daeffc19569c5a905/test_sample/filtered_vcf_files/test_sample.filtered.vcf.gz"
]
],
"5": [
"versions.yml:md5,664e30ba361578a9b5c7f897a3e6d429"
],
"filtered_out": [
[
{
"id": "test_sample"
},
"test_sample.filtered.out:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"filtered_vcf": [
[
{
"id": "test_sample"
},
"/home/a/nf-core/modules/.nf-test/tests/b999677892a41ca0bc784311e59f5db8/work/e3/a6c0c3f9c52a0daeffc19569c5a905/test_sample/filtered_vcf_files/test_sample.filtered.vcf.gz"
]
],
"haplogroup": [
[
{
"id": "test_sample"
},
"hg_prediction.hg:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"log": [
[
{
"id": "test_sample"
},
"run.log:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"tree": [
[
{
"id": "test_sample"
},
"hg_tree_image.pdf:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,664e30ba361578a9b5c7f897a3e6d429"
]
}
],
"meta": {
"nf-test": "0.9.2",
"nextflow": "24.10.5"
},
"timestamp": "2025-04-02T05:49:24.004827"
}
}