-
Notifications
You must be signed in to change notification settings - Fork 840
Yleaf module added @3.3.0 #8210
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the YLEAF module: Python plus the numpy/pandas/bcftools pins listed below, | ||
# with Yleaf itself installed through pip. | ||
channels: | ||
- bioconda | ||
- conda-forge | ||
dependencies: | ||
# NOTE(review): pandas 1.0.0 appears to predate Python 3.9 support - confirm this pin set actually solves. | ||
- python=3.9 | ||
- numpy=1.24.3 | ||
- pandas=1.0.0 | ||
- bcftools=1.18 | ||
# Edit this below only once yleaf 3.3.0 has been released to bioconda. | ||
# - yleaf=3.3.0 | ||
# Otherwise, install from my local branch with pip: | ||
# NOTE(review): the editable install below uses an absolute local filesystem path, so this | ||
# environment can only be built on a machine that has a Yleaf checkout at that exact location. | ||
# It must be replaced by a published package before the module is usable elsewhere. | ||
- pip | ||
- pip: | ||
- -e /home/a/Yleaf |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
// Runs Yleaf on a VCF to predict the Y-chromosomal haplogroup of one sample. | ||
// | ||
// Inputs: | ||
//   meta / input_file        sample metadata map and the VCF passed to --vcffile | ||
//   reference_fasta          full genome reference, staged as reference.fa | ||
//   y_reference_fasta        Y-chromosome reference, staged as y_reference.fa | ||
//   remaining val inputs     optional Yleaf settings; pass [] (or '') to use the defaults below | ||
// | ||
// Outputs (all written below the ${prefix} directory): | ||
//   haplogroup               hg_prediction.hg | ||
//   log                      *.log | ||
//   tree                     hg_tree_image.pdf (optional, see the output block) | ||
//   versions                 versions.yml | ||
process YLEAF { | ||
    tag "$meta.id" | ||
    label 'process_medium' | ||
|
||
    // TODO AH: switch environment.yml to the bioconda package (and add a container | ||
    // directive) once yleaf 3.3.0 is released to bioconda. | ||
    conda "${moduleDir}/environment.yml" | ||
|
||
    input: | ||
    tuple val(meta), path(input_file) | ||
    path reference_fasta, stageAs: "reference.fa" | ||
    path y_reference_fasta, stageAs: "y_reference.fa" | ||
    val reference_genome | ||
    val reads_threshold | ||
    val quality_thresh | ||
    val base_majority | ||
    val prediction_quality | ||
    val draw_haplogroups | ||
    val collapsed_draw_mode | ||
    val ancient_dna | ||
    val private_mutations | ||
    val minor_allele_frequency | ||
|
||
    output: | ||
    // Paths use ${prefix} (not ${meta.id}) because that is the directory the script writes to; | ||
    // the two differ as soon as task.ext.prefix is configured. | ||
    tuple val(meta), path("${prefix}/hg_prediction.hg"), emit: haplogroup | ||
    tuple val(meta), path("${prefix}/*.log"), emit: log | ||
    // Optional: the tree is only requested when draw_haplogroups is enabled, and that is off by default. | ||
    tuple val(meta), path("${prefix}/hg_tree_image.pdf"), emit: tree, optional: true | ||
    path "versions.yml", emit: versions | ||
|
||
    when: | ||
    task.ext.when == null || task.ext.when | ||
|
||
    script: | ||
    def args = task.ext.args ?: '' | ||
    // Deliberately declared without `def` so that the output block above can resolve it. | ||
    prefix = task.ext.prefix ?: "${meta.id}" | ||
    // Treat null, empty strings and empty collections as "not provided". A plain `?:` would | ||
    // also discard an explicit 0, which is a legitimate value for the numeric thresholds. | ||
    def isUnset = { value -> value == null || ((value instanceof CharSequence || value instanceof Collection) && !value) } | ||
    def orDefault = { value, fallback -> isUnset(value) ? fallback : value } | ||
    def ref_genome = orDefault(reference_genome, "hg38") | ||
    // These defaults correspond to the defaults used in the Yleaf package | ||
    def reads_thresh = orDefault(reads_threshold, 10) | ||
    def qual_thresh = orDefault(quality_thresh, 20) | ||
    def base_maj = orDefault(base_majority, 90) | ||
    def pred_qual = orDefault(prediction_quality, 0.95) | ||
    def maf = orDefault(minor_allele_frequency, 0.01) | ||
    // Boolean switches: the flag is emitted only when the input is truthy. | ||
    def draw_hg = draw_haplogroups ? '--draw_haplogroups' : '' | ||
    def collapsed_mode = collapsed_draw_mode ? '--collapsed_draw_mode' : '' | ||
    def ancient = ancient_dna ? '--ancient_DNA' : '' | ||
    def private_mut = private_mutations ? '--private_mutations' : '' | ||
|
||
    """ | ||
    mkdir -p ${prefix} | ||
|
||
    echo "Input file: ${input_file}" | ||
    echo "Reference genome: ${ref_genome}" | ||
|
||
    Yleaf \\ | ||
        --vcffile ${input_file} \\ | ||
        --output ${prefix} \\ | ||
        --reference_genome ${ref_genome} \\ | ||
        --full_genome_reference ${reference_fasta} \\ | ||
        --y_chromosome_reference ${y_reference_fasta} \\ | ||
        --threads ${task.cpus} \\ | ||
        --force \\ | ||
        --reanalyze \\ | ||
        --reads_treshold ${reads_thresh} \\ | ||
        --quality_thresh ${qual_thresh} \\ | ||
        --base_majority ${base_maj} \\ | ||
        --prediction_quality ${pred_qual} \\ | ||
        ${draw_hg} \\ | ||
        ${collapsed_mode} \\ | ||
        ${ancient} \\ | ||
        ${private_mut} \\ | ||
        --minor_allele_frequency ${maf} \\ | ||
        ${args} | ||
|
||
    cat <<-END_VERSIONS > versions.yml | ||
    "${task.process}": | ||
        yleaf: \$(Yleaf --help | grep "version" | sed 's/.*version //g' | sed 's/).*//g') | ||
    END_VERSIONS | ||
    """ | ||
|
||
    stub: | ||
    // Same prefix rule as the script block; declared without `def` for the output block. | ||
    prefix = task.ext.prefix ?: "${meta.id}" | ||
|
||
    """ | ||
    mkdir -p ${prefix} | ||
|
||
    touch ${prefix}/run.log | ||
    touch ${prefix}/hg_prediction.hg | ||
    touch ${prefix}/hg_tree_image.pdf | ||
|
||
    cat <<-END_VERSIONS > versions.yml | ||
    "${task.process}": | ||
        yleaf: "3.3.0" | ||
    END_VERSIONS | ||
    """ | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json | ||
# Module metadata. The input and output sections mirror, in order, the channels declared in main.nf. | ||
name: "yleaf" | ||
description: Software for human Y-chromosomal haplogroup inference from next generation sequencing data | ||
keywords: | ||
  - genomics | ||
  - y-chromosome | ||
  - haplogroup | ||
  - prediction | ||
  - next-generation-sequencing | ||
tools: | ||
  - "yleaf": | ||
      description: "Yleaf is software for human Y-chromosomal haplogroup inference from next generation sequencing data" | ||
      homepage: "https://github.com/genid/Yleaf" | ||
      documentation: "https://academic.oup.com/mbe/article/35/5/1291/4922696" | ||
      tool_dev_url: "https://github.com/genid/Yleaf" | ||
      doi: "10.1093/molbev/msy032" | ||
      licence: ['GPL-3.0'] | ||
      identifier: "biotools:yleaf" | ||
|
||
input: | ||
  - - meta: | ||
        type: map | ||
        description: | | ||
          Groovy Map containing sample information | ||
          e.g. `[ id:'sample1', single_end:false ]` | ||
    - input_file: | ||
        type: file | ||
        description: Input sequencing file - VCF | ||
        pattern: "*.{vcf,vcf.gz}" | ||
        ontologies: | ||
          - edam: "http://edamontology.org/format_3016" # VCF | ||
  - - reference_fasta: | ||
        type: file | ||
        description: Full reference genome file, passed to `--full_genome_reference` | ||
        pattern: "*.{fa,fasta,fna}" | ||
        ontologies: | ||
          - edam: "http://edamontology.org/format_1929" # FASTA | ||
  - - y_reference_fasta: | ||
        type: file | ||
        description: Y-chromosome reference file, passed to `--y_chromosome_reference` | ||
        pattern: "*.{fa,fasta,fna}" | ||
        ontologies: | ||
          - edam: "http://edamontology.org/format_1929" # FASTA | ||
  - - reference_genome: | ||
        type: string | ||
        description: Value for `--reference_genome`; the module uses `hg38` when this is empty | ||
  - - reads_threshold: | ||
        type: integer | ||
        description: Value for `--reads_treshold`; the module uses 10 when this is empty | ||
  - - quality_thresh: | ||
        type: integer | ||
        description: Value for `--quality_thresh`; the module uses 20 when this is empty | ||
  - - base_majority: | ||
        type: integer | ||
        description: Value for `--base_majority`; the module uses 90 when this is empty | ||
  - - prediction_quality: | ||
        type: float | ||
        description: Value for `--prediction_quality`; the module uses 0.95 when this is empty | ||
  - - draw_haplogroups: | ||
        type: boolean | ||
        description: Adds `--draw_haplogroups` to the command when true | ||
  - - collapsed_draw_mode: | ||
        type: boolean | ||
        description: Adds `--collapsed_draw_mode` to the command when true | ||
  - - ancient_dna: | ||
        type: boolean | ||
        description: Adds `--ancient_DNA` to the command when true | ||
  - - private_mutations: | ||
        type: boolean | ||
        description: Adds `--private_mutations` to the command when true | ||
  - - minor_allele_frequency: | ||
        type: float | ||
        description: Value for `--minor_allele_frequency`; the module uses 0.01 when this is empty | ||
|
||
output: | ||
  - haplogroup: | ||
      - meta: | ||
          type: map | ||
          description: | | ||
            Groovy Map containing sample information | ||
            e.g. `[ id:'sample1', single_end:false ]` | ||
      - "*/hg_prediction.hg": | ||
          type: file | ||
          description: Predicted Y-chromosome haplogroup output | ||
          pattern: "*/hg_prediction.hg" | ||
  - log: | ||
      - meta: | ||
          type: map | ||
          description: | | ||
            Groovy Map containing sample information | ||
            e.g. `[ id:'sample1', single_end:false ]` | ||
      - "*/*.log": | ||
          type: file | ||
          description: Log file generated by Yleaf | ||
          pattern: "*/*.log" | ||
  - tree: | ||
      - meta: | ||
          type: map | ||
          description: | | ||
            Groovy Map containing sample information | ||
            e.g. `[ id:'sample1', single_end:false ]` | ||
      - "*/hg_tree_image.pdf": | ||
          type: file | ||
          description: Haplogroup tree image (PDF) | ||
          pattern: "*/hg_tree_image.pdf" | ||
  - versions: | ||
      - "versions.yml": | ||
          type: file | ||
          description: File containing software versions | ||
          pattern: "versions.yml" | ||
|
||
authors: | ||
  - "@trianglegrrl" | ||
maintainers: | ||
  - "@trianglegrrl" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
--- | ||
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json | ||
# Conda environment for the YLEAF module: Python plus the numpy/pandas/bcftools pins listed below, | ||
# with Yleaf itself installed through pip. | ||
channels: | ||
- bioconda | ||
- conda-forge | ||
dependencies: | ||
# NOTE(review): pandas 1.0.0 appears to predate Python 3.9 support - confirm this pin set actually solves. | ||
- python=3.9 | ||
- numpy=1.24.3 | ||
- pandas=1.0.0 | ||
- bcftools=1.18 | ||
# Edit this below only once yleaf 3.3.0 has been released to bioconda. | ||
# - yleaf=3.3.0 | ||
# Otherwise, install from my local branch with pip: | ||
# NOTE(review): the editable install below uses an absolute local filesystem path, so this | ||
# environment can only be built on a machine that has a Yleaf checkout at that exact location. | ||
# It must be replaced by a published package before the module is usable elsewhere. | ||
- pip | ||
- pip: | ||
- -e /home/a/Yleaf |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1,90 @@ | ||||||
nextflow_process { | ||||||
|
||||||
name "Test Process YLEAF" | ||||||
script "../main.nf" | ||||||
process "YLEAF" | ||||||
|
||||||
tag "modules" | ||||||
tag "modules_nfcore" | ||||||
tag "yleaf" | ||||||
|
||||||
// Input files shared by every test in this file: one chrY VCF and the two reference FASTAs. | ||||||
// NOTE(review): these are absolute local filesystem paths, so the tests only run on a machine | ||||||
// that has these exact files; presumably they should come from the nf-core test datasets - confirm. | ||||||
def VCF_FILE = "/20tb/2025-01-project-drive/2024-12-19-yhaplo/ancient0003.chrY.vcf.gz" | ||||||
def REF_GENOME_FULL = "/references/reference_genomes/hg38.analysisSet.fa" | ||||||
def REF_GENOME_Y = "/references/reference_genomes/hg38.chrY.analysisSet.fa" | ||||||
|
||||||
test("real_data - vcf - with all parameters") { | ||||||
when { | ||||||
process { | ||||||
""" | ||||||
input[0] = [ | ||||||
[ id:'ancient0003' ], | ||||||
file("${VCF_FILE}", checkIfExists: true) | ||||||
] | ||||||
input[1] = file("${REF_GENOME_FULL}", checkIfExists: true) | ||||||
input[2] = file("${REF_GENOME_Y}", checkIfExists: true) | ||||||
input[3] = "hg38" | ||||||
input[4] = 10 | ||||||
input[5] = 20 | ||||||
input[6] = 90 | ||||||
input[7] = 0.95 | ||||||
input[8] = true | ||||||
input[9] = false | ||||||
input[10] = true | ||||||
input[11] = true | ||||||
input[12] = 0.01 | ||||||
""" | ||||||
} | ||||||
""" | ||||||
process.ext.args = "--force --reanalyze" | ||||||
""" | ||||||
} | ||||||
|
||||||
then { | ||||||
assertAll( | ||||||
{ assert process.success }, | ||||||
{ assert process.out.versions != null }, | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Also all of these can go into a |
||||||
{ assert process.out.haplogroup != null }, | ||||||
{ assert process.out.tree != null }, | ||||||
{ assert process.out.log != null } | ||||||
Comment on lines
+45
to
+48
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this checking that the files exist because they are variable? If so we have other patterns for this: https://nf-co.re/docs/contributing/nf-test/assertions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In fact you don't need the |
||||||
) | ||||||
} | ||||||
} | ||||||
|
||||||
// Runs YLEAF with only the files and the genome build supplied; every optional setting is left | ||||||
// empty so that the defaults defined in main.nf are exercised. | ||||||
test("real_data - vcf - minimal parameters") { | ||||||
when { | ||||||
process { | ||||||
""" | ||||||
input[0] = [ | ||||||
[ id:'ancient0003' ], | ||||||
file("${VCF_FILE}", checkIfExists: true) | ||||||
] | ||||||
input[1] = file("${REF_GENOME_FULL}", checkIfExists: true) | ||||||
input[2] = file("${REF_GENOME_Y}", checkIfExists: true) | ||||||
input[3] = "hg38" | ||||||
// Optional inputs are passed as [] (the nf-core convention for "not provided") rather than null. | ||||||
input[4] = [] | ||||||
input[5] = [] | ||||||
input[6] = [] | ||||||
input[7] = [] | ||||||
input[8] = [] | ||||||
input[9] = [] | ||||||
input[10] = [] | ||||||
input[11] = [] | ||||||
input[12] = [] | ||||||
""" | ||||||
} | ||||||
// No ext.args override here: main.nf already passes --force and --reanalyze itself. | ||||||
} | ||||||
|
||||||
then { | ||||||
assertAll( | ||||||
{ assert process.success }, | ||||||
{ assert process.out.versions != null }, | ||||||
{ assert process.out.haplogroup != null }, | ||||||
{ assert process.out.tree != null }, | ||||||
{ assert process.out.log != null } | ||||||
) | ||||||
} | ||||||
} | ||||||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
{ | ||
"vcf_with_references - stub": { | ||
"content": [ | ||
{ | ||
"0": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"hg_prediction.hg:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"1": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"run.log:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"2": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"hg_tree_image.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"3": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"test_sample.filtered.out:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"4": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"/home/a/nf-core/modules/.nf-test/tests/b999677892a41ca0bc784311e59f5db8/work/e3/a6c0c3f9c52a0daeffc19569c5a905/test_sample/filtered_vcf_files/test_sample.filtered.vcf.gz" | ||
] | ||
], | ||
"5": [ | ||
"versions.yml:md5,664e30ba361578a9b5c7f897a3e6d429" | ||
], | ||
"filtered_out": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"test_sample.filtered.out:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"filtered_vcf": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"/home/a/nf-core/modules/.nf-test/tests/b999677892a41ca0bc784311e59f5db8/work/e3/a6c0c3f9c52a0daeffc19569c5a905/test_sample/filtered_vcf_files/test_sample.filtered.vcf.gz" | ||
] | ||
], | ||
"haplogroup": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"hg_prediction.hg:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"log": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"run.log:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"tree": [ | ||
[ | ||
{ | ||
"id": "test_sample" | ||
}, | ||
"hg_tree_image.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" | ||
] | ||
], | ||
"versions": [ | ||
"versions.yml:md5,664e30ba361578a9b5c7f897a3e6d429" | ||
] | ||
} | ||
], | ||
"meta": { | ||
"nf-test": "0.9.2", | ||
"nextflow": "24.10.5" | ||
}, | ||
"timestamp": "2025-04-02T05:49:24.004827" | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Small comment: if the tool itself has its own defaults then you don't need to define all of these. Input `val`s should ONLY be used for mandatory inputs where there is no default. [1]
Footnotes
[1] https://nf-co.re/docs/guidelines/components/modules#non-file-mandatory-command-arguments ↩