Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding Assembly Stub Output and Generating IRIDA Next-Compliant JSON Output #9

Merged
merged 18 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions bin/irida-next-output.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ def main(argv=None):
epilog="Example: python irida-next-output.py --json-output output.json *.json *.json.gz",
)
parser.add_argument("files", nargs="+")
parser.add_argument(
"--summary-file",
action="store",
dest="summary_file",
type=str,
help="pipeline summary file",
default=None,
required=True,
)
parser.add_argument(
"--json-output",
action="store",
Expand All @@ -41,6 +50,9 @@ def main(argv=None):
sys.stderr.write(f"Error: --json-output [{json_output_file}] exists")
return 1

# Not checking for the existence of the summary file
# because the path may be relative to the outdir, which we don't have here.

input_files = args.files
if isinstance(input_files, str):
input_files = [input_files]
Expand All @@ -55,7 +67,10 @@ def main(argv=None):
},
}

output_metadata = {"files": {"samples": {}}, "metadata": {"samples": {}}}
output_metadata = {
"files": {"global": [{"path": str(args.summary_file)}], "samples": {}},
"metadata": {"samples": {}},
}

for f in input_files:
_open = get_open(f)
Expand All @@ -64,7 +79,7 @@ def main(argv=None):
output_metadata["files"]["samples"] |= sample_metadata["files"]["samples"]
output_metadata["metadata"]["samples"] |= sample_metadata["metadata"]["samples"]

data_json = json.dumps(output_metadata, indent=4)
data_json = json.dumps(output_metadata, sort_keys=True, indent=4)
_open = get_open(json_output_file)
with _open(json_output_file, "wt") as oh:
oh.write(data_json)
Expand Down
77 changes: 77 additions & 0 deletions bin/simplify_irida_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/env python

import json
import argparse
import sys
import gzip
from mimetypes import guess_type
from functools import partial
from pathlib import Path


def flatten_dictionary(dictionary):
    """Flatten nested dicts/lists into a single-level dict with dotted keys.

    Nested dictionary keys are joined with ".", and list items are indexed
    starting at 1 (e.g. ``{"a": [10]}`` becomes ``{"a.1": 10}``). Scalar
    leaves are stored unchanged.

    :param dictionary: arbitrarily nested structure of dicts/lists/scalars
    :return: a flat dict mapping dotted key paths to leaf values
    """
    result = {}

    def flatten(item, name=""):
        if isinstance(item, dict):
            for key, value in item.items():
                flatten(value, f"{name}{key}.")
        elif isinstance(item, list):
            # 1-based indexing: the intended audience (biologists) expects
            # the first element to be number 1, not 0.
            for index, element in enumerate(item, start=1):
                flatten(element, f"{name}{index}.")
        else:
            # Strip the trailing "." appended by the parent recursion.
            result[name[:-1]] = item

    flatten(dictionary)
    return result


def main():
    """CLI entry point: flatten per-sample metadata in an IRIDA Next JSON file.

    Reads a (possibly gzip-compressed) JSON input file, flattens each
    sample's metadata dictionary into dotted keys via
    ``flatten_dictionary``, and writes the simplified JSON to the path
    given by ``--json-output``.

    :return: 0 on success, 1 if the output file already exists
    """
    parser = argparse.ArgumentParser(
        description="Simplifies JSON files for use with IRIDA Next",
        epilog="Example: python simplify_irida_json.py --json-output output.json input.json",
    )
    parser.add_argument("input")
    parser.add_argument(
        "--json-output",
        action="store",
        dest="json_output",
        type=str,
        help="JSON output file",
        default=None,
        required=True,
    )

    args = parser.parse_args()

    json_output_location = Path(args.json_output)
    if json_output_location.exists():
        # Refuse to clobber an existing output file.
        # Bug fix: this message was missing the f-string prefix, so the
        # placeholder was printed literally instead of the actual path.
        sys.stderr.write(f"Error: --json-output [{json_output_location}] exists!\n")
        return 1

    json_input_file = args.input

    # Transparently handle gzip-compressed and plain-text input.
    encoding = guess_type(json_input_file)[1]
    open_file = partial(gzip.open, mode="rt") if encoding == "gzip" else open  # partial (function pointer)

    with open_file(json_input_file) as input_file:
        input_json = json.load(input_file)

    # Flatten each sample's metadata dictionary in place.
    samples = input_json["metadata"]["samples"]
    for sample in samples:
        samples[sample] = flatten_dictionary(samples[sample])

    json_data = json.dumps(input_json, sort_keys=True, indent=4)
    with open(json_output_location, "w") as output_file:
        output_file.write(json_data)

    print("Output written to " + str(json_output_location) + "!")

    return 0


if __name__ == "__main__":
    sys.exit(main())
28 changes: 28 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

process {

// Publish directory names
assembly_directory_name = "assembly"
summary_directory_name = "summary"

publishDir = [
path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
mode: params.publish_dir_mode,
Expand All @@ -26,6 +30,30 @@ process {
]
}

withName: ASSEMBLY_STUB {
publishDir = [
path: { ["${params.outdir}", "${task.assembly_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: GENERATE_SUMMARY {
publishDir = [
path: { ["${params.outdir}", "${task.summary_directory_name}"].join(File.separator) },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: IRIDA_NEXT_OUTPUT {
publishDir = [
path: { "${params.outdir}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: CUSTOM_DUMPSOFTWAREVERSIONS {
publishDir = [
path: { "${params.outdir}/pipeline_info" },
Expand Down
33 changes: 33 additions & 0 deletions modules/local/assembly_stub/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Emits a fixed placeholder assembly (gzipped FASTA) per sample, standing in
// for a real assembler so the downstream pipeline wiring can be exercised.
process ASSEMBLY_STUB {
tag "$meta.id"
label 'process_single'

container 'docker.io/python:3.9.17'

input:
// reads are accepted to mirror a real assembler's interface; the script
// below never uses them — the FASTA content is hard-coded.
tuple val(meta), path(reads)

output:
tuple val(meta), path("*.assembly.fa.gz"), emit: assembly
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
// gzip -n omits the timestamp from the gzip header so the stub output is
// byte-identical across runs.
"""
cat <<-EOF > ${prefix}.assembly.fa
>${meta.id}-stub-assembly
ACGTAACCGGTTAAACCCGGGTTTAAAACCCCGGGGTTTTAAAAACCCCCGGGGGTTTTT
EOF

gzip -n ${prefix}.assembly.fa

cat <<-END_VERSIONS > versions.yml
"${task.process}":
assembly_stub : 0.1.0.dev0
END_VERSIONS
"""
}
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
process SAMPLE_METADATA {
process GENERATE_SAMPLE_JSON {
tag "$meta.id"
label 'process_single'

container 'docker.io/python:3.9.17'

input:
tuple val(meta), path(reads)
tuple val(meta), path(reads), path(assembly)

output:
tuple val(meta), path("*.json.gz"), emit: json
Expand All @@ -17,18 +17,23 @@ process SAMPLE_METADATA {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def assembly_path = ["${task.assembly_directory_name}", "${assembly}"].join(File.separator)
"""
reads_1=`basename ${reads[0]}`
reads_2=`basename ${reads[1]}`
cat <<-EOF > "${meta.id}.json"
{
"files": {
"samples": {}
"samples": {
"${meta.id}": [
{
"path": "${assembly_path}"
}
]
}
},
"metadata": {
"samples": {
"${meta.id}": {
"reads": ["\${reads_1}", "\${reads_2}"]
"reads": ["${reads[0]}", "${reads[1]}"]
}
}
}
Expand All @@ -38,7 +43,7 @@ process SAMPLE_METADATA {

cat <<-END_VERSIONS > versions.yml
"${task.process}":
irida-next-output : 0.1.0.dev0
generate_sample_json : 0.1.0.dev0
END_VERSIONS
"""
}
38 changes: 38 additions & 0 deletions modules/local/generate_summary/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Collects per-sample read/assembly information and writes a gzipped
// plain-text pipeline summary (summary.txt.gz) plus a versions.yml.
process GENERATE_SUMMARY {
label 'process_single'
container 'docker.io/python:3.9.17'

input:
// summaries: a collected list where each element is indexed below as
// [0] meta map (with .id), [1] pair of read paths, [2] assembly path
val summaries

output:
path("summary.txt.gz"), emit: summary
path "versions.yml", emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Sort by sample id so the summary is deterministic regardless of the
// order in which upstream processes complete.
def sorted_summaries = summaries.sort{ it[0].id }

// Generate summary text:
def summary_text = "IRIDANEXT-EXAMPLE-NF Pipeline Summary\n\nSUCCESS!\n"

// TODO: Consider the possibility of code injection.
// Should probably be moved to file processing through Python.
// NOTE(review): sample ids and paths are interpolated directly into a
// shell-executed string below — see TODO above before using untrusted ids.
for (summary in sorted_summaries) {
summary_text += "\n${summary[0].id}:\n"
summary_text += "    reads.1: ${summary[1][0]}\n"
summary_text += "    reads.2: ${summary[1][1]}\n"
summary_text += "    assembly: ${summary[2]}\n"
}

version_text = "\"${task.process}\":\n    generate_summary : 0.1.0.dev0"

// gzip -n: omit timestamp so repeated runs produce identical archives.
"""
echo "${summary_text}" > summary.txt
gzip -n summary.txt
echo "${version_text}" > versions.yml
"""
}
5 changes: 3 additions & 2 deletions modules/local/irida-next-output/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ process IRIDA_NEXT_OUTPUT {
path(samples_data)

output:
path("output.json.gz"), emit: output_json
path("iridanext.output.json.gz"), emit: output_json
path "versions.yml", emit: versions

when:
Expand All @@ -19,7 +19,8 @@ process IRIDA_NEXT_OUTPUT {
"""
irida-next-output.py \\
$args \\
--json-output output.json.gz \\
--summary-file ${task.summary_directory_name}/summary.txt.gz \\
--json-output iridanext.output.json.gz \\
${samples_data}

cat <<-END_VERSIONS > versions.yml
Expand Down
33 changes: 33 additions & 0 deletions modules/local/simplify_irida_json/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Runs simplify_irida_json.py on a per-sample IRIDA Next JSON file,
// flattening nested metadata into dotted keys, and gzips the result.
process SIMPLIFY_IRIDA_JSON {
tag "$meta.id"
label 'process_single'

container 'docker.io/python:3.9.17'

input:
// json: per-sample IRIDA Next JSON (plain or gzipped; the script detects
// gzip input by file extension)
tuple val(meta), path(json)

output:
tuple val(meta), path("*.simple.json.gz") , emit: simple_json
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
simplify_irida_json.py \\
    $args \\
    --json-output ${meta.id}.simple.json \\
    ${json}

gzip ${meta.id}.simple.json

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    simplify_irida_json : 0.1.0.dev0
END_VERSIONS
"""
}
1 change: 0 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ params {
validationSchemaIgnoreParams = 'genomes,igenomes_base'
validationShowHiddenParams = false
validate_params = true

}

// Load base.config by default for all pipelines
Expand Down
3 changes: 2 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -244,5 +244,6 @@
{
"$ref": "#/definitions/generic_options"
}
]
],
"properties": {}
}
Loading