Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

renamed metadata fields #149

Draft
wants to merge 6 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/linting_comment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download lint results
uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6
uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8
with:
workflow: linting.yml
workflow_conclusion: completed
Expand Down
33 changes: 17 additions & 16 deletions conf/irida_next.config
Original file line number Diff line number Diff line change
Expand Up @@ -48,29 +48,29 @@ iridanext {
samples {
flatten = true
rename = [
"QCStatus" : "QC Status",
"QualityAnalysis.checkm_contamination.qc_status" : "Checkm Status",
"QualityAnalysis.checkm_contamination.value" : "Checkm Value",
"QualityAnalysis.average_coverage.qc_status" : "Average Coverage Status",
"QualityAnalysis.average_coverage.value" : "Average Coverage Value",
"QCStatus" : "qc_status_overall",
"QualityAnalysis.checkm_contamination.qc_status" : "qc_status_contamination",
"QualityAnalysis.checkm_contamination.value" : "checkm_value",
"QualityAnalysis.average_coverage.qc_status" : "qc_status_read_depth_fixed",
"QualityAnalysis.average_coverage.value" : "mean_read_depth_fixed",
"QualityAnalysis.n50_value.qc_status" : "n50 Status",
"QualityAnalysis.n50_value.value" : "n50 Value",
"QualityAnalysis.raw_average_quality.qc_status" : "Raw Average Quality Status",
"QualityAnalysis.raw_average_quality.qc_status" : "qc_status_read_qual",
"QualityAnalysis.raw_average_quality.value" : "Raw Average Quality Value",
"QualityAnalysis.length.qc_status" : "Length Status",
"QualityAnalysis.length.value" : "Length Value",
"QualityAnalysis.nr_contigs.qc_status" : "nr contigs Status",
"QualityAnalysis.nr_contigs.value" : "nr contigs Value",
"QCSummary" : "QC Summary",
"QualityAnalysis.length.qc_status" : "qc_status_assembly_genome_size",
"QualityAnalysis.length.value" : "assembly_length",
"QualityAnalysis.nr_contigs.qc_status" : "qc_status_assembly_num_contigs",
"QualityAnalysis.nr_contigs.value" : "assembly_num_contigs",
"QCSummary" : "qc_summary",
"meta.downsampled" : "Downsampled",
"SpeciesTopHit" : "predicted_identification_name",
"IdentificationMethod" : "predicted_identification_method",
"LocidexDatabaseInformation.db_name" : "locidex_db_name",
"LocidexDatabaseInformation.db_date" : "locidex_db_date",
"LocidexDatabaseInformation.db_version" : "locidex_db_version",
"LocidexSummary.TotalLoci" : "total_loci",
"LocidexSummary.AllelesPresent" : "count_loci_found",
"LocidexSummary.MissingAllelesCount" : "count_loci_missing",
"LocidexSummary.TotalLoci" : "locidex_db_total_loci_count",
"LocidexSummary.AllelesPresent" : "locidex_count_loci_found",
"LocidexSummary.MissingAllelesCount" : "locidex_count_loci_missing",
"ECTyperSubtyping.0.Database" : "ECTyper Database",
"ECTyperSubtyping.0.Evidence" : "ECTyper Evidence",
"ECTyperSubtyping.0.GeneCoverages(%)" : "ECTyper GeneCoverages (%)",
Expand All @@ -83,8 +83,9 @@ iridanext {
"ECTyperSubtyping.0.Species" : "ECTyper Subtyping",
"ECTyperSubtyping.0.Warnings" : "ECTyper Warnings",
"LISSEROSubtyping.0.SEROTYPE" : "LISSERO Serotype",
"QUAST.0.GC (%)" : "GC (%)",
"QUAST.0.GC (%)" : "gc",
"RawReadSummary.R1.mean_sequence_length" : "Mean Sequence Length Forward",
"RawReadSummary.R2.mean_sequence_length" : "Mean Sequence Length Reverse",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you align this with the other entries? I assume there's a tab here where there should be spaces.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just following up on this PR. I reached out to @dfornika to mention that I can finish off this PR for him. I just had to wrap something else up.

I did have some questions related to the two new fields. The average read length one will not be too hard to add, but I want to verify some of the requirements for the new fields.

"SISTRSubtyping.0.cgmlst_ST" : "SISTR cgMLST ST",
"SISTRSubtyping.0.cgmlst_found_loci" : "SISTR cgMLST Found Loci",
"SISTRSubtyping.0.cgmlst_genome_match" : "SISTR cgMLST Genome Match",
Expand All @@ -99,7 +100,7 @@ iridanext {
"SISTRSubtyping.0.serovar" : "SISTR Serovar",
"SISTRSubtyping.0.serovar_antigen" : "SISTR Serovar Antigen",
"SISTRSubtyping.0.serovar_cgmlst" : "SISTR Serovar cgMLST",
"SeqtkBaseCount" : "BaseCount",
"SeqtkBaseCount" : "total_sequenced_bases",
"SevenGeneMLSTReport.0.alleles.abcZ" : "abcZ",
"SevenGeneMLSTReport.0.alleles.adk" : "adk",
"SevenGeneMLSTReport.0.alleles.arcA" : "arcA",
Expand Down
78 changes: 39 additions & 39 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,29 @@ nextflow_pipeline {
assert iridanext_samples.CSE.findAll { it.path.contains("Assembly/FinalReports/FlattenedReports/CSE.flat_sample.json.gz") }.size() == 0

// metadata
assert iridanext_metadata.CSE.containsKey("QC Status")
assert iridanext_metadata.CSE."QC Status" == "FAILED"
assert !iridanext_metadata.CSE.containsKey("Checkm Status")
assert !iridanext_metadata.CSE.containsKey("Checkm Value")
assert !iridanext_metadata.CSE.containsKey("Average Coverage Status")
assert !iridanext_metadata.CSE.containsKey("Average Coverage Value")
assert iridanext_metadata.CSE.containsKey("qc_status_overall")
assert iridanext_metadata.CSE."qc_status_overall" == "FAILED"
assert !iridanext_metadata.CSE.containsKey("qc_status_contamination")
assert !iridanext_metadata.CSE.containsKey("checkm_value")
assert !iridanext_metadata.CSE.containsKey("qc_status_read_depth_fixed")
assert !iridanext_metadata.CSE.containsKey("mean_read_depth_fixed")
assert !iridanext_metadata.CSE.containsKey("n50 Status")
assert !iridanext_metadata.CSE.containsKey("n50 Value")
assert !iridanext_metadata.CSE.containsKey("Raw Average Quality Status")
assert !iridanext_metadata.CSE.containsKey("Raw Average Quality Value")
assert !iridanext_metadata.CSE.containsKey("Length Status")
assert !iridanext_metadata.CSE.containsKey("Length Value")
assert !iridanext_metadata.CSE.containsKey("nr contigs Status")
assert !iridanext_metadata.CSE.containsKey("nr contigs Value")
assert iridanext_metadata.CSE."QC Summary" == "FAILED Species ID: No organism specific QC data available.; Passed Tests: 0/6; Organism QC Criteria: No organism specific QC data available."
assert !iridanext_metadata.CSE.containsKey("qc_status_assembly_genome_size")
assert !iridanext_metadata.CSE.containsKey("assembly_length")
assert !iridanext_metadata.CSE.containsKey("qc_status_assembly_num_contigs")
assert !iridanext_metadata.CSE.containsKey("assembly_num_contigs")
assert iridanext_metadata.CSE."qc_summary" == "FAILED Species ID: No organism specific QC data available.; Passed Tests: 0/6; Organism QC Criteria: No organism specific QC data available."

assert iridanext_metadata.CSE."Downsampled" == false
assert !iridanext_metadata.CSE.containsKey("predicted_identification_name")
assert !iridanext_metadata.CSE.containsKey("predicted_identification_method")
assert !iridanext_metadata.CSE.containsKey("GC (%)")
assert !iridanext_metadata.CSE.containsKey("gc")
//assert iridanext_metadata.CSE."Mean Sequence Length Forward" == 150
// Base count after decontamination
assert iridanext_metadata.CSE."BaseCount" == 37200
assert iridanext_metadata.CSE."total_sequenced_bases" == 37200
}
}

Expand Down Expand Up @@ -218,7 +218,7 @@ nextflow_pipeline {
assert iridanext_samples.INX.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1

// output metadata
assert iridanext_metadata.INX."QC Status" == "PASSED"
assert iridanext_metadata.INX."qc_status_overall" == "PASSED"

}

Expand Down Expand Up @@ -346,27 +346,27 @@ nextflow_pipeline {
assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1

// output metadata
assert iridanext_metadata.short."QC Status" == "PASSED"
assert iridanext_metadata.short."Checkm Status" == "PASSED"
assert iridanext_metadata.short."Checkm Value" == 0.0
assert iridanext_metadata.short."qc_status_overall" == "PASSED"
assert iridanext_metadata.short."qc_status_contamination" == "PASSED"
assert iridanext_metadata.short."checkm_value" == 0.0
assert iridanext_metadata.short."Average Coverage Status" == "PASSED"
assert iridanext_metadata.short."Average Coverage Value" == 237.5
assert iridanext_metadata.short."n50 Status" == "WARNING"
assert iridanext_metadata.short."n50 Value" == 4949
assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"
assert iridanext_metadata.short."qc_status_read_qual" == "PASSED"
assert iridanext_metadata.short."Raw Average Quality Value" == 40.0
assert iridanext_metadata.short."Length Status" == "WARNING"
assert iridanext_metadata.short."Length Value" == 4949
assert iridanext_metadata.short."nr contigs Status" == "WARNING"
assert iridanext_metadata.short."nr contigs Value" == 1
assert iridanext_metadata.short."QC Summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available."
assert iridanext_metadata.short."qc_status_assembly_genome_size" == "WARNING"
assert iridanext_metadata.short."assembly_length" == 4949
assert iridanext_metadata.short."qc_status_assembly_num_contigs" == "WARNING"
assert iridanext_metadata.short."assembly_num_contigs" == 1
assert iridanext_metadata.short."qc_summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available."

assert iridanext_metadata.short."Downsampled" == false
assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified"
assert iridanext_metadata.short."predicted_identification_method" == "Mash"
assert iridanext_metadata.short."GC (%)" == "52.96"
assert iridanext_metadata.short."gc" == "52.96"
assert iridanext_metadata.short."Mean Sequence Length Forward" == 250
assert iridanext_metadata.short."BaseCount" == 237500
assert iridanext_metadata.short."total_sequenced_bases" == 237500

assert iridanext_metadata.short."StarAMR Genotype" == "None"
assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible"
Expand Down Expand Up @@ -495,27 +495,27 @@ nextflow_pipeline {
assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1

// output metadata
assert iridanext_metadata.short."QC Status" == "FAILED"
assert iridanext_metadata.short."Checkm Status" == "PASSED"
assert iridanext_metadata.short."Checkm Value" == 0.0
assert iridanext_metadata.short."Average Coverage Status" == "FAILED"
assert iridanext_metadata.short."Average Coverage Value" == 0.24
assert iridanext_metadata.short."qc_status_overall" == "FAILED"
assert iridanext_metadata.short."qc_status_contamination" == "PASSED"
assert iridanext_metadata.short."checkm_value" == 0.0
assert iridanext_metadata.short."qc_status_read_depth_fixed" == "FAILED"
assert iridanext_metadata.short."mean_read_depth_fixed" == 0.24
assert iridanext_metadata.short."n50 Status" == "WARNING"
assert iridanext_metadata.short."n50 Value" == 4949
assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED"
assert iridanext_metadata.short."qc_status_read_qual" == "PASSED"
assert iridanext_metadata.short."Raw Average Quality Value" == 40.0
assert iridanext_metadata.short."Length Status" == "WARNING"
assert iridanext_metadata.short."Length Value" == 4949
assert iridanext_metadata.short."nr contigs Status" == "WARNING"
assert iridanext_metadata.short."nr contigs Value" == 1
assert iridanext_metadata.short."qc_status_assembly_genome_size" == "WARNING"
assert iridanext_metadata.short."assembly_length" == 4949
assert iridanext_metadata.short."qc_status_assembly_num_contigs" == "WARNING"
assert iridanext_metadata.short."assembly_num_contigs" == 1
assert iridanext_metadata.short."QC Summary" == "FAILED Species ID: No Species Identified; Passed Tests: 5/6; Organism QC Criteria: No organism specific QC data available."

assert iridanext_metadata.short."Downsampled" == false
assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified"
assert iridanext_metadata.short."predicted_identification_method" == "Mash"
assert iridanext_metadata.short."GC (%)" == "52.96"
assert iridanext_metadata.short."gc" == "52.96"
assert iridanext_metadata.short."Mean Sequence Length Forward" == 250
assert iridanext_metadata.short."BaseCount" == 237500
assert iridanext_metadata.short."total_sequenced_bases" == 237500

assert iridanext_metadata.short."StarAMR Genotype" == "None"
assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible"
Expand Down Expand Up @@ -563,7 +563,7 @@ nextflow_pipeline {
assert path("$launchDir/results").exists()
// parse output json file
def iridanext_json = path("$launchDir/results/iridanext.output.json").json
assert iridanext_json.metadata.samples.short."QC Status" == "NA"
assert iridanext_json.metadata.samples.short."qc_status_overall" == "NA"
assert iridanext_json.metadata.samples.short."QC Summary" == "No quality control criteria is applied for metagenomic samples."

}
Expand Down Expand Up @@ -604,7 +604,7 @@ nextflow_pipeline {
assert path("$launchDir/results").exists()
// parse output json file
def iridanext_json = path("$launchDir/results/iridanext.output.json").json
assert iridanext_json.metadata.samples."meta-small"."QC Status" == "FAILED"
assert iridanext_json.metadata.samples."meta-small"."qc_status_overall" == "FAILED"
assert iridanext_json.metadata.samples."meta-small"."QC Summary" == "[FAILED] Sample was determined to be metagenomic and this was not specified as a metagenomic run indicating contamination."

}
Expand Down
Loading
Loading