From bbec50649ea9ab1ccf5c9363e169071c2d2df0cd Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 3 Feb 2025 12:21:27 -0800 Subject: [PATCH 1/6] renamed metadata fields --- conf/irida_next.config | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/conf/irida_next.config b/conf/irida_next.config index c2a03d41..c1e618a7 100755 --- a/conf/irida_next.config +++ b/conf/irida_next.config @@ -48,29 +48,29 @@ iridanext { samples { flatten = true rename = [ - "QCStatus" : "QC Status", - "QualityAnalysis.checkm_contamination.qc_status" : "Checkm Status", - "QualityAnalysis.checkm_contamination.value" : "Checkm Value", - "QualityAnalysis.average_coverage.qc_status" : "Average Coverage Status", - "QualityAnalysis.average_coverage.value" : "Average Coverage Value", + "QCStatus" : "qc_status_overall", + "QualityAnalysis.checkm_contamination.qc_status" : "qc_status_contamination", + "QualityAnalysis.checkm_contamination.value" : "checkm_value", + "QualityAnalysis.average_coverage.qc_status" : "qc_status_read_depth_fixed", + "QualityAnalysis.average_coverage.value" : "mean_read_depth_fixed", "QualityAnalysis.n50_value.qc_status" : "n50 Status", "QualityAnalysis.n50_value.value" : "n50 Value", - "QualityAnalysis.raw_average_quality.qc_status" : "Raw Average Quality Status", + "QualityAnalysis.raw_average_quality.qc_status" : "qc_status_read_qual", "QualityAnalysis.raw_average_quality.value" : "Raw Average Quality Value", - "QualityAnalysis.length.qc_status" : "Length Status", - "QualityAnalysis.length.value" : "Length Value", - "QualityAnalysis.nr_contigs.qc_status" : "nr contigs Status", - "QualityAnalysis.nr_contigs.value" : "nr contigs Value", - "QCSummary" : "QC Summary", + "QualityAnalysis.length.qc_status" : "qc_status_assembly_genome_size", + "QualityAnalysis.length.value" : "assembly_length", + "QualityAnalysis.nr_contigs.qc_status" : "qc_status_assembly_num_contigs", + "QualityAnalysis.nr_contigs.value" : 
"assembly_num_contigs", + "QCSummary" : "qc_summary", "meta.downsampled" : "Downsampled", "SpeciesTopHit" : "predicted_identification_name", "IdentificationMethod" : "predicted_identification_method", "LocidexDatabaseInformation.db_name" : "locidex_db_name", "LocidexDatabaseInformation.db_date" : "locidex_db_date", "LocidexDatabaseInformation.db_version" : "locidex_db_version", - "LocidexSummary.TotalLoci" : "total_loci", - "LocidexSummary.AllelesPresent" : "count_loci_found", - "LocidexSummary.MissingAllelesCount" : "count_loci_missing", + "LocidexSummary.TotalLoci" : "locidex_db_total_loci_count", + "LocidexSummary.AllelesPresent" : "locidex_count_loci_found", + "LocidexSummary.MissingAllelesCount" : "locidex_count_loci_missing", "ECTyperSubtyping.0.Database" : "ECTyper Database", "ECTyperSubtyping.0.Evidence" : "ECTyper Evidence", "ECTyperSubtyping.0.GeneCoverages(%)" : "ECTyper GeneCoverages (%)", @@ -83,8 +83,9 @@ iridanext { "ECTyperSubtyping.0.Species" : "ECTyper Subtyping", "ECTyperSubtyping.0.Warnings" : "ECTyper Warnings", "LISSEROSubtyping.0.SEROTYPE" : "LISSERO Serotype", - "QUAST.0.GC (%)" : "GC (%)", + "QUAST.0.GC (%)" : "gc", "RawReadSummary.R1.mean_sequence_length" : "Mean Sequence Length Forward", + "RawReadSummary.R2.mean_sequence_length" : "Mean Sequence Length Reverse", "SISTRSubtyping.0.cgmlst_ST" : "SISTR cgMLST ST", "SISTRSubtyping.0.cgmlst_found_loci" : "SISTR cgMLST Found Loci", "SISTRSubtyping.0.cgmlst_genome_match" : "SISTR cgMLST Genome Match", @@ -99,7 +100,7 @@ iridanext { "SISTRSubtyping.0.serovar" : "SISTR Serovar", "SISTRSubtyping.0.serovar_antigen" : "SISTR Serovar Antigen", "SISTRSubtyping.0.serovar_cgmlst" : "SISTR Serovar cgMLST", - "SeqtkBaseCount" : "BaseCount", + "SeqtkBaseCount" : "total_sequenced_bases", "SevenGeneMLSTReport.0.alleles.abcZ" : "abcZ", "SevenGeneMLSTReport.0.alleles.adk" : "adk", "SevenGeneMLSTReport.0.alleles.arcA" : "arcA", From a793fc6429d8807e98ce7c0e97b681026df0a5cd Mon Sep 17 00:00:00 2001 From: Dan 
Fornika Date: Mon, 3 Feb 2025 12:35:02 -0800 Subject: [PATCH 2/6] appease the linter --- .github/workflows/linting_comment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 42e519bf..95b6b6af 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # v6 + uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 with: workflow: linting.yml workflow_conclusion: completed From be3986e45938d969aa44683a910fd515d02bcb99 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 3 Feb 2025 13:23:17 -0800 Subject: [PATCH 3/6] update tests --- tests/main.nf.test | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index c5ac5a39..d5561a5f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -74,29 +74,29 @@ nextflow_pipeline { assert iridanext_samples.CSE.findAll { it.path.contains("Assembly/FinalReports/FlattenedReports/CSE.flat_sample.json.gz") }.size() == 0 // metadata - assert iridanext_metadata.CSE.containsKey("QC Status") - assert iridanext_metadata.CSE."QC Status" == "FAILED" - assert !iridanext_metadata.CSE.containsKey("Checkm Status") - assert !iridanext_metadata.CSE.containsKey("Checkm Value") - assert !iridanext_metadata.CSE.containsKey("Average Coverage Status") - assert !iridanext_metadata.CSE.containsKey("Average Coverage Value") + assert iridanext_metadata.CSE.containsKey("qc_status_overall") + assert iridanext_metadata.CSE."qc_status_overall" == "FAILED" + assert !iridanext_metadata.CSE.containsKey("qc_status_contamination") + assert !iridanext_metadata.CSE.containsKey("checkm_value") + assert 
!iridanext_metadata.CSE.containsKey("qc_status_read_depth_fixed") + assert !iridanext_metadata.CSE.containsKey("mean_read_depth_fixed") assert !iridanext_metadata.CSE.containsKey("n50 Status") assert !iridanext_metadata.CSE.containsKey("n50 Value") assert !iridanext_metadata.CSE.containsKey("Raw Average Quality Status") assert !iridanext_metadata.CSE.containsKey("Raw Average Quality Value") - assert !iridanext_metadata.CSE.containsKey("Length Status") - assert !iridanext_metadata.CSE.containsKey("Length Value") - assert !iridanext_metadata.CSE.containsKey("nr contigs Status") - assert !iridanext_metadata.CSE.containsKey("nr contigs Value") - assert iridanext_metadata.CSE."QC Summary" == "FAILED Species ID: No organism specific QC data available.; Passed Tests: 0/6; Organism QC Criteria: No organism specific QC data available." + assert !iridanext_metadata.CSE.containsKey("qc_status_assembly_genome_size") + assert !iridanext_metadata.CSE.containsKey("assembly_length") + assert !iridanext_metadata.CSE.containsKey("qc_status_assembly_num_contigs") + assert !iridanext_metadata.CSE.containsKey("assembly_num_contigs") + assert iridanext_metadata.CSE."qc_summary" == "FAILED Species ID: No organism specific QC data available.; Passed Tests: 0/6; Organism QC Criteria: No organism specific QC data available." 
assert iridanext_metadata.CSE."Downsampled" == false assert !iridanext_metadata.CSE.containsKey("predicted_identification_name") assert !iridanext_metadata.CSE.containsKey("predicted_identification_method") - assert !iridanext_metadata.CSE.containsKey("GC (%)") + assert !iridanext_metadata.CSE.containsKey("gc") //assert iridanext_metadata.CSE."Mean Sequence Length Forward" == 150 // Base count after decontamination - assert iridanext_metadata.CSE."BaseCount" == 37200 + assert iridanext_metadata.CSE."total_sequenced_bases" == 37200 } } @@ -218,7 +218,7 @@ nextflow_pipeline { assert iridanext_samples.INX.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1 // output metadata - assert iridanext_metadata.INX."QC Status" == "PASSED" + assert iridanext_metadata.INX."qc_status_overall" == "PASSED" } From 1c942707643f550c6182e52b3da92954945e6318 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 3 Feb 2025 13:47:56 -0800 Subject: [PATCH 4/6] update tests --- tests/main.nf.test | 48 +++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index d5561a5f..789dd5e2 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -346,27 +346,27 @@ nextflow_pipeline { assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1 // output metadata - assert iridanext_metadata.short."QC Status" == "PASSED" - assert iridanext_metadata.short."Checkm Status" == "PASSED" - assert iridanext_metadata.short."Checkm Value" == 0.0 + assert iridanext_metadata.short."qc_status_overall" == "PASSED" + assert iridanext_metadata.short."qc_status_contamination" == "PASSED" + assert iridanext_metadata.short."checkm_value" == 0.0 assert iridanext_metadata.short."Average Coverage Status" == "PASSED" assert iridanext_metadata.short."Average Coverage Value" == 237.5 assert iridanext_metadata.short."n50 Status" 
== "WARNING" assert iridanext_metadata.short."n50 Value" == 4949 - assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED" + assert iridanext_metadata.short."qc_status_read_qual" == "PASSED" assert iridanext_metadata.short."Raw Average Quality Value" == 40.0 - assert iridanext_metadata.short."Length Status" == "WARNING" - assert iridanext_metadata.short."Length Value" == 4949 - assert iridanext_metadata.short."nr contigs Status" == "WARNING" - assert iridanext_metadata.short."nr contigs Value" == 1 - assert iridanext_metadata.short."QC Summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available." + assert iridanext_metadata.short."qc_status_assembly_genome_size" == "WARNING" + assert iridanext_metadata.short."assembly_length" == 4949 + assert iridanext_metadata.short."qc_status_assembly_num_contigs" == "WARNING" + assert iridanext_metadata.short."assembly_num_contigs" == 1 + assert iridanext_metadata.short."qc_summary" == "PASSED Species ID: No Species Identified; Passed Tests: 6/6; Organism QC Criteria: No organism specific QC data available." 
assert iridanext_metadata.short."Downsampled" == false assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified" assert iridanext_metadata.short."predicted_identification_method" == "Mash" - assert iridanext_metadata.short."GC (%)" == "52.96" + assert iridanext_metadata.short."gc" == "52.96" assert iridanext_metadata.short."Mean Sequence Length Forward" == 250 - assert iridanext_metadata.short."BaseCount" == 237500 + assert iridanext_metadata.short."total_sequenced_bases" == 237500 assert iridanext_metadata.short."StarAMR Genotype" == "None" assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible" @@ -495,27 +495,27 @@ nextflow_pipeline { assert iridanext_samples.short.findAll { it.path == "FinalReports/FlattenedReports/short.flat_sample.json.gz" }.size() == 1 // output metadata - assert iridanext_metadata.short."QC Status" == "FAILED" - assert iridanext_metadata.short."Checkm Status" == "PASSED" - assert iridanext_metadata.short."Checkm Value" == 0.0 - assert iridanext_metadata.short."Average Coverage Status" == "FAILED" - assert iridanext_metadata.short."Average Coverage Value" == 0.24 + assert iridanext_metadata.short."qc_status_overall" == "FAILED" + assert iridanext_metadata.short."qc_status_contamination" == "PASSED" + assert iridanext_metadata.short."checkm_value" == 0.0 + assert iridanext_metadata.short."qc_status_read_depth_fixed" == "FAILED" + assert iridanext_metadata.short."mean_read_depth_fixed" == 0.24 assert iridanext_metadata.short."n50 Status" == "WARNING" assert iridanext_metadata.short."n50 Value" == 4949 - assert iridanext_metadata.short."Raw Average Quality Status" == "PASSED" + assert iridanext_metadata.short."qc_status_read_qual" == "PASSED" assert iridanext_metadata.short."Raw Average Quality Value" == 40.0 - assert iridanext_metadata.short."Length Status" == "WARNING" - assert iridanext_metadata.short."Length Value" == 4949 - assert iridanext_metadata.short."nr contigs Status" == 
"WARNING" - assert iridanext_metadata.short."nr contigs Value" == 1 + assert iridanext_metadata.short."qc_status_assembly_genome_size" == "WARNING" + assert iridanext_metadata.short."assembly_length" == 4949 + assert iridanext_metadata.short."qc_status_assembly_num_contigs" == "WARNING" + assert iridanext_metadata.short."assembly_num_contigs" == 1 assert iridanext_metadata.short."QC Summary" == "FAILED Species ID: No Species Identified; Passed Tests: 5/6; Organism QC Criteria: No organism specific QC data available." assert iridanext_metadata.short."Downsampled" == false assert iridanext_metadata.short."predicted_identification_name" == "No Species Identified" assert iridanext_metadata.short."predicted_identification_method" == "Mash" - assert iridanext_metadata.short."GC (%)" == "52.96" + assert iridanext_metadata.short."gc" == "52.96" assert iridanext_metadata.short."Mean Sequence Length Forward" == 250 - assert iridanext_metadata.short."BaseCount" == 237500 + assert iridanext_metadata.short."total_sequenced_bases" == 237500 assert iridanext_metadata.short."StarAMR Genotype" == "None" assert iridanext_metadata.short."StarAMR Predicted Phenotype" == "Susceptible" @@ -604,7 +604,7 @@ nextflow_pipeline { assert path("$launchDir/results").exists() // parse output json file def iridanext_json = path("$launchDir/results/iridanext.output.json").json - assert iridanext_json.metadata.samples."meta-small"."QC Status" == "FAILED" + assert iridanext_json.metadata.samples."meta-small"."qc_status_overall" == "FAILED" assert iridanext_json.metadata.samples."meta-small"."QC Summary" == "[FAILED] Sample was determined to be metagenomic and this was not specified as a metagenomic run indicating contamination." 
} From 2d8637617c553af5e241aea89e11744e9b59f13f Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 3 Feb 2025 16:18:52 -0800 Subject: [PATCH 5/6] update tests --- tests/main.nf.test | 2 +- tests/pipelines/main.from_assemblies.nf.test | 32 ++++++++++---------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index 789dd5e2..9fc4c749 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -563,7 +563,7 @@ nextflow_pipeline { assert path("$launchDir/results").exists() // parse output json file def iridanext_json = path("$launchDir/results/iridanext.output.json").json - assert iridanext_json.metadata.samples.short."QC Status" == "NA" + assert iridanext_json.metadata.samples.short."qc_status_overall" == "NA" assert iridanext_json.metadata.samples.short."QC Summary" == "No quality control criteria is applied for metagenomic samples." } diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index ae8a41c1..4de853b2 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -108,10 +108,10 @@ nextflow_pipeline { // output metadata def ecoli_metadata = iridanext_metadata.ecoli_GCA_000947975 - assert ecoli_metadata."QC Status" == "FAILED" - assert !ecoli_metadata.containsKey("Checkm Status") + assert ecoli_metadata."qc_status_overall" == "FAILED" + assert !ecoli_metadata.containsKey("qc_status_contamination") assert !ecoli_metadata.containsKey("Checkm Value") - assert !ecoli_metadata.containsKey("Average Coverage Status") + assert !ecoli_metadata.containsKey("qc_status_read_depth_fixed") assert !ecoli_metadata.containsKey("Average Coverage Value") assert ecoli_metadata."n50 Status" == "PASSED" assert ecoli_metadata."n50 Value" == 122025 @@ -293,9 +293,9 @@ nextflow_pipeline { def ecoli_metadata = iridanext_metadata.ecoli_GCA_000947975 assert ecoli_metadata."QC Status" == "FAILED" - assert 
!ecoli_metadata.containsKey("Checkm Status") + assert !ecoli_metadata.containsKey("qc_status_contamination") assert !ecoli_metadata.containsKey("Checkm Value") - assert !ecoli_metadata.containsKey("Average Coverage Status") + assert !ecoli_metadata.containsKey("qc_status_read_depth_fixed") assert !ecoli_metadata.containsKey("Average Coverage Value") assert ecoli_metadata."n50 Status" == "PASSED" assert ecoli_metadata."n50 Value" == 122025 @@ -377,9 +377,9 @@ nextflow_pipeline { def salmonella_metadata = iridanext_metadata.salmonella_GCA_000008105 assert salmonella_metadata."QC Status" == "FAILED" - assert !salmonella_metadata.containsKey("Checkm Status") + assert !salmonella_metadata.containsKey("qc_status_contamination") assert !salmonella_metadata.containsKey("Checkm Value") - assert !salmonella_metadata.containsKey("Average Coverage Status") + assert !salmonella_metadata.containsKey("qc_status_read_depth_fixed") assert !salmonella_metadata.containsKey("Average Coverage Value") assert salmonella_metadata."n50 Status" == "PASSED" assert salmonella_metadata."n50 Value" == 4755700 @@ -543,18 +543,18 @@ nextflow_pipeline { def listeria_metadata = iridanext_metadata.listeria_GCF_000196035 assert listeria_metadata."QC Status" == "FAILED" - assert !listeria_metadata.containsKey("Checkm Status") - assert !listeria_metadata.containsKey("Checkm Value") - assert !listeria_metadata.containsKey("Average Coverage Status") - assert !listeria_metadata.containsKey("Average Coverage Value") + assert !listeria_metadata.containsKey("qc_status_contamination") + assert !listeria_metadata.containsKey("checkm_value") + assert !listeria_metadata.containsKey("qc_status_read_depth_fixed") + assert !listeria_metadata.containsKey("mean_read_depth_fixed") assert listeria_metadata."n50 Status" == "PASSED" assert listeria_metadata."n50 Value" == 2944528 - assert !listeria_metadata.containsKey("Raw Average Quality Status") + assert !listeria_metadata.containsKey("qc_status_read_qual") assert 
!listeria_metadata.containsKey("Raw Average Quality Value") - assert listeria_metadata."Length Status" == "PASSED" - assert listeria_metadata."Length Value" == 2944528 - assert listeria_metadata."nr contigs Status" == "PASSED" - assert listeria_metadata."nr contigs Value" == 1 + assert listeria_metadata."qc_status_assembly_genome_size" == "PASSED" + assert listeria_metadata."assembly_length" == 2944528 + assert listeria_metadata."qc_status_assembly_num_contigs" == "PASSED" + assert listeria_metadata."assembly_num_contigs" == 1 assert listeria_metadata."QC Summary" == "FAILED Species ID: Listeria monocytogenes; Passed Tests: 3/4; Organism QC Criteria: Listeria monocytogenes" assert listeria_metadata."Downsampled" == false From c0dd307595c6ecaf3b96ea229d9b765d45811994 Mon Sep 17 00:00:00 2001 From: Dan Fornika Date: Mon, 3 Feb 2025 16:20:15 -0800 Subject: [PATCH 6/6] update tests --- tests/pipelines/main.from_assemblies.nf.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/pipelines/main.from_assemblies.nf.test b/tests/pipelines/main.from_assemblies.nf.test index 4de853b2..f02c7b12 100644 --- a/tests/pipelines/main.from_assemblies.nf.test +++ b/tests/pipelines/main.from_assemblies.nf.test @@ -710,7 +710,7 @@ nextflow_pipeline { assert vibrio_metadata.locidex_db_name == "Vibrio cholerae" assert vibrio_metadata.locidex_db_date == "2024-07-30" assert vibrio_metadata.locidex_db_version == "1.0.0" - assert vibrio_metadata.total_loci == 7 + assert vibrio_metadata.locidex_db_total_loci_count == 7 assert vibrio_metadata.count_loci_found == 6 assert vibrio_metadata.count_loci_missing == 1 @@ -782,7 +782,7 @@ nextflow_pipeline { assert vibrio_metadata.locidex_db_name == "Vibrio cholerae_1" assert vibrio_metadata.locidex_db_date == "2024-08-01" assert vibrio_metadata.locidex_db_version == "1.0.0" - assert vibrio_metadata.total_loci == 7 + assert vibrio_metadata.locidex_db_total_loci_count == 7 assert vibrio_metadata.count_loci_found == 6 assert 
vibrio_metadata.count_loci_missing == 1 @@ -850,7 +850,7 @@ nextflow_pipeline { assert vibrio_metadata.locidex_db_name == "Vibrio cholerae" assert vibrio_metadata.locidex_db_date == "2024-07-30" assert vibrio_metadata.locidex_db_version == "1.0.0" - assert vibrio_metadata.total_loci == 7 + assert vibrio_metadata.locidex_db_total_loci_count == 7 assert vibrio_metadata.count_loci_found == 6 assert vibrio_metadata.count_loci_missing == 1