diff --git a/tests/test.nf.test b/tests/test.nf.test
index 4a29d41b..d820164e 100644
--- a/tests/test.nf.test
+++ b/tests/test.nf.test
@@ -17,26 +17,58 @@ nextflow_pipeline {
then {
// Each top level results directory should be tested with individual snapshots/variables
- // stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared
- // stable_content_ is for files with stable md5sums (i.e. content) so md5sums will be compared
- // For both cases, use the third argument to specify globs to exclude certain files
+ // - stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared
+ // - stable_content_ is for files with stable md5sums (i.e. content) so md5sums will be compared
+ // - bams_ is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
// If a directory is fully stable, you can drop `stable_name_*`
+ // If a directory contains no BAMs, you can drop `bams_*`
+
// Generate with: nf-test test --tag test --profile docker,test --update-snapshot
// Test with: nf-test test --tag test --profile docker,test
// NOTE: BAMs are always only stable in name, because:
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
// point b) also causes BAIs to be unstable.
+ // c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)
+
+ // Define exclusion patterns for files with unstable contents
+ // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
+ // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
+ // TODO Maybe these should be moved to files in the test-datasets? Just worried that is less transparent
+ def unstable_patterns_auth = [
+ '**/qualimapReport.html',
+ '**/mapped_reads_gc-content_distribution.txt',
+ '**/genome_gc_content_per_window.png',
+ '**/*.{svg,pdf}',
+ '**/DamageProfiler.log',
+ ]
// Check that no files are missing/added
- // Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list, include patterns
- def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] )
+ // Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list , include patterns
+ def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] )
// Authentication
- def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png'], null , ['*', '**/*'] )
- def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['*.{yaml,tsv}'] , null , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png'])
+ def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , unstable_patterns_auth , null , ['**/*'] )
+ def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , null , null , unstable_patterns_auth)
+
+ // Deduplication
+ def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.flagstat'] )
+ def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.{bam,bai}'] )
+ // def bams_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.bam'] )
+
+ // Final_bams
+ def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] )
+ def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] )
+ // def bams_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.bam'] )
+ // Mapping (incl. bam_input flagstat)
+ def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] )
+ def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.{bam,bai}'] )
+ // def bams_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.bam'] )
+ // MQC
+ // preprocessing
+ // read filtering
// // Bam filtering
// def stable_content_bamfiltering = getAllFilesFromDir("$outputDir/bam_filtering" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] )
@@ -49,13 +81,6 @@ nextflow_pipeline {
// def stable_content_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{svg,pdf,log}'] , null , ['**/*.{txt,json}'] )
// def stable_name_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{txt,json}'] , null , ['**/*.{svg,pdf,log}'] )
- // // Deduplication
- // def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] )
- // def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.flagstat'] , null , ['*.{bam,bai}'] )
-
- // // Final_bams
- // def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] )
- // def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] )
// // Mapping
// def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] )
@@ -80,14 +105,16 @@ nextflow_pipeline {
{ assert snapshot( stable_name_all*.name ).match("all_files") },
// Checking changes to contents of each section
- { assert snapshot( stable_content_auth ).match("authentication") },
+ // NOTE: Keep the order of the sections in the alphanumeric order of the output directories.
+ // Each section should first check stable_content, stable_name second (if applicable), and BAM header MD5 third (if applicable).
+ { assert snapshot( stable_content_auth , stable_name_auth*.name ).match("authentication") },
+ { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
+ { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
+ // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
+ { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
// { assert snapshot( stable_content_bamfiltering , stable_name_bamfiltering*.name ).match("bam_filtering") },
// { assert snapshot( stable_content_baminputstats ).match("bam_input_stats") },
// { assert snapshot( stable_content_damageestimation, stable_name_damageestimation*.name ).match("damage_estimation") },
- // { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
- // { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
- // // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
- // { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
// { assert snapshot( stable_content_mapstats , stable_name_mapstats*.name ).match("mapstats") },
// { assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
// { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
@@ -99,3 +126,7 @@ nextflow_pipeline {
}
}
}
+
+// , bams_deduplication.collect { bam(it.toString()).getReadsMD5() }
+// , bams_final_bams.collect { bam(it.toString()).getReadsMD5() }
+// , bams_mapping.collect { bam(it.toString()).getReadsMD5() }