diff --git a/tests/test.nf.test b/tests/test.nf.test index 4a29d41b..d820164e 100644 --- a/tests/test.nf.test +++ b/tests/test.nf.test @@ -17,26 +17,58 @@ nextflow_pipeline { then { // Each top level results directory should be tested with individual snapshots/variables - // stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared - // stable_content_ is for files with stable md5sums (i.e. content) so md5sums will be compared - // For both cases, use the third argument to specify globs to exclude certain files + // - stable_name_ is for files with variable md5sums (i.e. content) so only names will be compared + // - stable_content_ is for files with stable md5sums (i.e. content) so md5sums will be compared + // - bams_ is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable) // If a directory is fully stable, you can drop `stable_name_*` + // If a directory contains no BAMs, you can drop `bams_*` + // Generate with: nf-test test --tag test --profile docker,test --update-snapshot // Test with: nf-test test --tag test --profile docker,test // NOTE: BAMs are always only stable in name, because: // a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112) // b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order) // point b) also causes BAIs to be unstable. 
+ // c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes) + + // Define exclusion patterns for files with unstable contents + // NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here + // This is particularly important if the patterns excluded in the stable content section should be included in the stable name section + // TODO Maybe these should be moved to files in the test-datasets? Just worried that is less transparent + def unstable_patterns_auth = [ + '**/qualimapReport.html', + '**/mapped_reads_gc-content_distribution.txt', + '**/genome_gc_content_per_window.png', + '**/*.{svg,pdf}', + '**/DamageProfiler.log', + ] // Check that no files are missing/added - // Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list, include patterns - def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] ) + // Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list , include patterns + def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] ) // Authentication - def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png'], null , ['*', '**/*'] ) - def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['*.{yaml,tsv}'] , null , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png']) + def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , unstable_patterns_auth , null , ['**/*'] ) + def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , null , null , unstable_patterns_auth) + + // Deduplication + def 
stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.flagstat'] ) + def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.{bam,bai}'] ) + // def bams_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.bam'] ) + + // Final_bams + def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] ) + def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] ) + // def bams_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.bam'] ) + // Mapping (incl. bam_input flagstat) + def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] ) + def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.{bam,bai}'] ) + // def bams_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.bam'] ) + // MQC + // preprocessing + // read filtering // // Bam filtering // def stable_content_bamfiltering = getAllFilesFromDir("$outputDir/bam_filtering" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] ) @@ -49,13 +81,6 @@ nextflow_pipeline { // def stable_content_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{svg,pdf,log}'] , null , ['**/*.{txt,json}'] ) // def stable_name_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{txt,json}'] , null , ['**/*.{svg,pdf,log}'] ) - // // Deduplication - // def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] ) - // def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.flagstat'] , null , ['*.{bam,bai}'] ) - - // // Final_bams - // def stable_content_final_bams =
getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] ) - // def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] ) // // Mapping // def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] ) @@ -80,14 +105,16 @@ nextflow_pipeline { { assert snapshot( stable_name_all*.name ).match("all_files") }, // Checking changes to contents of each section - { assert snapshot( stable_content_auth ).match("authentication") }, + // NOTE: Keep the order of the sections in the alphanumeric order of the output directories. + // Each section should first check stable_content, stable_name second (if applicable), and BAM header MD5 third (if applicable). + { assert snapshot( stable_content_auth , stable_name_auth*.name ).match("authentication") }, + { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, + { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, + // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279 + { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, // { assert snapshot( stable_content_bamfiltering , stable_name_bamfiltering*.name ).match("bam_filtering") }, // { assert snapshot( stable_content_baminputstats ).match("bam_input_stats") }, // { assert snapshot( stable_content_damageestimation, stable_name_damageestimation*.name ).match("damage_estimation") }, - // { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") }, - // { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") }, - // // NOTE: The snapshot section for mapping cannot be named 'mapping'. 
See https://github.com/askimed/nf-test/issues/279 - // { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") }, // { assert snapshot( stable_content_mapstats , stable_name_mapstats*.name ).match("mapstats") }, // { assert snapshot( stable_name_multiqc*.name ).match("multiqc") }, // { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") }, @@ -99,3 +126,7 @@ nextflow_pipeline { } } } + +// , bams_deduplication.collect { bam(it.toString()).getReadsMD5() } +// , bams_final_bams.collect { bam(it.toString()).getReadsMD5() } +// , bams_mapping.collect { bam(it.toString()).getReadsMD5() }