Skip to content

Commit

Permalink
WIP test reimplementation
Browse files Browse the repository at this point in the history
  • Loading branch information
TCLamnidis committed Feb 28, 2025
1 parent 64106a6 commit 44cda40
Showing 1 changed file with 50 additions and 19 deletions.
69 changes: 50 additions & 19 deletions tests/test.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,58 @@ nextflow_pipeline {
then {

// Each top level results directory should be tested with individual snapshots/variables
// stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
// stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
// For both cases, use the third argument to specify globs to exclude certain files
// - stable_name_<dir> is for files with variable md5sums (i.e. content) so only names will be compared
// - stable_content_<dir> is for files with stable md5sums (i.e. content) so md5sums will be compared
// - bams_<dir> is for BAM files, where the headerMD5 is checked for stability (since the content can be unstable)
// If a directory is fully stable, you can drop `stable_name_*`
// If a directory contains no BAMs, you can drop `bams_*`

// Generate with: nf-test test --tag test --profile docker,test --update-snapshot
// Test with: nf-test test --tag test --profile docker,test
// NOTE: BAMs are always only stable in name, because:
// a) sharding breaks header since the shard that was first is named in the header (Fixed in https://github.com/nf-core/eager/pull/1112)
// b) the order of the reads in the BAMs is not stable (sorted, but reads that share a start position can be in any order)
// point b) also causes BAIs to be unstable.
// c) Merging of multiple BAMs with duplicate @RG / @PG tags can cause the header to be unstable (particularly in the case of shards/lanes)

// Define exclusion patterns for files with unstable contents
// NOTE: When a section needs more than a couple of small patterns, consider adding a variable to store the patterns here
// This is particularly important if the patterns excluded in the stable content section should be included in the stable name section
// TODO Maybe these should be moved to files in the test-datasets? Just worried that is less transparent
def unstable_patterns_auth = [
'**/qualimapReport.html',
'**/mapped_reads_gc-content_distribution.txt',
'**/genome_gc_content_per_window.png',
'**/*.{svg,pdf}',
'**/DamageProfiler.log',
]

// Check that no files are missing/added
// Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list, include patterns
def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] )
// Command legend: Result directory to index , include dirs?, exclude patterns , exclude pattern list , include patterns
def stable_name_all = getAllFilesFromDir("$outputDir/" , true , ['pipeline_info/*'] , null , ['*', '**/*'] )

// Authentication
def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png'], null , ['*', '**/*'] )
def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , ['*.{yaml,tsv}'] , null , ['**/*.html', '**/mapped_reads_gc-content_distribution.txt', '**/genome_gc_content_per_window.png'])
def stable_content_auth = getAllFilesFromDir("$outputDir/authentication" , true , unstable_patterns_auth , null , ['**/*'] )
def stable_name_auth = getAllFilesFromDir("$outputDir/authentication" , true , null , null , unstable_patterns_auth)

// Deduplication
def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.flagstat'] )
def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.{bam,bai}'] )
// def bams_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , null , null , ['*.bam'] )

// Final_bams
def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] )
def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] )
// def bams_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.bam'] )

// Mapping (incl. bam_input flagstat)
def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] )
def stable_name_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.{bam,bai}'] )
// def bams_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.bam'] )

// MQC
// preprocessing
// read filtering

// // Bam filtering
// def stable_content_bamfiltering = getAllFilesFromDir("$outputDir/bam_filtering" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] )
Expand All @@ -49,13 +81,6 @@ nextflow_pipeline {
// def stable_content_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{svg,pdf,log}'] , null , ['**/*.{txt,json}'] )
// def stable_name_damageestimation = getAllFilesFromDir("$outputDir/damage_estimation" , true , ['**/*.{txt,json}'] , null , ['**/*.{svg,pdf,log}'] )

// // Deduplication
// def stable_content_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.{bam,bai}'] , null , ['*.flagstat'] )
// def stable_name_deduplication = getAllFilesFromDir("$outputDir/deduplication" , true , ['*.flagstat'] , null , ['*.{bam,bai}'] )

// // Final_bams
// def stable_content_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.flagstat'] )
// def stable_name_final_bams = getAllFilesFromDir("$outputDir/final_bams" , true , null , null , ['**/*.{bam,bai}'] )

// // Mapping
// def stable_content_mapping = getAllFilesFromDir("$outputDir/mapping" , true , null , null , ['**/*.flagstat'] )
Expand All @@ -80,14 +105,16 @@ nextflow_pipeline {
{ assert snapshot( stable_name_all*.name ).match("all_files") },

// Checking changes to contents of each section
{ assert snapshot( stable_content_auth ).match("authentication") },
// NOTE: Keep the sections sorted in alphanumeric order of their output directories.
// Each section should first check stable_content, stable_name second (if applicable), and BAM header MD5 third (if applicable).
{ assert snapshot( stable_content_auth , stable_name_auth*.name ).match("authentication") },
{ assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
{ assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
// NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
{ assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
// { assert snapshot( stable_content_bamfiltering , stable_name_bamfiltering*.name ).match("bam_filtering") },
// { assert snapshot( stable_content_baminputstats ).match("bam_input_stats") },
// { assert snapshot( stable_content_damageestimation, stable_name_damageestimation*.name ).match("damage_estimation") },
// { assert snapshot( stable_content_deduplication , stable_name_deduplication*.name ).match("deduplication") },
// { assert snapshot( stable_content_final_bams , stable_name_final_bams*.name ).match("final_bams") },
// // NOTE: The snapshot section for mapping cannot be named 'mapping'. See https://github.com/askimed/nf-test/issues/279
// { assert snapshot( stable_content_mapping , stable_name_mapping*.name ).match("mapping_output") },
// { assert snapshot( stable_content_mapstats , stable_name_mapstats*.name ).match("mapstats") },
// { assert snapshot( stable_name_multiqc*.name ).match("multiqc") },
// { assert snapshot( stable_content_preprocessing , stable_name_preprocessing*.name ).match("preprocessing") },
Expand All @@ -99,3 +126,7 @@ nextflow_pipeline {
}
}
}

// , bams_deduplication.collect { bam(it.toString()).getReadsMD5() }
// , bams_final_bams.collect { bam(it.toString()).getReadsMD5() }
// , bams_mapping.collect { bam(it.toString()).getReadsMD5() }

0 comments on commit 44cda40

Please sign in to comment.