nextflow.config

/************************************************default params***************************************/

// Default pipeline parameters. The nested keys (map.*, parse.*, filter.*,
// resolutions) mirror the sections of the sample-configuration template
// stored in `params.config_template` in this file.
params {
    // NOTE(review): presumably marks that these built-in defaults are in
    // effect (i.e. no user configuration was supplied) -- confirm in main.nf.
    default_config = true
    // Optional steps, disabled by default.
    stat = false
    fastqc = false

    // Mapping settings.
    map.assembly = "unknown"
    map.genome = ""
    // NOTE(review): presumably the size of each fastq chunk produced by
    // `split_fastq_chunks` -- confirm the unit against the main script.
    map.chunksize = 16000000
    map.merge_bam = false

    // Pair-parsing settings.
    // `parse_args` is the spelling used by the sample-configuration template;
    // the default was previously only available under the misspelled key
    // `parse_arags`, so a lookup of `params.parse.parse_args` found no default.
    parse.parse_args = "--min-mapq 0 --drop-sam --drop-seq --drop-readid"
    // Deprecated misspelled alias, kept so existing references keep working.
    parse.parse_arags = "--min-mapq 0 --drop-sam --drop-seq --drop-readid"
    parse.sort_tmpdir = "/tmp"

    // Pair-filtering settings; empty strings mean "not specified".
    filter.frag_path = ""
    filter.enzyme = ""
    filter.filter_expr = "((pair_type=='UU') or (pair_type=='UR') or (pair_type=='RU'))"

    // NOTE(review): presumably contact-matrix bin sizes in base pairs -- confirm.
    resolutions = [5000, 10000, 100000, 1000000]
}

/**********************************************nextflow settings***************************************/
// Execution profiles. `standard` runs tasks with the local executor,
// `cluster` submits them through SGE. Both cap the parallelism of the
// `map_parse_sort` and `split_fastq_chunks` processes via `maxForks`.
profiles {

    // Local execution.
    standard {
        executor.queueSize = 40

        process {
            executor = 'local'
            cache = true

            withName: 'map_parse_sort' {
                maxForks = 20
            }
            withName: 'split_fastq_chunks' {
                maxForks = 2
            }
        }
    }

    // SGE cluster execution.
    cluster {
        // Profile-specific parameter; it is only defined when this profile is active.
        params.ratio = 2.0

        executor.queueSize = 100
        executor.queueStatInterval = '10 sec'

        process {
            executor = 'sge'
            penv = 'mpi'
            queue = 'all.q'
            scratch = false
            cache = true

            withName: 'map_parse_sort' {
                maxForks = 80
            }
            withName: 'split_fastq_chunks' {
                maxForks = 10
            }
        }
    }
}

// Process defaults followed by per-process resource overrides.
process {
    // ---- defaults applied to every process ----
    errorStrategy = 'retry'
    // NOTE(review): with maxRetries = 0 a failed task is presumably never
    // re-submitted, so `task.attempt` stays at 1 and the scaling below is
    // inert -- confirm whether retries were meant to be enabled.
    maxRetries = 0
    cpus = { task.attempt }
    memory = { 500.MB * task.attempt }
    cache = 'lenient'
    conda = "$baseDir/environment.yml"

    // ---- per-process overrides (fixed values, not scaled by attempt) ----
    withName: 'digest' {
        memory = '2 GB'
    }
    withName: 'fastq_dump' {
        cpus = 10
        memory = '2 GB'
    }
    withName: 'fastqc' {
        cpus = 5
        memory = '2 GB'
    }
    withName: 'split_fastq_chunks' {
        cpus = 2
    }
    withName: 'map_parse_sort' {
        cpus = 5
        memory = '10 GB'
    }
    withName: 'merge_bam' {
        cpus = 8
        memory = '2 GB'
    }
    withName: 'merge_pair' {
        cpus = 5
        memory = '15 GB'
    }
    withName: 'merge_exp_dedup' {
        cpus = 3
        memory = '10 GB'
    }
    withName: 'merge_bio' {
        cpus = 3
        memory = '10 GB'
    }
    withName: 'pair_to_cool' {
        memory = '4 GB'
    }
    withName: 'zoomify_cool' {
        memory = '1 GB'
    }
    withName: 'mcool_to_features' {
        cpus = 20
        memory = '20 GB'
    }
}

// Docker scope: container execution is switched on.
docker.enabled = true
docker.remove = true
docker.temp = 'auto'

// Conda scope: environments are cached under the user's home directory.
conda.cacheDir = "$HOME/.nextflow/hictools_env"

// Trace scope: per-task trace file written under log/ with an explicit field list.
trace.enabled = true
trace.file = 'log/trace.log'
trace.fields = 'name,hash,native_id,status,exit,disk,start,complete,duration,%cpu,%mem,rchar,wchar'

// Timeline scope: HTML timeline written under log/.
timeline.enabled = true
timeline.file = 'log/timeline.html'

// Report scope: HTML execution report written under log/.
report.enabled = true
report.file = 'log/report.html'

// Manifest scope: pipeline metadata and entry script.
manifest.name = 'hictools'
manifest.version = '1.0.0'
manifest.description = 'Hic processing pipeline'
manifest.homePage = 'https://github.com/zhqu1148980644/hictools'
manifest.mainScript = 'main.nf'

// Sample-configuration template, stored verbatim as a multi-line string.
// The text is YAML: the leading `#` lines describe the sample layout
// (sample -> biological replicate -> list of read files) and the output
// folders, and the uncommented keys (index, chromsize, stat, fastqc,
// fastq_dump_threads, map, parse, filter, resolutions) give example values.
// NOTE(review): how this string is consumed is not visible in this file --
// presumably the main script writes it out as a starting config; confirm.
// NOTE(review): the template text contains a few typos ("should be looks
// like", "thi pipeline", "fasqc") and a machine-specific genome path; they
// are left untouched here because the string is runtime data.
params.config_template = """
# Your sample configuration should be looks like this:
# There are three levels:
# 1. Different samples.
# 2. Each sample contains multiple biological replicates(biosamples)
# 3. Each biosample contains multiple technical replicates(experiments)
#
# notation: file parameters: filepath and url are both supported.(Downloading will take much time.)
# notation: sra file: SRR number or local sra file are supported.
# notation: glob syntax: The returned number of files should be even.(Only support for paired-end mode.)
# With the help of `glob`, `URL` and `SRR number`, filling in this file can be quite easy without massive annoying typing.
#
#
# raw_reads:
#     sample1:
#         bio1:
#             - 'x06_1.fastq'
#             - 'x06_2.fastq'
#         bio2:
#             - 'https://url....lane1_1.fastq.gz'
#             - 'lane1_2.fastq'
#             - 'lane[4,5,6]*fastq'
#             - 'lane10*'
#     sample2:
#         bio1:
#             - 'SRR4272024'
#             - 'x07_1.fastq'
#             - 'x07_2.fastq'
# 
# These folders will be generated after the execution of pipeline:
# work         # Working directory generated by nextflow.
# log          # Logging file generated by nextflow.
# results      # Main folder containing results of thi pipeline.
#     fastqc   # QC results generated by running `fasqc` and `multiqc`.
#     bams     # Mapping results of each pair of fastq file generated by `bwa`.
#     pairs    # Contact pairs in .pair format and basic statistical info generated by `pairtools`.
#     cools    # HDF5 based .cool file containing contact matrix of hic experiment generated by `cooler`.
#     features # Loops, insulation score, di score, tad, decay, compartment.
#     other    # Downloaded files, fastq/bam/pair chunks, sra-dumped fastqs and enzyme-digested .bed file.

index: "index/hg19.bwa"
chromsize: "http://genome.ucsc.edu/goldenpath/help/hg19.chrom.sizes"
stat: false
fastqc: false
fastq_dump_threads: 10
map:
    assembly: "hg19"
    genome: '/store/qzhong/tmp/hic/hg19.fa'
    chunksize: 16000000
    # whether to keep alignment bam files in disk.
    merge_bam: false
parse:
    parse_args: "--min-mapq 0 --drop-sam --drop-seq --drop-readid"
    sort_tmpdir: '/tmp'
filter:
    filter_expr: "((pair_type=='UU') or (pair_type=='UR') or (pair_type=='RU'))"
    frag_path: ''
    enzyme: ''
    # In cases of in-situ hic, dilution hic and capture hic, ligated pair within the same fragment need to be removed.
    # 1: Specify the digested fragment bed file in 'frag_path' or choose a certain enzyme name.
    # 2: Put additional filter expressions in 'filter_expr'.
    #    filter_expr: "((pair_type=='UU') or (pair_type=='UR') or (pair_type=='RU')) and not ((COLS[-6]==COLS[-3]) and (chrom1==chrom2))"
    #    enzyme: 'HindIII'
resolutions:
    - 5000
    - 10000
    - 50000
    - 100000
    - 1000000
    - 10000000
"""