diff --git a/dadi_cli/parsers/common_arguments.py b/dadi_cli/parsers/common_arguments.py index c9a3ad7..7cbdb9b 100644 --- a/dadi_cli/parsers/common_arguments.py +++ b/dadi_cli/parsers/common_arguments.py @@ -253,7 +253,7 @@ def add_eps_argument(parser: argparse.ArgumentParser) -> None: type=positive_num, nargs="+", required=False, - help="Step sizes to try for Godambe analysis. Default: [0.1, 0.01, 0.001]", + help="Step sizes to try for Godambe analysis. Default: [0.1, 0.01, 0.001].", ) diff --git a/docs/userguide/stat.md b/docs/userguide/stat.md index b8ea81b..d07acbc 100644 --- a/docs/userguide/stat.md +++ b/docs/userguide/stat.md @@ -1,5 +1,7 @@ # Statistical testing +## Bootstrapped spectrum generation + To perform uncertainty analysis, `dadi` offers [an approach](https://dadi.readthedocs.io/en/latest/user-guide/uncertainty-analysis/) using the Godambe Information Matrix (GIM). To utilize this method, users should begin by using the `GenerateFs` subcommand to generate bootstrapped data from VCF files. In this example, we generate `20` bootstraps using the `--bootstrap` argument to save time, though we recommend users perform `100` bootstraps for more robust results. The `--chunk-size` argument specifies the maximum length of the chromosome chunks, which are then randomly drawn with replacement to create the bootstrapped chromosomes. The `--output` argument sets the prefix for the output files and the directory where the allele frequency spectra from the bootstrapped chromosomes will be stored. @@ -12,6 +14,8 @@ dadi-cli GenerateFs --vcf examples/data/1KG.YRI.CEU.non.vcf.gz --pop-info exampl The output files from the above commands can be found in the `bootstrapping_syn` directory for synonymous SNPs and the `bootstrapping_non` directory for non-synonymous SNPs [here](https://github.com/xin-huang/dadi-cli/tree/revision/examples/results/fs). 
+## Confidence interval estimation + To estimate the confidence intervals for the demographic parameters, users can use ``` @@ -25,3 +29,39 @@ dadi-cli StatDFE --fs ./examples/results/fs/1KG.YRI.CEU.20.nonsynonymous.snps.un ``` Three different step sizes are tested when using the GIM. Ideally 95% confidence intervals will be consistent between step sizes. + +## Arguments + +For the `StatDM` subcommand: + +| Argument | Description | +| - | - | +| `--fs` | Frequency spectrum of mutations used for inference. To generate the frequency spectrum, please use `dadi-cli GenerateFs`. Can be an HTML link. | +| `--model` | Name of the demographic model. To check available demographic models, please use `dadi-cli Model`. | +| `--model-file` | Name of Python module file (not including .py) that contains custom models to use. Can be an HTML link. Default: None. | +| `--grids` | Sizes of grids. Default: Based on sample size. | +| `--nomisid` | Enable to *not* include a parameter modeling ancestral state misidentification when data are polarized. | +| `--output` | Name of the output file. | +| `--constants` | Fixed parameters during the inference or using Godambe analysis. Use -1 to indicate a parameter is NOT fixed. Default: None. | +| `--eps` | Step sizes to try for Godambe analysis. Default: [0.1, 0.01, 0.001]. | +| `--demo-popt` | File containing the bestfit demographic parameters, generated by `dadi-cli BestFit`. | +| `--bootstrapping-dir` | Directory containing bootstrapping spectra. | +| `--logscale` | Whether to estimate the uncertainties assuming a log-normal distribution of parameters. Default: False. | + +For the `StatDFE` subcommand: + +| Argument | Description | +| - | - | +| `--fs` | Frequency spectrum of mutations used for inference. To generate the frequency spectrum, please use `dadi-cli GenerateFs`. Can be an HTML link. | +| `--cache1d` | File name of the 1D DFE cache. To generate the cache, please use `dadi-cli GenerateCache`. 
| +| `--cache2d` | File name of the 2D DFE cache. To generate the cache, please use `dadi-cli GenerateCache`. | +| `--pdf1d` | 1D probability density function for the DFE inference. To check available probability density functions, please use `dadi-cli Pdf`. | +| `--pdf2d` | 2D probability density function for the joint DFE inference. To check available probability density functions, please use `dadi-cli Pdf`. | +| `--nomisid` | Enable to *not* include a parameter modeling ancestral state misidentification when data are polarized. | +| `--output` | Name of the output file. | +| `--constants` | Fixed parameters during the inference or using Godambe analysis. Use -1 to indicate a parameter is NOT fixed. Default: None. | +| `--eps` | Step sizes to try for Godambe analysis. Default: [0.1, 0.01, 0.001]. | +| `--dfe-popt` | File containing the bestfit DFE parameters, generated by `dadi-cli BestFit`. | +| `--bootstrapping-nonsynonymous-dir` | Directory containing bootstrapping spectra. | +| `--bootstrapping-synonymous-dir` | Directory containing bootstrapping spectra, required to adjust nonsynonymous theta for differences in synonymous theta. | +| `--logscale` | Whether to estimate the uncertainties assuming a log-normal distribution of parameters. Default: False. |