Merge with master

xin-huang · Feb 26, 2025 · 04d923d · 04d923d
2 parents eda60ec + 84c2e4b
commit 04d923d
Show file tree

Hide file tree

Showing 31 changed files with 287 additions and 115 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -17,7 +17,7 @@ jobs:
       with:
         python-version: ${{ matrix.python-version }}
     - name: Add micromamba to system path
-      uses: mamba-org/provision-with-micromamba@main
+      uses: mamba-org/setup-micromamba@main
       with:
         environment-name: dadi-cli
         activate-environment: dadi-cli

diff --git a/dadi_cli/GenerateFs.py b/dadi_cli/GenerateFs.py
@@ -94,9 +94,10 @@ def generate_fs(
 
     if calc_coverage:    
         import pickle
-        coverage_dd = {chrom_pos:{'coverage':dd[chrom_pos]['coverage']} for chrom_pos in dd}
-        print(f"\nSaving coverage dictionary in pickle named:\n{output}.coverage.pickle\n")
-        pickle.dump(coverage_dd, open(f"{output}.coverage.pickle","wb"))
+        import dadi.LowPass.LowPass as lp
+        cov_dist = lp.compute_cov_dist(dd, pop_ids)
+        print(f"\nSaving coverage distribution data in pickle named:\n{output}.coverage.pickle\n")
+        pickle.dump(cov_dist, open(f"{output}.coverage.pickle","wb"))
 
     if bootstrap is None:
         fs = dadi.Spectrum.from_data_dict(

diff --git a/dadi_cli/InferDFE.py b/dadi_cli/InferDFE.py
@@ -13,6 +13,7 @@ def infer_dfe(
     cache2d: dadi.DFE.Cache2D_mod,
     sele_dist: str,
     sele_dist2: str,
+    pdf_file: str,
     theta: float,
     p0: list[float],
     upper_bounds: list[float],
@@ -96,9 +97,9 @@ def infer_dfe(
         func = cache2d.integrate
 
     if sele_dist is not None:
-        sele_dist = get_dadi_pdf(sele_dist)
+        sele_dist, _ = get_dadi_pdf(sele_dist, pdf_file)
     if sele_dist2 is not None:
-        sele_dist2 = get_dadi_pdf(sele_dist2)
+        sele_dist2, _ = get_dadi_pdf(sele_dist2, pdf_file)
     if (sele_dist is None) and (sele_dist2 is not None):
         sele_dist = sele_dist2
 
@@ -125,8 +126,8 @@ def infer_dfe(
         func = func_cov(func, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
 
     p0_len = len(p0)
-    lower_bounds = convert_to_None(lower_bounds, p0_len)
-    upper_bounds = convert_to_None(upper_bounds, p0_len)
+    # lower_bounds = convert_to_None(lower_bounds, p0_len)
+    # upper_bounds = convert_to_None(upper_bounds, p0_len)
     fixed_params = convert_to_None(fixed_params, p0_len)
 
     # Fit a DFE to the data

diff --git a/dadi_cli/InferDM.py b/dadi_cli/InferDM.py
@@ -99,9 +99,8 @@ def infer_demography(
         func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
 
     p0_len = len(p0)
-    lower_bounds = convert_to_None(lower_bounds, p0_len)
-    upper_bounds = convert_to_None(upper_bounds, p0_len)
-    fixed_params = convert_to_None(fixed_params, p0_len)
+    if fixed_params == -1:
+        fixed_params = convert_to_None(fixed_params, p0_len)
 
     p0 = dadi.Misc.perturb_params(
         p0, fold=1, upper_bound=upper_bounds, lower_bound=lower_bounds
@@ -224,9 +223,8 @@ def infer_global_opt(
         func_ex = func_cov(func_ex, cov_args[0], fs.pop_ids, nseq, fs.sample_sizes, Fx=Fx)
 
     p0_len = len(p0)
-    lower_bounds = convert_to_None(lower_bounds, p0_len)
-    upper_bounds = convert_to_None(upper_bounds, p0_len)
-    fixed_params = convert_to_None(fixed_params, p0_len)
+    if fixed_params == -1:
+        fixed_params = convert_to_None(fixed_params, p0_len)
 
     p0 = dadi.Misc.perturb_params(
         p0, fold=1, upper_bound=upper_bounds, lower_bound=lower_bounds

diff --git a/dadi_cli/Pdfs.py b/dadi_cli/Pdfs.py
@@ -1,7 +1,12 @@
+import dadi, importlib, os, sys
 import dadi.DFE as DFE
+from inspect import getmembers, isfunction
 
 
-def get_dadi_pdf(pdf: str) -> callable:
+def get_dadi_pdf(
+    pdf: str,
+    pdf_file: str = None
+    ) -> tuple[callable, list[str]]:
     """
     Obtains a built-in probability density function for modeling the distribution
     of fitness effects in dadi.
@@ -22,7 +27,27 @@ def get_dadi_pdf(pdf: str) -> callable:
         If the specified probability density function name is not recognized.
 
     """
-    if pdf == "beta":
+    if pdf_file is not None:
+        # If the user has the model folder in their PATH
+        #try:
+        #    func = getattr(importlib.import_module(model_file), model_name)
+        # If the user does not have the model folder in their PATH we add it
+        # This currently can mess with the User's PATH while running dadi-cli
+        #except:
+        #    model_file = os.path.abspath(model_file)
+        #    model_path = os.path.dirname(model_file)
+        #    model_file = os.path.basename(model_file)
+        #    model_file = os.path.splitext(model_file)[0]
+        #    sys.path.append(model_path)
+        #    func = getattr(importlib.import_module(model_file), model_name)
+        try:
+            module_name = os.path.splitext(os.path.basename(pdf_file))[0]
+            model_path = os.path.dirname(os.path.abspath(pdf_file))
+            sys.path.append(model_path)
+            func = getattr(importlib.import_module(module_name), pdf)
+        except ImportError as e:
+            raise ImportError(f"Failed to import module: {pdf_file}") from e
+    elif pdf == "beta":
         func = DFE.PDFs.beta
     elif pdf == "biv_ind_gamma":
         func = DFE.PDFs.biv_ind_gamma
@@ -41,7 +66,10 @@ def get_dadi_pdf(pdf: str) -> callable:
     else:
         raise ValueError("Probability density function " + pdf + " is not available!")
 
-    return func
+    if pdf_file is not None:
+        return func, func.__param_names__
+    else:
+        return func, None
 
 
 def get_dadi_pdf_params(pdf: str) -> list[str]:

diff --git a/dadi_cli/Plot.py b/dadi_cli/Plot.py
@@ -229,6 +229,7 @@ def plot_fitted_dfe(
     projections: list[int],
     pdf: str,
     pdf2: str,
+    pdf_file: str,
     cov_args: list,
     cov_inbreeding: list,
     output: str,
@@ -255,6 +256,8 @@ def plot_fitted_dfe(
         Name of the 1D probability density function file for modeling the DFE.
     pdf2 : str
         Name of the 2D probability density function file for modeling the DFE.
+    pdf_file : str
+        Name of file with custom probability density function model(s) in it.
     output : str
         Path where the comparison plot will be saved. The file format is inferred from the file extension.
     vmin : float
@@ -275,9 +278,9 @@ def plot_fitted_dfe(
     fs = dadi.Spectrum.from_file(fs)
 
     if pdf != None:
-        pdf = get_dadi_pdf(pdf)
+        pdf, _ = get_dadi_pdf(pdf, pdf_file)
     if pdf2 != None:
-        pdf2 = get_dadi_pdf(pdf2)
+        pdf2, _ = get_dadi_pdf(pdf2, pdf_file)
         if pdf == None:
             pdf = pdf2
 

diff --git a/dadi_cli/SimulateFs.py b/dadi_cli/SimulateFs.py
@@ -102,6 +102,7 @@ def simulate_dfe(
     cache2d: dadi.DFE.Cache2D_mod, 
     sele_dist: str, 
     sele_dist2: str, 
+    pdf_file: str,
     ratio: float, 
     misid: bool, 
     output: str
@@ -143,9 +144,9 @@ def simulate_dfe(
         func = cache2d.integrate
 
     if sele_dist is not None:
-        sele_dist = get_dadi_pdf(sele_dist)
+        sele_dist, _ = get_dadi_pdf(sele_dist, pdf_file)
     if sele_dist2 is not None:
-        sele_dist2 = get_dadi_pdf(sele_dist2)
+        sele_dist2, _ = get_dadi_pdf(sele_dist2, pdf_file)
     if (sele_dist is None) and (sele_dist2 is not None):
         sele_dist = sele_dist2
 

diff --git a/dadi_cli/Stat.py b/dadi_cli/Stat.py
@@ -6,7 +6,7 @@
 from typing import Optional
 from dadi_cli.Models import get_model
 from dadi_cli.Pdfs import get_dadi_pdf
-from dadi_cli.utilities import get_opts_and_theta, convert_to_None
+from dadi_cli.utilities import get_opts_and_theta, convert_to_None, pts_l_func
 
 
 def godambe_stat_demograpy(
@@ -49,11 +49,15 @@ def godambe_stat_demograpy(
     # We want the best fits from the demograpic fit.
     # We set the second argument to true, since we want
     # all the parameters from the file.
-    demo_popt, _, param_names = get_opts_and_theta(demo_popt, post_infer=True)
+    demo_popt, theta, param_names = get_opts_and_theta(demo_popt, post_infer=True)
     fixed_params = convert_to_None(fixed_params, len(demo_popt))
     free_params = _free_params(demo_popt, fixed_params)
     fs = dadi.Spectrum.from_file(fs)
+    if grids is None:
+        grids = pts_l_func(fs.sample_sizes)
     fs_boot_files = glob.glob(bootstrap_dir + "/*.fs")
+    if fs_boot_files == []:
+        raise ValueError(f"ERROR:\n{bootstrap_dir} is empty\n")
     all_boot = []
     for f in fs_boot_files:
         boot_fs = dadi.Spectrum.from_file(f)
@@ -74,9 +78,10 @@ def gfunc(free_params, ns, pts):
             uncerts_adj = dadi.Godambe.GIM_uncert(
                 gfunc, grids, all_boot, popt, fs, multinom=True, log=logscale, eps=eps
             )
-            # The uncertainty for theta is predicted, so we slice the
-            # the uncertainties for just the parameters.
-            uncerts_adj = uncerts_adj[:-1]
+            # Add theta into popt
+            popt = np.append(popt, np.array([theta]))
+            # param_string = '\t'.join(param_names[1:])
+            # f.write(f"Description\t{param_string}\n")
 
             f.write(
                 "Estimated 95% uncerts (theta adj), with step size "
@@ -118,6 +123,7 @@ def godambe_stat_dfe(
     cache2d: str,
     sele_dist: str,
     sele_dist2: str,
+    pdf_file: str,
     output: str,
     bootstrap_syn_dir: str,
     bootstrap_non_dir: str,
@@ -141,6 +147,8 @@ def godambe_stat_dfe(
         Name of the 1D PDF for modeling DFE.
     sele_dist2 : str
         Name of the 2D PDF for modeling DFE.
+    pdf_file : str
+        Name of file with custom probability density function model(s) in it.
     output : str
         File path for the output results.
     bootstrap_syn_dir : str
@@ -171,11 +179,17 @@ def godambe_stat_dfe(
 
     fs = dadi.Spectrum.from_file(fs)
     non_fs_files = glob.glob(bootstrap_non_dir + "/*.fs")
+    # Raise error if directory is empty
+    if non_fs_files == []:
+        raise ValueError(f"ERROR:\n{bootstrap_dir} is empty\n")
     all_non_boot = []
     for f in non_fs_files:
         boot_fs = dadi.Spectrum.from_file(f)
         all_non_boot.append(boot_fs)
     syn_fs_files = glob.glob(bootstrap_syn_dir + "/*.fs")
+    # Raise error if directory is empty
+    if syn_fs_files == []:
+        raise ValueError(f"ERROR:\n{bootstrap_dir} is empty\n")
     all_syn_boot = []
     for f in syn_fs_files:
         boot_fs = dadi.Spectrum.from_file(f)
@@ -189,9 +203,9 @@ def godambe_stat_dfe(
         sfunc = s2.integrate
 
     if sele_dist2 is not None:
-        sele_dist2 = get_dadi_pdf(sele_dist2)
+        sele_dist2, _ = get_dadi_pdf(sele_dist2, pdf_file)
     if sele_dist is not None:
-        sele_dist = get_dadi_pdf(sele_dist)
+        sele_dist, _ = get_dadi_pdf(sele_dist, pdf_file)
     else:
         sele_dist = sele_dist2
 

diff --git a/dadi_cli/parsers/common_arguments.py b/dadi_cli/parsers/common_arguments.py
@@ -39,15 +39,15 @@ def add_bounds_argument(parser: argparse.ArgumentParser) -> None:
 
     parser.add_argument(
         "--lbounds",
-        type=float,
+        type=str,
         nargs="+",
         required=boundary_req,
         help="Lower bounds of the optimized parameters.",
     )
 
     parser.add_argument(
         "--ubounds",
-        type=float,
+        type=str,
         nargs="+",
         required=boundary_req,
         help="Upper bounds of the optimized parameters.",
@@ -191,7 +191,7 @@ def add_constant_argument(parser: argparse.ArgumentParser) -> None:
     parser.add_argument(
         "--constants",
         default=-1,
-        type=float,
+        type=str,
         nargs="+",
         help="Fixed parameters during the inference or using Godambe analysis. Use -1 to indicate a parameter is NOT fixed. Default: None.",
     )
@@ -315,6 +315,14 @@ def add_dfe_argument(parser: argparse.ArgumentParser) -> None:
         type=str,
         help="2D probability density function for the joint DFE inference. To check available probability density functions, please use `dadi-cli Pdf`.",
     )
+    parser.add_argument(
+        "--pdf-file",
+        type=existed_file,
+        required=False,
+        dest="pdf_file",
+        help="Name of python probability density function module file (not including .py) that contains custom probability density functions to use. Default: None.",
+    )
+
 
 
 # Currently this command is only needed for param_names.