From a92d02783696f4eb1bb761036ec82c9edefccef9 Mon Sep 17 00:00:00 2001
From: tjstruck <tjstruck@email.arizona.edu>
Date: Wed, 15 Jan 2025 16:31:48 -0700
Subject: [PATCH] Add ability to pass in theta and theta_ns for SimulateDM and
 SimulateDFE

---
 dadi_cli/InferDFE.py                     |  4 ++--
 dadi_cli/SimulateFs.py                   | 23 +++++++++++++----------
 dadi_cli/parsers/infer_dfe_parsers.py    |  2 +-
 dadi_cli/parsers/simulate_dfe_parsers.py | 23 +++++++++++++++--------
 dadi_cli/parsers/simulate_dm_parsers.py  |  8 ++++++++
 tests/test_SimulateFs.py                 |  8 +++++++-
 tests/test_main.py                       |  6 ++++--
 7 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/dadi_cli/InferDFE.py b/dadi_cli/InferDFE.py
index 4f512b3e..a5aa3b91 100644
--- a/dadi_cli/InferDFE.py
+++ b/dadi_cli/InferDFE.py
@@ -82,7 +82,7 @@ def infer_dfe(
 
     """
     if not cache1d and not cache2d:
-        raise ValueError("At least one of cache1d or cache2d must be provided.")
+        raise ValueError("\nAt least one of cache1d or cache2d must be provided.")
 
     # Randomize starting parameter values
     if seed is not None:
@@ -116,7 +116,7 @@ def infer_dfe(
         try:
             from dadi.LowPass.LowPass import make_low_pass_func_GATK_multisample as func_cov
         except ModuleNotFoundError:
-            raise ImportError("ERROR:\nCurrent dadi version does not support coverage model\n")
+            raise ImportError("\nCurrent dadi version does not support coverage model\n")
         nseq = [int(ele) for ele in cov_args[1:]]
         if cov_inbreeding == []:
             Fx = None
diff --git a/dadi_cli/SimulateFs.py b/dadi_cli/SimulateFs.py
index 906b051c..9ee10885 100644
--- a/dadi_cli/SimulateFs.py
+++ b/dadi_cli/SimulateFs.py
@@ -13,6 +13,7 @@ def simulate_demography(
     ns: int, 
     pts_l: tuple[int],
     misid: bool, 
+    theta: float, 
     output: str, 
     inference_file: bool
 ) -> None:
@@ -33,6 +34,8 @@ def simulate_demography(
         Grid sizes for modeling. If None, a default is calculated based on ns.
     misid : bool
         If True, adds a parameter for modeling ancestral state misidentification.
+    theta : float
+        Set the theta value for the simulated demographic SFS. Default 1.
     output : str
         Name of the output file.
     inference_file : bool
@@ -49,7 +52,7 @@ def simulate_demography(
         func = dadi.Numerics.make_anc_state_misid_func(func)
         func.__param_names__ = param_names + ["misid"]
     func_ex = dadi.Numerics.make_extrap_func(func)
-    fs = func_ex(p0, ns, pts_l)
+    fs = func_ex(p0, ns, pts_l) * theta
     fs.to_file(output)
 
     if inference_file:
@@ -58,7 +61,7 @@ def simulate_demography(
                 "# Ran SimulateDM\n# This is a fake inference output results to generate caches\n# Log(likelihood)   "
                 + "\t".join(func.__param_names__)
                 + "\ttheta0\n"
-                + "-0\t" + "\t".join([str(ele) for ele in p0]) + "\t1"
+                + "-0\t" + "\t".join([str(ele) for ele in p0]) + "\t" + str(theta)
             )
 
 
@@ -103,7 +106,7 @@ def simulate_dfe(
     sele_dist: str, 
     sele_dist2: str, 
     pdf_file: str,
-    ratio: float, 
+    theta_ns: float, 
     misid: bool, 
     output: str
 ) -> None:
@@ -122,8 +125,8 @@ def simulate_dfe(
         Name of the 1D Probability Density Function (PDF) for modeling DFEs.
     sele_dist2 : str
         Name of the 2D PDF for modeling DFEs.
-    ratio : float
-        Ratio of synonymous to non-synonymous mutations.
+    theta_ns : float
+        Theta of non-synonymous mutations.
     misid : bool
         If True, adds a parameter for modeling ancestral state misidentification when data are polarized.
     output : str
@@ -152,16 +155,16 @@ def simulate_dfe(
 
     if (cache1d is not None) and (cache2d is not None):
         func = dadi.DFE.Cache2D_mod.mixture
-        func_args = [cache1d, cache2d, sele_dist, sele_dist2, ratio]
+        func_args = [cache1d, cache2d, sele_dist, sele_dist2, theta_ns]
     else:
-        func_args = [sele_dist, ratio]
+        func_args = [sele_dist, theta_ns]
 
     if misid:
         func = dadi.Numerics.make_anc_state_misid_func(func)
-    print(p0, None, sele_dist, ratio, None)
+    print(p0, None, sele_dist, theta_ns, None)
     # Get expected SFS for MLE
     if (cache1d is not None) and (cache2d is not None):
-        fs = func(p0, None, cache1d, cache2d, sele_dist, sele_dist2, ratio, None)
+        fs = func(p0, None, cache1d, cache2d, sele_dist, sele_dist2, theta_ns, None)
     else:
-        fs = func(p0, None, sele_dist, ratio, None)
+        fs = func(p0, None, sele_dist, theta_ns, None)
     fs.to_file(output)
diff --git a/dadi_cli/parsers/infer_dfe_parsers.py b/dadi_cli/parsers/infer_dfe_parsers.py
index 5fc5322d..30804c60 100644
--- a/dadi_cli/parsers/infer_dfe_parsers.py
+++ b/dadi_cli/parsers/infer_dfe_parsers.py
@@ -84,7 +84,7 @@ def _run_infer_dfe(args: argparse.Namespace) -> None:
     if args.pdf1d == None and args.pdf2d == None:
         raise ValueError("Require --pdf1d and/or --pdf2d depending on DFE model")
     if args.cache1d == None and args.cache2d == None:
-        raise ValueError("cache1d --pdf1d and/or --cache2d depending on DFE model")
+        raise ValueError("Require --cache1d and/or --cache2d depending on DFE model")
     if "://" in args.fs:
         import urllib.request
         sfs_fi = open("sfs.fs","w")
diff --git a/dadi_cli/parsers/simulate_dfe_parsers.py b/dadi_cli/parsers/simulate_dfe_parsers.py
index c5d30ed8..cbb653ae 100644
--- a/dadi_cli/parsers/simulate_dfe_parsers.py
+++ b/dadi_cli/parsers/simulate_dfe_parsers.py
@@ -31,13 +31,18 @@ def _run_simulate_dfe(args: argparse.Namespace) -> None:
             The 2D probability distribution function name for the DFE.
         - pdf_file : str, optional
             Name of file with custom probability density function model(s) in it.
-        - ratio : float
-            Ratio for adjusting the selection parameters, typically used to scale between different
-            types of mutations or fitness effects.
+        - theta_ns : float
+            Nonsynonymous theta for the SFS with selection.
         - nomisid : bool
             Flag to not consider misidentification, which is converted internally to `misid`.
 
     """
+    # Make sure flags are used:
+    if args.cache1d == None and args.cache2d == None:
+        raise ValueError("\nRequire --cache1d and/or --cache2d depending on DFE model")
+    if args.pdf1d == None and args.pdf2d == None:
+        raise ValueError("\nRequire --pdf1d and/or --pdf2d depending on DFE model")
+
     if args.cache1d != None:
         if "://" in args.cache1d:
             from urllib.request import urlopen
@@ -66,7 +71,7 @@ def _run_simulate_dfe(args: argparse.Namespace) -> None:
         sele_dist=args.pdf1d,
         sele_dist2=args.pdf2d,
         pdf_file=args.pdf_file,
-        ratio=args.ratio,
+        theta_ns=args.theta_ns,
         misid=args.misid,
         output=args.output,
     )
@@ -94,11 +99,13 @@ def add_simulate_dfe_parsers(subparsers: argparse.ArgumentParser) -> None:
     )
     add_dfe_argument(parser)
     parser.add_argument(
-        "--ratio",
+        "--theta-ns",
         type=positive_num,
-        dest="ratio",
-        required=True,
-        help="Ratio for the nonsynonymous mutations to the synonymous mutations.",
+        dest="theta_ns",
+        default=1,
+        help="Set the theta for the nonsynonymous mutations.\n" + \
+        "In dadi this is usually determined by the [ demography model theta ] multiplied by the ratio of [ amount of potential neutral SNPs ] to [ potential selective SNPs ];\n" + \
+        "Default: 1.",
     )
     add_p0_argument(parser)
     add_misid_argument(parser)
diff --git a/dadi_cli/parsers/simulate_dm_parsers.py b/dadi_cli/parsers/simulate_dm_parsers.py
index e62e1fdd..05899162 100644
--- a/dadi_cli/parsers/simulate_dm_parsers.py
+++ b/dadi_cli/parsers/simulate_dm_parsers.py
@@ -56,6 +56,7 @@ def _run_simulate_dm(args: argparse.Namespace) -> None:
         ns=args.sample_sizes,
         pts_l=args.grids,
         misid=args.misid,
+        theta=args.theta,
         output=args.output,
         inference_file=args.inference_file,
     )
@@ -90,5 +91,12 @@ def add_simulate_dm_parsers(subparsers: argparse.ArgumentParser) -> None:
         action="store_true",
         help='Make an output file like you would get for running InferDM to pass into GenerateCache to make caches with your simulated demographic model. Will be the same name and path as output + ".SimulateFs.pseudofit"; Default: False.',
     )
+    parser.add_argument(
+        "--theta",
+        dest="theta",
+        default=1,
+        type=positive_num,
+        help='Set the theta of the demographic model; Default: 1.',
+    )
     add_output_argument(parser)
     parser.set_defaults(runner=_run_simulate_dm)
diff --git a/tests/test_SimulateFs.py b/tests/test_SimulateFs.py
index 3b1ce5c6..35340510 100644
--- a/tests/test_SimulateFs.py
+++ b/tests/test_SimulateFs.py
@@ -24,6 +24,7 @@ def test_simulate_demography_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = False
+    theta = 1
     output = "tests/test_results/simulate_two_epoch.fs"
     inference_file = False
     simulate_demography(
@@ -33,6 +34,7 @@ def test_simulate_demography_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -49,6 +51,7 @@ def test_simulate_custom_demography_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = False
+    theta = 1
     output = "tests/test_results/simulate_three_epoch_bottleneck.fs"
     inference_file = False
     simulate_demography(
@@ -58,6 +61,7 @@ def test_simulate_custom_demography_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -75,6 +79,7 @@ def test_simulate_demography_misid_code():
     ns = [10]
     pts_l = [20, 30, 40]
     misid = True
+    theta = 1
     output = "tests/test_results/simulate_two_epoch_with_misid.fs"
     inference_file = True
     simulate_demography(
@@ -84,6 +89,7 @@ def test_simulate_demography_misid_code():
         ns=ns, 
         pts_l=pts_l, 
         misid=misid, 
+        theta=theta, 
         output=output, 
         inference_file=inference_file
     )
@@ -105,7 +111,7 @@ def test_simulate_dfe_code():
     sele_dist = "lognormal"
     sele_dist2 = "biv_lognormal"
     pdf_file = None
-    theta_ns = 2.31
+    theta_ns = 2.31*1000
     misid = False
     output_1d = "tests/test_results/simulate_dfe_split_mig_one_s_lognormal.fs"
     output_2d = "tests/test_results/simulate_dfe_split_mig_two_s_lognormal.fs"
diff --git a/tests/test_main.py b/tests/test_main.py
index 372d2001..81c701e4 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -75,6 +75,7 @@ def simulate_args():
     simulate_args.sample_sizes = [10]
     simulate_args.grids = [20, 30, 40]
     simulate_args.nomisid = True
+    simulate_args.theta = 1000
     simulate_args.output = "tests/test_results/main_simulate_two_epoch.fs"
     simulate_args.inference_file = False
 
@@ -91,6 +92,7 @@ def simulate_args():
     simulate_args.sample_sizes = [10]
     simulate_args.grids = [20, 30, 40]
     simulate_args.nomisid = False
+    simulate_args.theta = 1000
     simulate_args.output = "tests/test_results/main_simulate_three_epoch_bottleneck_html.fs"
     simulate_args.inference_file = False
 
@@ -142,7 +144,7 @@ def simulate_args():
     simulate_args.pdf1d = "lognormal"
     simulate_args.pdf2d = "biv_lognormal"
     simulate_args.pdf_file = None
-    simulate_args.ratio = 2.31
+    simulate_args.theta_ns = 2.31*1000
     simulate_args.nomisid = False
     simulate_args.output = "tests/test_results/main_simulate_mix_dfe.fs"
 
@@ -159,7 +161,7 @@ def simulate_args():
     simulate_args.pdf1d = "lognormal"
     simulate_args.pdf2d = "biv_lognormal"
     simulate_args.pdf_file = None
-    simulate_args.ratio = 2.31
+    simulate_args.theta_ns = 2.31*1000
     simulate_args.nomisid = False
     simulate_args.output = "tests/test_results/main_simulate_mix_dfe_html.fs"