pymc-labs · drbenvincent · Jul 24, 2023 · Jul 22, 2023 · Jul 22, 2023 · Jul 22, 2023
diff --git a/causalpy/pymc_experiments.py b/causalpy/pymc_experiments.py
@@ -543,18 +543,19 @@ class RegressionDiscontinuity(ExperimentalDesign):
     """
     A class to analyse regression discontinuity experiments.
 
-    :param data: A pandas dataframe
-    :param formula: A statistical model formula
-    :param treatment_threshold: A scalar threshold value at which the treatment
-                                is applied
-    :param model: A PyMC model
-    :param running_variable_name: The name of the predictor variable that the treatment
-                                  threshold is based upon
-
-    .. note::
-
-        There is no pre/post intervention data distinction for the regression
-        discontinuity design, we fit all the data available.
+    :param data:
+        A pandas dataframe
+    :param formula:
+        A statistical model formula
+    :param treatment_threshold:
+        A scalar threshold value at which the treatment is applied
+    :param model:
+        A PyMC model
+    :param running_variable_name:
+        The name of the predictor variable that the treatment threshold is based upon
+    :param epsilon:
+        A small scalar offset; the causal impact is evaluated at
+        ``treatment_threshold - epsilon`` and ``treatment_threshold + epsilon``.
     """
 
     def __init__(
@@ -564,6 +565,7 @@ def __init__(
         treatment_threshold: float,
         model=None,
         running_variable_name: str = "x",
+        epsilon: float = 0.001,
         **kwargs,
     ):
         super().__init__(model=model, **kwargs)
@@ -572,6 +574,7 @@ def __init__(
         self.formula = formula
         self.running_variable_name = running_variable_name
         self.treatment_threshold = treatment_threshold
+        self.epsilon = epsilon
         self._input_validation()
 
         y, X = dmatrices(formula, self.data)
@@ -609,7 +612,10 @@ def __init__(
         self.x_discon = pd.DataFrame(
             {
                 self.running_variable_name: np.array(
-                    [self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
+                    [
+                        self.treatment_threshold - self.epsilon,
+                        self.treatment_threshold + self.epsilon,
+                    ]
                 ),
                 "treated": np.array([0, 1]),
             }

diff --git a/causalpy/skl_experiments.py b/causalpy/skl_experiments.py
@@ -346,13 +346,21 @@ def plot(self):
 
 class RegressionDiscontinuity(ExperimentalDesign):
     """
-    Analyse data from regression discontinuity experiments.
-
-    .. note::
-
-        There is no pre/post intervention data distinction for the regression
-        discontinuity design, we fit all the data available.
-
+    A class to analyse regression discontinuity experiments.
+
+    :param data:
+        A pandas dataframe
+    :param formula:
+        A statistical model formula
+    :param treatment_threshold:
+        A scalar threshold value at which the treatment is applied
+    :param model:
+        A scikit-learn model object
+    :param running_variable_name:
+        The name of the predictor variable that the treatment threshold is based upon
+    :param epsilon:
+        A small scalar offset; the causal impact is evaluated at
+        ``treatment_threshold - epsilon`` and ``treatment_threshold + epsilon``.
     """
 
     def __init__(
@@ -362,13 +370,15 @@ def __init__(
         treatment_threshold,
         model=None,
         running_variable_name="x",
+        epsilon: float = 0.001,
         **kwargs,
     ):
         super().__init__(model=model, **kwargs)
         self.data = data
         self.formula = formula
         self.running_variable_name = running_variable_name
         self.treatment_threshold = treatment_threshold
+        self.epsilon = epsilon
         y, X = dmatrices(formula, self.data)
         self._y_design_info = y.design_info
         self._x_design_info = X.design_info
@@ -404,7 +414,10 @@ def __init__(
         self.x_discon = pd.DataFrame(
             {
                 self.running_variable_name: np.array(
-                    [self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
+                    [
+                        self.treatment_threshold - self.epsilon,
+                        self.treatment_threshold + self.epsilon,
+                    ]
                 ),
                 "treated": np.array([0, 1]),
             }

diff --git a/causalpy/tests/test_integration_pymc_examples.py b/causalpy/tests/test_integration_pymc_examples.py
@@ -112,6 +112,7 @@ def test_rd():
         formula="y ~ 1 + bs(x, df=6) + treated",
         model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
         treatment_threshold=0.5,
+        epsilon=0.001,
     )
     assert isinstance(df, pd.DataFrame)
     assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)

diff --git a/causalpy/tests/test_integration_skl_examples.py b/causalpy/tests/test_integration_skl_examples.py
@@ -36,6 +36,7 @@ def test_rd_drinking():
         running_variable_name="age",
         model=LinearRegression(),
         treatment_threshold=21,
+        epsilon=0.001,
     )
     assert isinstance(df, pd.DataFrame)
     assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
@@ -81,6 +82,7 @@ def test_rd_linear_main_effects():
         formula="y ~ 1 + x + treated",
         model=LinearRegression(),
         treatment_threshold=0.5,
+        epsilon=0.001,
     )
     assert isinstance(data, pd.DataFrame)
     assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
@@ -94,6 +96,7 @@ def test_rd_linear_with_interaction():
         formula="y ~ 1 + x + treated + x:treated",
         model=LinearRegression(),
         treatment_threshold=0.5,
+        epsilon=0.001,
     )
     assert isinstance(data, pd.DataFrame)
     assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
@@ -108,6 +111,7 @@ def test_rd_linear_with_gaussian_process():
         formula="y ~ 1 + x + treated",
         model=GaussianProcessRegressor(kernel=kernel),
         treatment_threshold=0.5,
+        epsilon=0.001,
     )
     assert isinstance(data, pd.DataFrame)
     assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)