Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an epsilon parameter to RegressionDiscontinuity classes #224

Merged
merged 6 commits into from
Jul 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions causalpy/pymc_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,18 +543,19 @@ class RegressionDiscontinuity(ExperimentalDesign):
"""
A class to analyse regression discontinuity experiments.

:param data: A pandas dataframe
:param formula: A statistical model formula
:param treatment_threshold: A scalar threshold value at which the treatment
is applied
:param model: A PyMC model
:param running_variable_name: The name of the predictor variable that the treatment
threshold is based upon

.. note::

There is no pre/post intervention data distinction for the regression
discontinuity design, we fit all the data available.
:param data:
A pandas dataframe
:param formula:
A statistical model formula
:param treatment_threshold:
A scalar threshold value at which the treatment is applied
:param model:
A PyMC model
:param running_variable_name:
The name of the predictor variable that the treatment threshold is based upon
:param epsilon:
A small scalar value which determines how far above and below the treatment
threshold to evaluate the causal impact.
"""

def __init__(
Expand All @@ -564,6 +565,7 @@ def __init__(
treatment_threshold: float,
model=None,
running_variable_name: str = "x",
epsilon: float = 0.001,
**kwargs,
):
super().__init__(model=model, **kwargs)
Expand All @@ -572,6 +574,7 @@ def __init__(
self.formula = formula
self.running_variable_name = running_variable_name
self.treatment_threshold = treatment_threshold
self.epsilon = epsilon
self._input_validation()

y, X = dmatrices(formula, self.data)
Expand Down Expand Up @@ -609,7 +612,10 @@ def __init__(
self.x_discon = pd.DataFrame(
{
self.running_variable_name: np.array(
[self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
[
self.treatment_threshold - self.epsilon,
self.treatment_threshold + self.epsilon,
]
),
"treated": np.array([0, 1]),
}
Expand Down
29 changes: 21 additions & 8 deletions causalpy/skl_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,13 +346,21 @@ def plot(self):

class RegressionDiscontinuity(ExperimentalDesign):
"""
Analyse data from regression discontinuity experiments.

.. note::

There is no pre/post intervention data distinction for the regression
discontinuity design, we fit all the data available.

A class to analyse regression discontinuity experiments.

:param data:
A pandas dataframe
:param formula:
A statistical model formula
:param treatment_threshold:
A scalar threshold value at which the treatment is applied
:param model:
A sci-kit learn model object
:param running_variable_name:
The name of the predictor variable that the treatment threshold is based upon
:param epsilon:
A small scalar value which determines how far above and below the treatment
threshold to evaluate the causal impact.
"""

def __init__(
Expand All @@ -362,13 +370,15 @@ def __init__(
treatment_threshold,
model=None,
running_variable_name="x",
epsilon: float = 0.001,
**kwargs,
):
super().__init__(model=model, **kwargs)
self.data = data
self.formula = formula
self.running_variable_name = running_variable_name
self.treatment_threshold = treatment_threshold
self.epsilon = epsilon
y, X = dmatrices(formula, self.data)
self._y_design_info = y.design_info
self._x_design_info = X.design_info
Expand Down Expand Up @@ -404,7 +414,10 @@ def __init__(
self.x_discon = pd.DataFrame(
{
self.running_variable_name: np.array(
[self.treatment_threshold - 0.001, self.treatment_threshold + 0.001]
[
self.treatment_threshold - self.epsilon,
self.treatment_threshold + self.epsilon,
]
),
"treated": np.array([0, 1]),
}
Expand Down
1 change: 1 addition & 0 deletions causalpy/tests/test_integration_pymc_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def test_rd():
formula="y ~ 1 + bs(x, df=6) + treated",
model=cp.pymc_models.LinearRegression(sample_kwargs=sample_kwargs),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(df, pd.DataFrame)
assert isinstance(result, cp.pymc_experiments.RegressionDiscontinuity)
Expand Down
4 changes: 4 additions & 0 deletions causalpy/tests/test_integration_skl_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def test_rd_drinking():
running_variable_name="age",
model=LinearRegression(),
treatment_threshold=21,
epsilon=0.001,
)
assert isinstance(df, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand Down Expand Up @@ -81,6 +82,7 @@ def test_rd_linear_main_effects():
formula="y ~ 1 + x + treated",
model=LinearRegression(),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand All @@ -94,6 +96,7 @@ def test_rd_linear_with_interaction():
formula="y ~ 1 + x + treated + x:treated",
model=LinearRegression(),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)
Expand All @@ -108,6 +111,7 @@ def test_rd_linear_with_gaussian_process():
formula="y ~ 1 + x + treated",
model=GaussianProcessRegressor(kernel=kernel),
treatment_threshold=0.5,
epsilon=0.001,
)
assert isinstance(data, pd.DataFrame)
assert isinstance(result, cp.skl_experiments.RegressionDiscontinuity)