Skip to content

Commit

Permalink
Allow Results objects to work with Estimators' fit() method (#104)
Browse files Browse the repository at this point in the history
* Override .dataset_ with None when calling fit().

* Warn about missing dataset attribute.

* Test new behavior.

* Improve Estimator fit docstrings.

* Remove extra blank line.

* Add @JulioAPeraza's review.
  • Loading branch information
tsalo authored Jun 5, 2022
1 parent be85ec4 commit f425ff9
Show file tree
Hide file tree
Showing 4 changed files with 207 additions and 5 deletions.
3 changes: 3 additions & 0 deletions pymare/estimators/combination.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ def _z_to_p(self, z):

def fit(self, z, *args, **kwargs):
"""Fit the estimator to z-values."""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if self.mode == "concordant":
ose = self.__class__(mode="directed")
p1 = ose.p_value(z, *args, **kwargs)
Expand Down
98 changes: 93 additions & 5 deletions pymare/estimators/estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,24 @@ def __init__(self, tau2=0.0):
self.tau2 = tau2

def fit(self, y, X, v=None):
"""Fit the estimator to data."""
"""Fit the estimator to data.
Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
v : :obj:`numpy.ndarray` of shape (n, d), optional
Sampling variances. If not provided, unit weights will be used.
Returns
-------
:obj:`~pymare.estimators.WeightedLeastSquares`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if v is None:
v = np.ones_like(y)

Expand Down Expand Up @@ -219,7 +236,24 @@ class DerSimonianLaird(BaseEstimator):
"""

def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.
Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
Returns
-------
:obj:`~pymare.estimators.DerSimonianLaird`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

y = ensure_2d(y)
v = ensure_2d(v)

Expand Down Expand Up @@ -266,7 +300,24 @@ class Hedges(BaseEstimator):
"""

def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.
Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
Returns
-------
:obj:`~pymare.estimators.Hedges`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

k, p = X.shape[:2]
_unit_v = np.ones_like(y)
beta, inv_cov = weighted_least_squares(y, _unit_v, X, return_cov=True)
Expand Down Expand Up @@ -317,7 +368,24 @@ def __init__(self, method="ml", **kwargs):

@_loopable
def fit(self, y, v, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.
Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
v : :obj:`numpy.ndarray` of shape (n, d)
Sampling variances.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
Returns
-------
:obj:`~pymare.estimators.VarianceBasedLikelihoodEstimator`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

# use D-L estimate for initial values
est_DL = DerSimonianLaird().fit(y, v, X).params_
beta = est_DL["fe_params"]
Expand Down Expand Up @@ -393,7 +461,24 @@ def __init__(self, method="ml", **kwargs):

@_loopable
def fit(self, y, n, X):
"""Fit the estimator to data."""
"""Fit the estimator to data.
Parameters
----------
y : :obj:`numpy.ndarray` of shape (n, d)
The dependent variable(s) (y).
n : :obj:`numpy.ndarray` of shape (n, d)
Sample sizes.
X : :obj:`numpy.ndarray` of shape (n, p)
The independent variable(s) (X).
Returns
-------
:obj:`~pymare.estimators.SampleSizeBasedLikelihoodEstimator`
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if n.std() < np.sqrt(np.finfo(float).eps):
raise ValueError(
"Sample size-based likelihood estimator cannot "
Expand Down Expand Up @@ -550,6 +635,9 @@ def fit(self, y, v, X, groups=None):
`groups` argument can be used to specify the nesting structure
(i.e., which rows in `y`, `v`, and `X` belong to each study).
"""
# This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
self.dataset_ = None

if y.ndim > 1 and y.shape[1] > 1:
raise ValueError(
"The StanMetaRegression estimator currently does "
Expand Down
46 changes: 46 additions & 0 deletions pymare/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ class MetaRegressionResults:
tau2 : None or :obj:`numpy.ndarray` of shape (d,) or :obj:`float`, optional
A 1-d array containing the estimated tau^2 value for each parallel dataset
(or a float, for a single dataset). May be omitted by fixed-effects estimators.
Warning
-------
When an Estimator is fitted to arrays directly using the ``fit`` method, the Results object's
utility is limited.
Many methods will not work.
"""

def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None):
Expand Down Expand Up @@ -95,6 +101,11 @@ def get_fe_stats(self, alpha=0.05):
def get_re_stats(self, method="QP", alpha=0.05):
"""Get random-effect statistics.
.. warning::
This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.
Parameters
----------
method : {"QP"}, optional
Expand All @@ -118,6 +129,9 @@ def get_re_stats(self, method="QP", alpha=0.05):
----------
.. footbibliography::
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

if method == "QP":
n_datasets = np.atleast_2d(self.tau2).shape[1]
if n_datasets > 10:
Expand Down Expand Up @@ -160,6 +174,11 @@ def get_re_stats(self, method="QP", alpha=0.05):
def get_heterogeneity_stats(self):
"""Get heterogeneity statistics.
.. warning::
This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.
Returns
-------
:obj:`dict`
Expand All @@ -183,6 +202,9 @@ def get_heterogeneity_stats(self):
----------
.. footbibliography::
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

v = self.estimator.get_v(self.dataset)
q_fe = q_gen(self.dataset.y, v, self.dataset.X, 0)
df = self.dataset.y.shape[0] - self.dataset.X.shape[1]
Expand All @@ -198,6 +220,11 @@ def to_df(self, alpha=0.05):
This method only works for one-dimensional results.
.. warning::
This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.
Parameters
----------
alpha : :obj:`float`, optional
Expand All @@ -220,6 +247,9 @@ def to_df(self, alpha=0.05):
the CI columns will be ``"ci_0.025"`` and ``"ci_0.975"``.
=========== ==========================================================================
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

b_shape = self.fe_params.shape
if len(b_shape) > 1 and b_shape[1] > 1:
raise ValueError(
Expand All @@ -240,6 +270,11 @@ def to_df(self, alpha=0.05):
def permutation_test(self, n_perm=1000):
"""Run permutation test.
.. warning::
This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.
Parameters
----------
n_perm : :obj:`int`, optional
Expand All @@ -263,6 +298,9 @@ def permutation_test(self, n_perm=1000):
This means that one can often set very high n_perm values (e.g., 100k) with little
performance degradation.
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

n_obs, n_datasets = self.dataset.y.shape
has_mods = self.dataset.X.shape[1] > 1

Expand Down Expand Up @@ -384,6 +422,11 @@ def p(self):
def permutation_test(self, n_perm=1000):
"""Run permutation test.
.. warning::
This method relies on the ``.dataset`` attribute, so the original Estimator must have
be fitted with ``fit_dataset``, not ``fit``.
Parameters
----------
n_perm : :obj:`int`, optional
Expand All @@ -406,6 +449,9 @@ def permutation_test(self, n_perm=1000):
set very high n_perm values (e.g., 100k) with little performance
degradation.
"""
if self.dataset is None:
raise ValueError("The Dataset is unavailable. This method requires a Dataset.")

n_obs, n_datasets = self.dataset.y.shape

# create results arrays
Expand Down
65 changes: 65 additions & 0 deletions pymare/tests/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,71 @@ def results_2d(fitted_estimator, dataset_2d):
return est.fit_dataset(dataset_2d).summary()


def test_meta_regression_results_from_arrays(dataset):
"""Ensure that a MetaRegressionResults can be created from arrays.
This is a regression test for a bug that caused the MetaRegressionResults
to fail when Estimators were fitted to arrays instead of Datasets.
See https://github.com/neurostuff/PyMARE/issues/52 for more info.
"""
est = DerSimonianLaird()
fitted_estimator = est.fit(y=dataset.y, X=dataset.X, v=dataset.v)
results = fitted_estimator.summary()
assert isinstance(results, MetaRegressionResults)
assert results.fe_params.shape == (2, 1)
assert results.fe_cov.shape == (2, 2, 1)
assert results.tau2.shape == (1,)

# fit overwrites dataset_ attribute with None
assert fitted_estimator.dataset_ is None
# fit_dataset overwrites it with the Dataset
fitted_estimator.fit_dataset(dataset)
assert isinstance(fitted_estimator.dataset_, Dataset)
# fit sets it back to None
fitted_estimator.fit(y=dataset.y, X=dataset.X, v=dataset.v)
assert fitted_estimator.dataset_ is None

# Some methods are not available if fit was used
results = fitted_estimator.summary()
with pytest.raises(ValueError):
results.get_re_stats()

with pytest.raises(ValueError):
results.get_heterogeneity_stats()

with pytest.raises(ValueError):
results.to_df()

with pytest.raises(ValueError):
results.permutation_test(1000)


def test_combination_test_results_from_arrays(dataset):
"""Ensure that a CombinationTestResults can be created from arrays.
This is a regression test for a bug that caused the MetaRegressionResults
to fail when Estimators were fitted to arrays instead of Datasets.
See https://github.com/neurostuff/PyMARE/issues/52 for more info.
"""
fitted_estimator = StoufferCombinationTest().fit(z=dataset.y)
results = fitted_estimator.summary()
assert isinstance(results, CombinationTestResults)
assert results.p.shape == (1,)

# fit overwrites dataset_ attribute with None
assert fitted_estimator.dataset_ is None
# fit_dataset overwrites it with the Dataset
fitted_estimator.fit_dataset(dataset)
assert isinstance(fitted_estimator.dataset_, Dataset)
# fit sets it back to None
fitted_estimator.fit(z=dataset.y)
assert fitted_estimator.dataset_ is None

# Some methods are not available if fit was used
with pytest.raises(ValueError):
fitted_estimator.summary().permutation_test(1000)


def test_meta_regression_results_init_1d(fitted_estimator):
"""Test MetaRegressionResults from 1D data."""
est = fitted_estimator
Expand Down

0 comments on commit f425ff9

Please # to comment.