diff --git a/pymare/estimators/combination.py b/pymare/estimators/combination.py
index 8a8d6ce..f2ffabc 100644
--- a/pymare/estimators/combination.py
+++ b/pymare/estimators/combination.py
@@ -36,6 +36,9 @@ def _z_to_p(self, z):
 
     def fit(self, z, *args, **kwargs):
         """Fit the estimator to z-values."""
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if self.mode == "concordant":
             ose = self.__class__(mode="directed")
             p1 = ose.p_value(z, *args, **kwargs)
diff --git a/pymare/estimators/estimators.py b/pymare/estimators/estimators.py
index 7149c3f..7a2b1a9 100644
--- a/pymare/estimators/estimators.py
+++ b/pymare/estimators/estimators.py
@@ -191,7 +191,24 @@ def __init__(self, tau2=0.0):
         self.tau2 = tau2
 
     def fit(self, y, X, v=None):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+        v : :obj:`numpy.ndarray` of shape (n, d), optional
+            Sampling variances. If not provided, unit weights will be used.
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.WeightedLeastSquares`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if v is None:
             v = np.ones_like(y)
 
@@ -219,7 +236,24 @@ class DerSimonianLaird(BaseEstimator):
     """
 
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.DerSimonianLaird`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         y = ensure_2d(y)
         v = ensure_2d(v)
 
@@ -266,7 +300,24 @@ class Hedges(BaseEstimator):
     """
 
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.Hedges`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         k, p = X.shape[:2]
         _unit_v = np.ones_like(y)
         beta, inv_cov = weighted_least_squares(y, _unit_v, X, return_cov=True)
@@ -317,7 +368,24 @@ def __init__(self, method="ml", **kwargs):
 
     @_loopable
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.VarianceBasedLikelihoodEstimator`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         # use D-L estimate for initial values
         est_DL = DerSimonianLaird().fit(y, v, X).params_
         beta = est_DL["fe_params"]
@@ -393,7 +461,24 @@ def __init__(self, method="ml", **kwargs):
 
     @_loopable
     def fit(self, y, n, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        n : :obj:`numpy.ndarray` of shape (n, d)
+            Sample sizes.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.SampleSizeBasedLikelihoodEstimator`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if n.std() < np.sqrt(np.finfo(float).eps):
             raise ValueError(
                 "Sample size-based likelihood estimator cannot "
@@ -550,6 +635,9 @@ def fit(self, y, v, X, groups=None):
         `groups` argument can be used to specify the nesting structure
         (i.e., which rows in `y`, `v`, and `X` belong to each study).
         """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if y.ndim > 1 and y.shape[1] > 1:
             raise ValueError(
                 "The StanMetaRegression estimator currently does "
diff --git a/pymare/results.py b/pymare/results.py
index 02bfb75..b84b1f6 100644
--- a/pymare/results.py
+++ b/pymare/results.py
@@ -34,6 +34,12 @@ class MetaRegressionResults:
     tau2 : None or :obj:`numpy.ndarray` of shape (d,) or :obj:`float`, optional
         A 1-d array containing the estimated tau^2 value for each parallel dataset
         (or a float, for a single dataset). May be omitted by fixed-effects estimators.
+
+    Warning
+    -------
+    When an Estimator is fitted to arrays directly using the ``fit`` method, the Results object's
+    utility is limited.
+    Many methods will not work.
     """
 
     def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None):
@@ -95,6 +101,11 @@ def get_fe_stats(self, alpha=0.05):
     def get_re_stats(self, method="QP", alpha=0.05):
         """Get random-effect statistics.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            been fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         method : {"QP"}, optional
@@ -118,6 +129,9 @@ def get_re_stats(self, method="QP", alpha=0.05):
         ----------
         .. footbibliography::
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         if method == "QP":
             n_datasets = np.atleast_2d(self.tau2).shape[1]
             if n_datasets > 10:
@@ -160,6 +174,11 @@ def get_re_stats(self, method="QP", alpha=0.05):
     def get_heterogeneity_stats(self):
         """Get heterogeneity statistics.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            been fitted with ``fit_dataset``, not ``fit``.
+
         Returns
         -------
         :obj:`dict`
@@ -183,6 +202,9 @@ def get_heterogeneity_stats(self):
         ----------
         .. footbibliography::
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         v = self.estimator.get_v(self.dataset)
         q_fe = q_gen(self.dataset.y, v, self.dataset.X, 0)
         df = self.dataset.y.shape[0] - self.dataset.X.shape[1]
@@ -198,6 +220,11 @@ def to_df(self, alpha=0.05):
 
         This method only works for one-dimensional results.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            been fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         alpha : :obj:`float`, optional
@@ -220,6 +247,9 @@ def to_df(self, alpha=0.05):
                     the CI columns will be ``"ci_0.025"`` and ``"ci_0.975"``.
         =========== ==========================================================================
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         b_shape = self.fe_params.shape
         if len(b_shape) > 1 and b_shape[1] > 1:
             raise ValueError(
@@ -240,6 +270,11 @@ def to_df(self, alpha=0.05):
     def permutation_test(self, n_perm=1000):
         """Run permutation test.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            been fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         n_perm : :obj:`int`, optional
@@ -263,6 +298,9 @@ def permutation_test(self, n_perm=1000):
         This means that one can often set very high n_perm values (e.g., 100k) with little
         performance degradation.
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         n_obs, n_datasets = self.dataset.y.shape
 
         has_mods = self.dataset.X.shape[1] > 1
@@ -384,6 +422,11 @@ def p(self):
     def permutation_test(self, n_perm=1000):
         """Run permutation test.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            been fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         n_perm : :obj:`int`, optional
@@ -406,6 +449,9 @@ def permutation_test(self, n_perm=1000):
         set very high n_perm values (e.g., 100k) with little performance
         degradation.
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         n_obs, n_datasets = self.dataset.y.shape
 
         # create results arrays
diff --git a/pymare/tests/test_results.py b/pymare/tests/test_results.py
index 28aaacc..2baf3bd 100644
--- a/pymare/tests/test_results.py
+++ b/pymare/tests/test_results.py
@@ -33,6 +33,71 @@ def results_2d(fitted_estimator, dataset_2d):
     return est.fit_dataset(dataset_2d).summary()
 
 
+def test_meta_regression_results_from_arrays(dataset):
+    """Ensure that a MetaRegressionResults can be created from arrays.
+
+    This is a regression test for a bug that caused the MetaRegressionResults
+    to fail when Estimators were fitted to arrays instead of Datasets.
+    See https://github.com/neurostuff/PyMARE/issues/52 for more info.
+    """
+    est = DerSimonianLaird()
+    fitted_estimator = est.fit(y=dataset.y, X=dataset.X, v=dataset.v)
+    results = fitted_estimator.summary()
+    assert isinstance(results, MetaRegressionResults)
+    assert results.fe_params.shape == (2, 1)
+    assert results.fe_cov.shape == (2, 2, 1)
+    assert results.tau2.shape == (1,)
+
+    # fit overwrites dataset_ attribute with None
+    assert fitted_estimator.dataset_ is None
+    # fit_dataset overwrites it with the Dataset
+    fitted_estimator.fit_dataset(dataset)
+    assert isinstance(fitted_estimator.dataset_, Dataset)
+    # fit sets it back to None
+    fitted_estimator.fit(y=dataset.y, X=dataset.X, v=dataset.v)
+    assert fitted_estimator.dataset_ is None
+
+    # Some methods are not available if fit was used
+    results = fitted_estimator.summary()
+    with pytest.raises(ValueError):
+        results.get_re_stats()
+
+    with pytest.raises(ValueError):
+        results.get_heterogeneity_stats()
+
+    with pytest.raises(ValueError):
+        results.to_df()
+
+    with pytest.raises(ValueError):
+        results.permutation_test(1000)
+
+
+def test_combination_test_results_from_arrays(dataset):
+    """Ensure that a CombinationTestResults can be created from arrays.
+
+    This is a regression test for a bug that caused the CombinationTestResults
+    to fail when Estimators were fitted to arrays instead of Datasets.
+    See https://github.com/neurostuff/PyMARE/issues/52 for more info.
+    """
+    fitted_estimator = StoufferCombinationTest().fit(z=dataset.y)
+    results = fitted_estimator.summary()
+    assert isinstance(results, CombinationTestResults)
+    assert results.p.shape == (1,)
+
+    # fit overwrites dataset_ attribute with None
+    assert fitted_estimator.dataset_ is None
+    # fit_dataset overwrites it with the Dataset
+    fitted_estimator.fit_dataset(dataset)
+    assert isinstance(fitted_estimator.dataset_, Dataset)
+    # fit sets it back to None
+    fitted_estimator.fit(z=dataset.y)
+    assert fitted_estimator.dataset_ is None
+
+    # Some methods are not available if fit was used
+    with pytest.raises(ValueError):
+        fitted_estimator.summary().permutation_test(1000)
+
+
 def test_meta_regression_results_init_1d(fitted_estimator):
     """Test MetaRegressionResults from 1D data."""
     est = fitted_estimator