Allow Results objects to work with Estimators' fit() method (#104)

* Override .dataset_ with None when calling fit(). * Warn about missing dataset attribute. * Test new behavior. * Improve Estimator fit docstrings. * Remove extra blank line. * Add @JulioAPeraza's review.
neurostuff · Jun 5, 2022 · f425ff9 · f425ff9
1 parent be85ec4
commit f425ff9
Show file tree

Hide file tree

Showing 4 changed files with 207 additions and 5 deletions.
diff --git a/pymare/estimators/combination.py b/pymare/estimators/combination.py
@@ -36,6 +36,9 @@ def _z_to_p(self, z):
 
     def fit(self, z, *args, **kwargs):
         """Fit the estimator to z-values."""
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if self.mode == "concordant":
             ose = self.__class__(mode="directed")
             p1 = ose.p_value(z, *args, **kwargs)

diff --git a/pymare/estimators/estimators.py b/pymare/estimators/estimators.py
@@ -191,7 +191,24 @@ def __init__(self, tau2=0.0):
         self.tau2 = tau2
 
     def fit(self, y, X, v=None):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+        v : :obj:`numpy.ndarray` of shape (n, d), optional
+            Sampling variances. If not provided, unit weights will be used.
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.WeightedLeastSquares`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if v is None:
             v = np.ones_like(y)
 
@@ -219,7 +236,24 @@ class DerSimonianLaird(BaseEstimator):
     """
 
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.DerSimonianLaird`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         y = ensure_2d(y)
         v = ensure_2d(v)
 
@@ -266,7 +300,24 @@ class Hedges(BaseEstimator):
     """
 
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.Hedges`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         k, p = X.shape[:2]
         _unit_v = np.ones_like(y)
         beta, inv_cov = weighted_least_squares(y, _unit_v, X, return_cov=True)
@@ -317,7 +368,24 @@ def __init__(self, method="ml", **kwargs):
 
     @_loopable
     def fit(self, y, v, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        v : :obj:`numpy.ndarray` of shape (n, d)
+            Sampling variances.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.VarianceBasedLikelihoodEstimator`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         # use D-L estimate for initial values
         est_DL = DerSimonianLaird().fit(y, v, X).params_
         beta = est_DL["fe_params"]
@@ -393,7 +461,24 @@ def __init__(self, method="ml", **kwargs):
 
     @_loopable
     def fit(self, y, n, X):
-        """Fit the estimator to data."""
+        """Fit the estimator to data.
+
+        Parameters
+        ----------
+        y : :obj:`numpy.ndarray` of shape (n, d)
+            The dependent variable(s) (y).
+        n : :obj:`numpy.ndarray` of shape (n, d)
+            Sample sizes.
+        X : :obj:`numpy.ndarray` of shape (n, p)
+            The independent variable(s) (X).
+
+        Returns
+        -------
+        :obj:`~pymare.estimators.SampleSizeBasedLikelihoodEstimator`
+        """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if n.std() < np.sqrt(np.finfo(float).eps):
             raise ValueError(
                 "Sample size-based likelihood estimator cannot "
@@ -550,6 +635,9 @@ def fit(self, y, v, X, groups=None):
         `groups` argument can be used to specify the nesting structure
         (i.e., which rows in `y`, `v`, and `X` belong to each study).
         """
+        # This resets the Estimator's dataset_ attribute. fit_dataset will overwrite if called.
+        self.dataset_ = None
+
         if y.ndim > 1 and y.shape[1] > 1:
             raise ValueError(
                 "The StanMetaRegression estimator currently does "

diff --git a/pymare/results.py b/pymare/results.py
@@ -34,6 +34,12 @@ class MetaRegressionResults:
     tau2 : None or :obj:`numpy.ndarray` of shape (d,) or :obj:`float`, optional
         A 1-d array containing the estimated tau^2 value for each parallel dataset
         (or a float, for a single dataset). May be omitted by fixed-effects estimators.
+
+    Warning
+    -------
+    When an Estimator is fitted to arrays directly using the ``fit`` method, the Results object's
+    utility is limited.
+    Many methods will not work.
     """
 
     def __init__(self, estimator, dataset, fe_params, fe_cov, tau2=None):
@@ -95,6 +101,11 @@ def get_fe_stats(self, alpha=0.05):
     def get_re_stats(self, method="QP", alpha=0.05):
         """Get random-effect statistics.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            be fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         method : {"QP"}, optional
@@ -118,6 +129,9 @@ def get_re_stats(self, method="QP", alpha=0.05):
         ----------
         .. footbibliography::
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         if method == "QP":
             n_datasets = np.atleast_2d(self.tau2).shape[1]
             if n_datasets > 10:
@@ -160,6 +174,11 @@ def get_re_stats(self, method="QP", alpha=0.05):
     def get_heterogeneity_stats(self):
         """Get heterogeneity statistics.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            be fitted with ``fit_dataset``, not ``fit``.
+
         Returns
         -------
         :obj:`dict`
@@ -183,6 +202,9 @@ def get_heterogeneity_stats(self):
         ----------
         .. footbibliography::
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         v = self.estimator.get_v(self.dataset)
         q_fe = q_gen(self.dataset.y, v, self.dataset.X, 0)
         df = self.dataset.y.shape[0] - self.dataset.X.shape[1]
@@ -198,6 +220,11 @@ def to_df(self, alpha=0.05):
 
             This method only works for one-dimensional results.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            be fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         alpha : :obj:`float`, optional
@@ -220,6 +247,9 @@ def to_df(self, alpha=0.05):
                         the CI columns will be ``"ci_0.025"`` and ``"ci_0.975"``.
             =========== ==========================================================================
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         b_shape = self.fe_params.shape
         if len(b_shape) > 1 and b_shape[1] > 1:
             raise ValueError(
@@ -240,6 +270,11 @@ def to_df(self, alpha=0.05):
     def permutation_test(self, n_perm=1000):
         """Run permutation test.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            be fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         n_perm : :obj:`int`, optional
@@ -263,6 +298,9 @@ def permutation_test(self, n_perm=1000):
         This means that one can often set very high n_perm values (e.g., 100k) with little
         performance degradation.
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         n_obs, n_datasets = self.dataset.y.shape
         has_mods = self.dataset.X.shape[1] > 1
 
@@ -384,6 +422,11 @@ def p(self):
     def permutation_test(self, n_perm=1000):
         """Run permutation test.
 
+        .. warning::
+
+            This method relies on the ``.dataset`` attribute, so the original Estimator must have
+            be fitted with ``fit_dataset``, not ``fit``.
+
         Parameters
         ----------
         n_perm : :obj:`int`, optional
@@ -406,6 +449,9 @@ def permutation_test(self, n_perm=1000):
         set very high n_perm values (e.g., 100k) with little performance
         degradation.
         """
+        if self.dataset is None:
+            raise ValueError("The Dataset is unavailable. This method requires a Dataset.")
+
         n_obs, n_datasets = self.dataset.y.shape
 
         # create results arrays

diff --git a/pymare/tests/test_results.py b/pymare/tests/test_results.py
@@ -33,6 +33,71 @@ def results_2d(fitted_estimator, dataset_2d):
     return est.fit_dataset(dataset_2d).summary()
 
 
+def test_meta_regression_results_from_arrays(dataset):
+    """Ensure that a MetaRegressionResults can be created from arrays.
+
+    This is a regression test for a bug that caused the MetaRegressionResults
+    to fail when Estimators were fitted to arrays instead of Datasets.
+    See https://github.com/neurostuff/PyMARE/issues/52 for more info.
+    """
+    est = DerSimonianLaird()
+    fitted_estimator = est.fit(y=dataset.y, X=dataset.X, v=dataset.v)
+    results = fitted_estimator.summary()
+    assert isinstance(results, MetaRegressionResults)
+    assert results.fe_params.shape == (2, 1)
+    assert results.fe_cov.shape == (2, 2, 1)
+    assert results.tau2.shape == (1,)
+
+    # fit overwrites dataset_ attribute with None
+    assert fitted_estimator.dataset_ is None
+    # fit_dataset overwrites it with the Dataset
+    fitted_estimator.fit_dataset(dataset)
+    assert isinstance(fitted_estimator.dataset_, Dataset)
+    # fit sets it back to None
+    fitted_estimator.fit(y=dataset.y, X=dataset.X, v=dataset.v)
+    assert fitted_estimator.dataset_ is None
+
+    # Some methods are not available if fit was used
+    results = fitted_estimator.summary()
+    with pytest.raises(ValueError):
+        results.get_re_stats()
+
+    with pytest.raises(ValueError):
+        results.get_heterogeneity_stats()
+
+    with pytest.raises(ValueError):
+        results.to_df()
+
+    with pytest.raises(ValueError):
+        results.permutation_test(1000)
+
+
+def test_combination_test_results_from_arrays(dataset):
+    """Ensure that a CombinationTestResults can be created from arrays.
+
+    This is a regression test for a bug that caused the MetaRegressionResults
+    to fail when Estimators were fitted to arrays instead of Datasets.
+    See https://github.com/neurostuff/PyMARE/issues/52 for more info.
+    """
+    fitted_estimator = StoufferCombinationTest().fit(z=dataset.y)
+    results = fitted_estimator.summary()
+    assert isinstance(results, CombinationTestResults)
+    assert results.p.shape == (1,)
+
+    # fit overwrites dataset_ attribute with None
+    assert fitted_estimator.dataset_ is None
+    # fit_dataset overwrites it with the Dataset
+    fitted_estimator.fit_dataset(dataset)
+    assert isinstance(fitted_estimator.dataset_, Dataset)
+    # fit sets it back to None
+    fitted_estimator.fit(z=dataset.y)
+    assert fitted_estimator.dataset_ is None
+
+    # Some methods are not available if fit was used
+    with pytest.raises(ValueError):
+        fitted_estimator.summary().permutation_test(1000)
+
+
 def test_meta_regression_results_init_1d(fitted_estimator):
     """Test MetaRegressionResults from 1D data."""
     est = fitted_estimator