Rename attributes 'feature_…_' to 'feature_names_…_' (#46)

This commit:

* renames `iris_target` to `iris_target_name`
* renames attributes 'feature_…_' to 'feature_names_…_'
* adjusts property names of fittable classes to scikit-learn conventions
BCG-X-Official · Oct 15, 2020 · 04ca01c · 04ca01c
1 parent c322f21
commit 04ca01c
Show file tree

Hide file tree

Showing 10 changed files with 110 additions and 82 deletions.
diff --git a/sphinx/source/tutorial/sklearndf_tutorial.ipynb b/sphinx/source/tutorial/sklearndf_tutorial.ipynb
@@ -644,7 +644,7 @@
     "raw_mimetype": "text/restructuredtext"
    },
    "source": [
-    "The `~sklearndf.transformation.ColumnTransformerDF.features_original_` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
+    "The `~sklearndf.transformation.ColumnTransformerDF.feature_names_original_` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
    ]
   },
   {
@@ -754,7 +754,7 @@
     }
    ],
    "source": [
-    "preprocessing_df.features_original_.to_frame().head(10)"
+    "preprocessing_df.feature_names_original_.to_frame().head(10)"
    ]
   },
   {
@@ -891,7 +891,7 @@
     }
    ],
    "source": [
-    "garage_type_derivatives = preprocessing_df.features_original_ == \"GarageType\"\n",
+    "garage_type_derivatives = preprocessing_df.feature_names_original_ == \"GarageType\"\n",
     "\n",
     "transformed_df.loc[:, garage_type_derivatives].head()"
    ]
@@ -1188,7 +1188,7 @@
     "raw_mimetype": "text/restructuredtext"
    },
    "source": [
-    "Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `features_in_` returns the names of the ingoing features as a pandas index."
+    "Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `feature_names_in_` returns the names of the ingoing features as a pandas index."
    ]
   },
   {
@@ -1251,7 +1251,7 @@
     }
    ],
    "source": [
-    "random_forest_regressor_df.features_in_"
+    "random_forest_regressor_df.feature_names_in_"
    ]
   },
   {
@@ -2410,7 +2410,7 @@
     }
    ],
    "source": [
-    "boruta_pipeline.features_out_.to_list()"
+    "boruta_pipeline.feature_names_out_.to_list()"
    ]
   },
   {
@@ -2553,7 +2553,7 @@
     }
    ],
    "source": [
-    "boruta_pipeline.features_original_.to_frame()"
+    "boruta_pipeline.feature_names_original_.to_frame()"
    ]
   },
   {
@@ -2583,7 +2583,7 @@
     }
    ],
    "source": [
-    "boruta_pipeline.features_original_.unique()"
+    "boruta_pipeline.feature_names_original_.unique()"
    ]
   }
  ],

diff --git a/src/sklearndf/__init__.py b/src/sklearndf/__init__.py
@@ -25,13 +25,13 @@
 
 All estimators enhanced by `sklearndf` also implement an
 additional attribute
-:attr:`~EstimatorDF.features_in_`, keeping track of the
+:attr:`~EstimatorDF.feature_names_in_`, keeping track of the
 column names of the data
 frame used to fit the estimator.
 
 `sklearndf` transformers also implement
-:attr:`~TransformerDF.features_out_` and
-:attr:`~TransformerDF.features_original_`, keeping track
+:attr:`~TransformerDF.feature_names_out_` and
+:attr:`~TransformerDF.feature_names_original_`, keeping track
 of the feature names of the
 transformed outputs as well as mapping output features
 back to the input features.

diff --git a/src/sklearndf/_sklearndf.py b/src/sklearndf/_sklearndf.py
@@ -83,7 +83,7 @@ def fit(
         pass
 
     @property
-    def features_in_(self) -> pd.Index:
+    def feature_names_in_(self) -> pd.Index:
         """
         The pandas column index with the names of the features used to fit this
         estimator.
@@ -220,7 +220,7 @@ def __init__(self, *args, **kwargs) -> None:
         self._features_original = None
 
     @property
-    def features_original_(self) -> pd.Series:
+    def feature_names_original_(self) -> pd.Series:
         """
         A pandas series, mapping the output features resulting from the transformation
         to the original input features.
@@ -238,7 +238,7 @@ def features_original_(self) -> pd.Series:
         return self._features_original
 
     @property
-    def features_out_(self) -> pd.Index:
+    def feature_names_out_(self) -> pd.Index:
         """
         A pandas column index with the names of the features produced by this
         transformer
@@ -301,8 +301,8 @@ def _get_features_original(self) -> pd.Series:
 
     def _get_features_out(self) -> pd.Index:
         # return a pandas index with this transformer's output columns
-        # default behaviour: get index returned by features_original_
-        return self.features_original_.index
+        # default behaviour: get index returned by feature_names_original_
+        return self.feature_names_original_.index
 
 
 class RegressorDF(LearnerDF, RegressorMixin, metaclass=ABCMeta):
@@ -320,6 +320,17 @@ class ClassifierDF(LearnerDF, ClassifierMixin, metaclass=ABCMeta):
     Provides enhanced support for data frames.
     """
 
+    @property
+    @abstractmethod
+    def classes_(self) -> Sequence[Any]:
+        """
+        Get the classes predicted by this classifier.
+        By default expects classes as a list-like stored in the `classes_` attribute.
+
+        :return: the classes predicted by this classifier
+        """
+        pass
+
     # noinspection PyPep8Naming
     @abstractmethod
     def predict_proba(
@@ -385,14 +396,3 @@ def decision_function(
             for multi-output classifiers, a list of one observation/class data frames \
             per output
         """
-
-    @property
-    @abstractmethod
-    def classes_(self) -> Sequence[Any]:
-        """
-        Get the classes predicted by this classifier.
-        By default expects classes as a list-like stored in the `classes_` attribute.
-
-        :return: the classes predicted by this classifier
-        """
-        pass
diff --git a/src/sklearndf/_wrapper.py b/src/sklearndf/_wrapper.py
@@ -7,8 +7,8 @@
 native estimators they wrap.
 
 The wrappers also implement the additional column attributes introduced by `sklearndf`,
-:meth:`~EstimatorDF.features_in_`, :meth:`~TransformerDF.features_out_`, and
-:meth:`~TransformerDF.features_original_`.
+:meth:`~EstimatorDF.feature_names_in_`, :meth:`~TransformerDF.feature_names_out_`, and
+:meth:`~TransformerDF.feature_names_original_`.
 """
 
 import inspect
@@ -248,7 +248,7 @@ def _check_parameter_types(
             raise TypeError("arg X must be a DataFrame")
         if self.is_fitted:
             _EstimatorWrapperDF._verify_df(
-                df_name="X argument", df=X, expected_columns=self.features_in_
+                df_name="X argument", df=X, expected_columns=self.feature_names_in_
             )
         if y is not None and not isinstance(y, (pd.Series, pd.DataFrame)):
             raise TypeError("arg y must be None, or a pandas Series or DataFrame")
@@ -361,7 +361,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         transformed = self._transform(X)
 
         return self._transformed_to_df(
-            transformed=transformed, index=X.index, columns=self.features_out_
+            transformed=transformed, index=X.index, columns=self.feature_names_out_
         )
 
     # noinspection PyPep8Naming
@@ -383,7 +383,7 @@ def fit_transform(
             ) from cause
 
         return self._transformed_to_df(
-            transformed=transformed, index=X.index, columns=self.features_out_
+            transformed=transformed, index=X.index, columns=self.feature_names_out_
         )
 
     # noinspection PyPep8Naming
@@ -396,7 +396,7 @@ def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
         transformed = self._inverse_transform(X)
 
         return self._transformed_to_df(
-            transformed=transformed, index=X.index, columns=self.features_in_
+            transformed=transformed, index=X.index, columns=self.feature_names_in_
         )
 
     def _reset_fit(self) -> None:

diff --git a/src/sklearndf/pipeline/_learner_pipeline.py b/src/sklearndf/pipeline/_learner_pipeline.py
@@ -45,7 +45,14 @@ def __init__(self, *, preprocessing: Optional[TransformerDF] = None) -> None:
                 f"{type(preprocessing).__name__}"
             )
 
-        self.preprocessing = preprocessing
+        self._preprocessing = preprocessing
+
+    @property
+    def preprocessing(self) -> Optional[TransformerDF]:
+        """
+        The preprocessing step.
+        """
+        return self._preprocessing
 
     @property
     @abstractmethod
@@ -70,6 +77,35 @@ def final_estimator_name(self) -> str:
         """
         pass
 
+    @property
+    def feature_names_out_(self) -> pd.Index:
+        """
+        Pandas column index of all features resulting from the preprocessing step.
+
+        Same as :attr:`.feature_names_in_` if the preprocessing step is ``None``.
+        """
+        if self.preprocessing is not None:
+            return self.preprocessing.feature_names_out_
+        else:
+            return self.feature_names_in_.rename(TransformerDF.COL_FEATURE_OUT)
+
+    @property
+    def feature_names_original_(self) -> pd.Series:
+        """
+        Pandas series mapping the names of all features resulting from the preprocessing
+        step to the names of the input features they were derived from.
+
+        Returns an identity mapping of :attr:`.feature_names_in_` onto itself
+        if the preprocessing step is ``None``.
+        """
+        if self.preprocessing is not None:
+            return self.preprocessing.feature_names_original_
+        else:
+            feature_names_in_ = self.feature_names_in_
+            return feature_names_in_.to_series(index=feature_names_in_).rename_axis(
+                index=TransformerDF.COL_FEATURE_OUT
+            )
+
     # noinspection PyPep8Naming
     def fit(
         self: T,
@@ -122,18 +158,6 @@ def fit(
 
         return self
 
-    @property
-    def features_out_(self) -> pd.Index:
-        """
-        Pandas column index of all features resulting from the preprocessing step.
-
-        Same as :attr:`.features_in_` if the preprocessing step is ``None``.
-        """
-        if self.preprocessing is not None:
-            return self.preprocessing.features_out_
-        else:
-            return self.features_in_.rename(TransformerDF.COL_FEATURE_OUT)
-
     @property
     def is_fitted(self) -> bool:
         """[see superclass]"""
@@ -143,9 +167,9 @@ def is_fitted(self) -> bool:
 
     def _get_features_in(self) -> pd.Index:
         if self.preprocessing is not None:
-            return self.preprocessing.features_in_
+            return self.preprocessing.feature_names_in_
         else:
-            return self.final_estimator.features_in_
+            return self.final_estimator.feature_names_in_
 
     def _get_n_outputs(self) -> int:
         if self.preprocessing is not None:
@@ -284,11 +308,6 @@ def __init__(
             )
         self.classifier = classifier
 
-    @property
-    def classes_(self) -> Sequence[Any]:
-        """[see superclass]"""
-        return self.final_estimator.classes_
-
     @property
     def final_estimator(self) -> T_FinalClassifierDF:
         """[see superclass]"""
@@ -299,6 +318,11 @@ def final_estimator_name(self) -> str:
         """[see superclass]"""
         return "classifier"
 
+    @property
+    def classes_(self) -> Sequence[Any]:
+        """[see superclass]"""
+        return self.final_estimator.classes_
+
     # noinspection PyPep8Naming
     def predict_proba(
         self, X: pd.DataFrame, **predict_params

diff --git a/src/sklearndf/pipeline/_pipeline.py b/src/sklearndf/pipeline/_pipeline.py
@@ -138,12 +138,12 @@ def _iter_not_none(
 
     def _get_features_original(self) -> pd.Series:
         col_mappings = [
-            df_transformer.features_original_
+            df_transformer.feature_names_original_
             for _, df_transformer in self._transformer_steps()
         ]
 
         if len(col_mappings) == 0:
-            _features_out: pd.Index = self.features_in_
+            _features_out: pd.Index = self.feature_names_in_
             _features_original: Union[np.ndarray, ExtensionArray] = _features_out.values
         else:
             _features_out: pd.Index = col_mappings[-1].index
@@ -175,9 +175,9 @@ def _get_features_original(self) -> pd.Series:
     def _get_features_out(self) -> pd.Index:
         for _, transformer in reversed(self.steps):
             if isinstance(transformer, TransformerDF):
-                return transformer.features_out_
+                return transformer.feature_names_out_
 
-        return self.features_in_
+        return self.feature_names_in_
 
 
 # noinspection PyAbstractClass
@@ -217,7 +217,8 @@ def _prepend_features_original(
         return pd.concat(
             objs=(
                 _prepend_features_original(
-                    features_original=transformer.features_original_, name_prefix=name
+                    features_original=transformer.feature_names_original_,
+                    name_prefix=name,
                 )
                 for name, transformer, _ in self.native_estimator._iter()
             )
@@ -233,7 +234,7 @@ def _get_features_out(self) -> pd.Index:
         # noinspection PyProtectedMember
         indices = [
             self._prepend_features_out(
-                features_out=transformer.features_out_, name_prefix=name
+                features_out=transformer.feature_names_out_, name_prefix=name
             )
             for name, transformer, _ in self.native_estimator._iter()
         ]