Skip to content

Commit

Permalink
Rename attributes 'feature_…_' to 'feature_names_…_' (#46)
Browse files: browse the repository at this point in the history
This commit
* renames iris_target to iris_target_name
* renames attributes 'feature_…_' to 'feature_names_…_'
* adjusts property names of fittable classes to scikit-learn conventions
  • Loading branch information
j-ittner authored Oct 15, 2020
1 parent c322f21 commit 04ca01c
Show file tree
Hide file tree
Showing 10 changed files with 110 additions and 82 deletions.
16 changes: 8 additions & 8 deletions sphinx/source/tutorial/sklearndf_tutorial.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,7 @@
"raw_mimetype": "text/restructuredtext"
},
"source": [
"The `~sklearndf.transformation.ColumnTransformerDF.features_original_` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
"The `~sklearndf.transformation.ColumnTransformerDF.feature_names_original_` attribute returns a series mapping the output columns (the series' index) to the input columns (the series' values):"
]
},
{
Expand Down Expand Up @@ -754,7 +754,7 @@
}
],
"source": [
"preprocessing_df.features_original_.to_frame().head(10)"
"preprocessing_df.feature_names_original_.to_frame().head(10)"
]
},
{
Expand Down Expand Up @@ -891,7 +891,7 @@
}
],
"source": [
"garage_type_derivatives = preprocessing_df.features_original_ == \"GarageType\"\n",
"garage_type_derivatives = preprocessing_df.feature_names_original_ == \"GarageType\"\n",
"\n",
"transformed_df.loc[:, garage_type_derivatives].head()"
]
Expand Down Expand Up @@ -1188,7 +1188,7 @@
"raw_mimetype": "text/restructuredtext"
},
"source": [
"Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `features_in_` returns the names of the ingoing features as a pandas index."
"Property `is_fitted` tells if the regressor is fitted, and -- for fitted estimators -- property `feature_names_in_` returns the names of the ingoing features as a pandas index."
]
},
{
Expand Down Expand Up @@ -1251,7 +1251,7 @@
}
],
"source": [
"random_forest_regressor_df.features_in_"
"random_forest_regressor_df.feature_names_in_"
]
},
{
Expand Down Expand Up @@ -2410,7 +2410,7 @@
}
],
"source": [
"boruta_pipeline.features_out_.to_list()"
"boruta_pipeline.feature_names_out_.to_list()"
]
},
{
Expand Down Expand Up @@ -2553,7 +2553,7 @@
}
],
"source": [
"boruta_pipeline.features_original_.to_frame()"
"boruta_pipeline.feature_names_original_.to_frame()"
]
},
{
Expand Down Expand Up @@ -2583,7 +2583,7 @@
}
],
"source": [
"boruta_pipeline.features_original_.unique()"
"boruta_pipeline.feature_names_original_.unique()"
]
}
],
Expand Down
6 changes: 3 additions & 3 deletions src/sklearndf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@
All estimators enhanced by `sklearndf` also implement an
additional attribute
:attr:`~EstimatorDF.features_in_`, keeping track of the
:attr:`~EstimatorDF.feature_names_in_`, keeping track of the
column names of the data
frame used to fit the estimator.
`sklearndf` transformers also implement
:attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track
:attr:`~TransformerDF.feature_names_out_` and
:attr:`~TransformerDF.feature_names_original_`, keeping track
of the feature names of the
transformed outputs as well as mapping output features
back to the input features.
Expand Down
32 changes: 16 additions & 16 deletions src/sklearndf/_sklearndf.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def fit(
pass

@property
def features_in_(self) -> pd.Index:
def feature_names_in_(self) -> pd.Index:
"""
The pandas column index with the names of the features used to fit this
estimator.
Expand Down Expand Up @@ -220,7 +220,7 @@ def __init__(self, *args, **kwargs) -> None:
self._features_original = None

@property
def features_original_(self) -> pd.Series:
def feature_names_original_(self) -> pd.Series:
"""
A pandas series, mapping the output features resulting from the transformation
to the original input features.
Expand All @@ -238,7 +238,7 @@ def features_original_(self) -> pd.Series:
return self._features_original

@property
def features_out_(self) -> pd.Index:
def feature_names_out_(self) -> pd.Index:
"""
A pandas column index with the names of the features produced by this
transformer
Expand Down Expand Up @@ -301,8 +301,8 @@ def _get_features_original(self) -> pd.Series:

def _get_features_out(self) -> pd.Index:
# return a pandas index with this transformer's output columns
# default behaviour: get index returned by features_original_
return self.features_original_.index
# default behaviour: get index returned by feature_names_original_
return self.feature_names_original_.index


class RegressorDF(LearnerDF, RegressorMixin, metaclass=ABCMeta):
Expand All @@ -320,6 +320,17 @@ class ClassifierDF(LearnerDF, ClassifierMixin, metaclass=ABCMeta):
Provides enhanced support for data frames.
"""

@property
@abstractmethod
def classes_(self) -> Sequence[Any]:
"""
Get the classes predicted by this classifier.
By default expects classes as a list-like stored in the `classes_` attribute.
:return: the classes predicted by this classifier
"""
pass

# noinspection PyPep8Naming
@abstractmethod
def predict_proba(
Expand Down Expand Up @@ -385,14 +396,3 @@ def decision_function(
for multi-output classifiers, a list of one observation/class data frames \
per output
"""

@property
@abstractmethod
def classes_(self) -> Sequence[Any]:
"""
Get the classes predicted by this classifier.
By default expects classes as a list-like stored in the `classes_` attribute.
:return: the classes predicted by this classifier
"""
pass
12 changes: 6 additions & 6 deletions src/sklearndf/_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
native estimators they wrap.
The wrappers also implement the additional column attributes introduced by `sklearndf`,
:meth:`~EstimatorDF.features_in_`, :meth:`~TransformerDF.features_out_`, and
:meth:`~TransformerDF.features_original_`.
:meth:`~EstimatorDF.feature_names_in_`, :meth:`~TransformerDF.feature_names_out_`, and
:meth:`~TransformerDF.feature_names_original_`.
"""

import inspect
Expand Down Expand Up @@ -248,7 +248,7 @@ def _check_parameter_types(
raise TypeError("arg X must be a DataFrame")
if self.is_fitted:
_EstimatorWrapperDF._verify_df(
df_name="X argument", df=X, expected_columns=self.features_in_
df_name="X argument", df=X, expected_columns=self.feature_names_in_
)
if y is not None and not isinstance(y, (pd.Series, pd.DataFrame)):
raise TypeError("arg y must be None, or a pandas Series or DataFrame")
Expand Down Expand Up @@ -361,7 +361,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
transformed = self._transform(X)

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_out_
transformed=transformed, index=X.index, columns=self.feature_names_out_
)

# noinspection PyPep8Naming
Expand All @@ -383,7 +383,7 @@ def fit_transform(
) from cause

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_out_
transformed=transformed, index=X.index, columns=self.feature_names_out_
)

# noinspection PyPep8Naming
Expand All @@ -396,7 +396,7 @@ def inverse_transform(self, X: pd.DataFrame) -> pd.DataFrame:
transformed = self._inverse_transform(X)

return self._transformed_to_df(
transformed=transformed, index=X.index, columns=self.features_in_
transformed=transformed, index=X.index, columns=self.feature_names_in_
)

def _reset_fit(self) -> None:
Expand Down
64 changes: 44 additions & 20 deletions src/sklearndf/pipeline/_learner_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,14 @@ def __init__(self, *, preprocessing: Optional[TransformerDF] = None) -> None:
f"{type(preprocessing).__name__}"
)

self.preprocessing = preprocessing
self._preprocessing = preprocessing

@property
def preprocessing(self) -> Optional[TransformerDF]:
"""
The preprocessing step.
"""
return self._preprocessing

@property
@abstractmethod
Expand All @@ -70,6 +77,35 @@ def final_estimator_name(self) -> str:
"""
pass

@property
def feature_names_out_(self) -> pd.Index:
"""
Pandas column index of all features resulting from the preprocessing step.
Same as :attr:`.feature_names_in_` if the preprocessing step is ``None``.
"""
if self.preprocessing is not None:
return self.preprocessing.feature_names_out_
else:
return self.feature_names_in_.rename(TransformerDF.COL_FEATURE_OUT)

@property
def feature_names_original_(self) -> pd.Series:
"""
Pandas series mapping the names of all features resulting from the preprocessing
step to the names of the input features they were derived from.
Returns an identity mapping of :attr:`.feature_names_in_` onto itself
if the preprocessing step is ``None``.
"""
if self.preprocessing is not None:
return self.preprocessing.feature_names_original_
else:
feature_names_in_ = self.feature_names_in_
return feature_names_in_.to_series(index=feature_names_in_).rename_axis(
index=TransformerDF.COL_FEATURE_OUT
)

# noinspection PyPep8Naming
def fit(
self: T,
Expand Down Expand Up @@ -122,18 +158,6 @@ def fit(

return self

@property
def features_out_(self) -> pd.Index:
"""
Pandas column index of all features resulting from the preprocessing step.
Same as :attr:`.features_in_` if the preprocessing step is ``None``.
"""
if self.preprocessing is not None:
return self.preprocessing.features_out_
else:
return self.features_in_.rename(TransformerDF.COL_FEATURE_OUT)

@property
def is_fitted(self) -> bool:
"""[see superclass]"""
Expand All @@ -143,9 +167,9 @@ def is_fitted(self) -> bool:

def _get_features_in(self) -> pd.Index:
if self.preprocessing is not None:
return self.preprocessing.features_in_
return self.preprocessing.feature_names_in_
else:
return self.final_estimator.features_in_
return self.final_estimator.feature_names_in_

def _get_n_outputs(self) -> int:
if self.preprocessing is not None:
Expand Down Expand Up @@ -284,11 +308,6 @@ def __init__(
)
self.classifier = classifier

@property
def classes_(self) -> Sequence[Any]:
"""[see superclass]"""
return self.final_estimator.classes_

@property
def final_estimator(self) -> T_FinalClassifierDF:
"""[see superclass]"""
Expand All @@ -299,6 +318,11 @@ def final_estimator_name(self) -> str:
"""[see superclass]"""
return "classifier"

@property
def classes_(self) -> Sequence[Any]:
"""[see superclass]"""
return self.final_estimator.classes_

# noinspection PyPep8Naming
def predict_proba(
self, X: pd.DataFrame, **predict_params
Expand Down
13 changes: 7 additions & 6 deletions src/sklearndf/pipeline/_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ def _iter_not_none(

def _get_features_original(self) -> pd.Series:
col_mappings = [
df_transformer.features_original_
df_transformer.feature_names_original_
for _, df_transformer in self._transformer_steps()
]

if len(col_mappings) == 0:
_features_out: pd.Index = self.features_in_
_features_out: pd.Index = self.feature_names_in_
_features_original: Union[np.ndarray, ExtensionArray] = _features_out.values
else:
_features_out: pd.Index = col_mappings[-1].index
Expand Down Expand Up @@ -175,9 +175,9 @@ def _get_features_original(self) -> pd.Series:
def _get_features_out(self) -> pd.Index:
for _, transformer in reversed(self.steps):
if isinstance(transformer, TransformerDF):
return transformer.features_out_
return transformer.feature_names_out_

return self.features_in_
return self.feature_names_in_


# noinspection PyAbstractClass
Expand Down Expand Up @@ -217,7 +217,8 @@ def _prepend_features_original(
return pd.concat(
objs=(
_prepend_features_original(
features_original=transformer.features_original_, name_prefix=name
features_original=transformer.feature_names_original_,
name_prefix=name,
)
for name, transformer, _ in self.native_estimator._iter()
)
Expand All @@ -233,7 +234,7 @@ def _get_features_out(self) -> pd.Index:
# noinspection PyProtectedMember
indices = [
self._prepend_features_out(
features_out=transformer.features_out_, name_prefix=name
features_out=transformer.feature_names_out_, name_prefix=name
)
for name, transformer, _ in self.native_estimator._iter()
]
Expand Down
Loading

0 comments on commit 04ca01c

Please sign in to comment.