diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 00000000..2395db87 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,6 @@ +[isort] +profile=black +src_paths=src,test +known_local_folder=sklearndf,test +known_first_party=pytools +known_third_party=numpy,pandas,joblib,sklearn,matplot \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3cb791c2..6a85fbe7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,9 @@ repos: + - repo: https://github.com/PyCQA/isort + rev: 5.5.4 + hooks: + - id: isort + - repo: https://github.com/psf/black rev: 20.8b1 hooks: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5a0b2ec9..9e252502 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -31,12 +31,52 @@ variables: }} stages: + # Check code quality first to fail fast (isort, flake8, black) + - stage: code_quality_checks + displayName: 'Code quality' + jobs: + - job: + displayName: 'isort' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.7.*' + displayName: 'use Python 3.7' + - script: | + python -m pip install isort==5.5.4 + python -m isort --check --diff . + displayName: 'isort' + - job: + displayName: 'black' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.7.*' + displayName: 'use Python 3.7' + - script: | + python -m pip install black==20.8b1 + python -m black --check . + displayName: 'black' + - job: + displayName: 'flake8' + steps: + - task: UsePythonVersion@0 + inputs: + versionSpec: '3.7.*' + displayName: 'use Python 3.7' + - script: | + python -m pip install flake8 flake8-comprehensions flake8-import-order + python -m flake8 --config flake8.ini -v . 
+ displayName: 'Run flake8' + + # detect whether the conda build config was changed -> then we must run a build test - stage: detect_conda_changes - displayName: 'detect conda changes' + displayName: 'Conda build config' jobs: - job: checkout_and_diff + displayName: 'detect changes' steps: - checkout: self @@ -57,14 +97,14 @@ stages: - stage: - displayName: 'simple pytest' + displayName: 'Unit tests' dependsOn: 'detect_conda_changes' variables: conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ] jobs: - job: - displayName: 'pytest @ develop environment' + displayName: 'in develop environment' condition: > and( ne(variables.master_or_release, 'True'), @@ -77,8 +117,8 @@ stages: steps: - task: UsePythonVersion@0 inputs: - versionSpec: '3.7' - displayName: 'Use Python 3.7' + versionSpec: '3.7.*' + displayName: 'use Python 3.7' - checkout: self - checkout: pytools @@ -121,14 +161,14 @@ stages: # - FOR RELEASE BRANCH: 3 BUILD TESTS # - OTHERWISE: 1 BUILD TEST - stage: - displayName: 'conda build & pytest' + displayName: 'Conda build & test' dependsOn: 'detect_conda_changes' variables: conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ] jobs: - job: - displayName: '(single)' + displayName: 'single' condition: > and( ne(variables.master_or_release, 'True'), @@ -185,7 +225,7 @@ stages: displayName: "build & test conda package" - job: - displayName: '(matrix)' + displayName: 'matrix' condition: eq(variables.master_or_release, 'True') pool: @@ -225,7 +265,6 @@ stages: FACET_V_BORUTA: '>=0.3' FACET_V_LGBM: '>=2.2' FACET_V_JOBLIB: '>=0.13' - steps: - task: UsePythonVersion@0 inputs: diff --git a/environment.yml b/environment.yml index 5fecca68..263c7308 100644 --- a/environment.yml +++ b/environment.yml @@ -8,6 +8,9 @@ dependencies: - conda-build - conda-verify - docutils + - flake8 = 3.8.* + - flake8-comprehensions = 
3.2.* + - isort = 5.5.* - joblib = 0.16.* - jupyter >= 1.0 - lightgbm = 3.0.* diff --git a/flake8.ini b/flake8.ini new file mode 100644 index 00000000..699f9983 --- /dev/null +++ b/flake8.ini @@ -0,0 +1,24 @@ +[flake8] + +max-line-length = 88 + +show-source = true + +ignore = + E203, # space before : (needed for how black formats slicing) + W503, # line break before binary operator + W504, # line break after binary operator + E402, # module level import not at top of file + E731, # do not assign a lambda expression, use a def + E741, # ignore not easy to read variables like i l I etc + C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal + S001, # found modulo formatter (incorrect picks up mod operations) + +per-file-ignores = + __init__.py: F401, F403, F405 + +exclude = + .eggs/*.py, + venv/*, + .venv/*, + .git/* diff --git a/sphinx/source/conf.py b/sphinx/source/conf.py index c1f41562..80707039 100644 --- a/sphinx/source/conf.py +++ b/sphinx/source/conf.py @@ -21,7 +21,7 @@ ) ) -from conf_base import * +from conf_base import set_config # ----- custom configuration ----- diff --git a/src/sklearndf/__init__.py b/src/sklearndf/__init__.py index 2242d146..5211c715 100644 --- a/src/sklearndf/__init__.py +++ b/src/sklearndf/__init__.py @@ -1,55 +1,84 @@ """ -`sklearndf` augments more than 150 scikit-learn estimators for native support of data +`sklearndf` augments more than 150 scikit-learn estimators for +native support of data frames, while leaving the original API intact. -Augmented scikit-learn classes are named after their native scikit-learn counterparts, +Augmented scikit-learn classes are named after their native +scikit-learn counterparts, with `DF` added as a suffix: :class:`.SimpleImputerDF` takes the place of :class:`~sklearn.impute.SimpleImputer`, :class:`.RandomForestRegressorDF` takes the place of :class:`~sklearn.ensemble.RandomForestRegressor`, and so on. 
-For all methods expecting an `X` argument for a feature matrix and potentially a -`y` argument for one or more targets, `sklearndf` estimators expect a pandas -:class:`~pandas.DataFrame` for `X` and a pandas :class:`~pandas.Series` for a -1-dimensional `y`, or a pandas :class:`~pandas.DataFrame` when fitting to multiple +For all methods expecting an `X` argument for a +feature matrix and potentially a +`y` argument for one or more targets, `sklearndf` +estimators expect a pandas +:class:`~pandas.DataFrame` for `X` and a +pandas :class:`~pandas.Series` for a +1-dimensional `y`, or a pandas :class:`~pandas.DataFrame` +when fitting to multiple targets or outputs. This includes methods such as :meth:`~EstimatorDF.fit`, -:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`, and so on. +:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`, +and so on. -All estimators enhanced by `sklearndf` also implement an additional attribute -:attr:`~EstimatorDF.features_in_`, keeping track of the column names of the data +All estimators enhanced by `sklearndf` also implement an +additional attribute +:attr:`~EstimatorDF.features_in_`, keeping track of the +column names of the data frame used to fit the estimator. -`sklearndf` transformers also implement :attr:`~TransformerDF.features_out_` and -:attr:`~TransformerDF.features_original_`, keeping track of the feature names of the -transformed outputs as well as mapping output features back to the input features. -This enables tracing features back to the original inputs even across complex +`sklearndf` transformers also implement +:attr:`~TransformerDF.features_out_` and +:attr:`~TransformerDF.features_original_`, keeping track +of the feature names of the +transformed outputs as well as mapping output features +back to the input features. 
+This enables tracing features back to the original +inputs even across complex pipelines (see allso :class:`.PipelineDF`) -`sklearndf` classes implement a class hierarchy that follows the taxonomy of -scikit-learn classes (but is only partially reflected via class inheritance in the +`sklearndf` classes implement a class hierarchy that +follows the taxonomy of +scikit-learn classes (but is only partially reflected +via class inheritance in the original implementation): -- all `sklearndf` transformers are subclasses of :class:`.TransformerDF`, which \ - in turn provides the API for all common transformer methods, e.g., \ +- all `sklearndf` transformers are subclasses of +:class:`.TransformerDF`, which \ + in turn provides the API for all common transformer + methods, e.g., \ :meth:`~TransformerDF.transform` -- all `sklearndf` regressors are subclasses of :class:`.RegressorDF`, which \ - in turn provides the API for all common regressor methods, e.g., \ +- all `sklearndf` regressors are subclasses +of :class:`.RegressorDF`, which \ + in turn provides the API for all common regressor + methods, e.g., \ :meth:`~LearnerDF.predict` -- all `sklearndf` classifiers are subclasses of :class:`.ClassifierDF`, which \ - in turn provides the API for all common classifier methods, e.g., \ +- all `sklearndf` classifiers are subclasses of +:class:`.ClassifierDF`, which \ + in turn provides the API for all common classifier + methods, e.g., \ :meth:`~ClassifierDF.predict_proba` -- all `sklearndf` regressors and classifiers are subclasses of :class:`.LearnerDF` -- all `sklearndf` estimators are subclasses of :class:`.EstimatorDF` +- all `sklearndf` regressors and classifiers are +subclasses of :class:`.LearnerDF` +- all `sklearndf` estimators are subclasses of +:class:`.EstimatorDF` -`sklearndf` introduces two additional pipeline classes, :class:`.RegressorPipelineDF` and -:class:`.ClassifierPipelineDF`, with an abstract base class :class:`.LearnerPipelineDF`, -to allow for easier 
handling of common types of ML pipelines. -These classes implement pipelines with two steps -- one preprocessing step, followed by +`sklearndf` introduces two additional pipeline classes, +:class:`.RegressorPipelineDF` and +:class:`.ClassifierPipelineDF`, with an abstract base +class :class:`.LearnerPipelineDF`, +to allow for easier handling of common types of ML +pipelines. +These classes implement pipelines with two steps -- +one preprocessing step, followed by a regressor or a classifier. -`sklearndf` also provides data frame support for a selection of custom or 3rd-party -estimators, most notably :class:`.BorutaDF`, :class:`.LGBMRegressorDF`, and +`sklearndf` also provides data frame support for a +selection of custom or 3rd-party +estimators, most notably :class:`.BorutaDF`, +:class:`.LGBMRegressorDF`, and :class:`.LGBMClassifierDF`. All `sklearndf` estimators are fully type hinted. @@ -57,5 +86,4 @@ from ._sklearndf import * - __version__ = "1.0.0" diff --git a/src/sklearndf/_wrapper.py b/src/sklearndf/_wrapper.py index c2b56348..ce22fff1 100644 --- a/src/sklearndf/_wrapper.py +++ b/src/sklearndf/_wrapper.py @@ -17,19 +17,19 @@ from abc import ABCMeta, abstractmethod from functools import update_wrapper from typing import ( - Optional, - Iterable, - Generic, - Mapping, - Sequence, Any, - Union, - Callable, - TypeVar, - Type, AnyStr, + Callable, Dict, + Generic, + Iterable, List, + Mapping, + Optional, + Sequence, + Type, + TypeVar, + Union, ) import numpy as np @@ -43,6 +43,7 @@ ) from pytools.api import inheritdoc + from sklearndf import ClassifierDF, EstimatorDF, LearnerDF, RegressorDF, TransformerDF log = logging.getLogger(__name__) @@ -969,13 +970,13 @@ def _update_class_docstring( df_estimator_type.__doc__ = "\n".join( [ *tag_line, - f""" - .. note:: - This class is a wrapper around class :class:`{estimator_name}`. 
- - It provides enhanced support for pandas data frames, and otherwise replicates - all parameters and behaviours of class :class:`~{estimator_name}`. -""", + ( + f".. note::This class is a wrapper around class \n" + f":class:{estimator_name}. It provides enhanced support for " + f"pandas data frames, \n" + f"and otherwise replicates all parameters and \n" + f"behaviours of class :class:~{estimator_name}." + ), *(base_doc_lines if INCLUDE_FULL_SKLEARN_DOCUMENTATION else []), ] ) diff --git a/src/sklearndf/classification/__init__.py b/src/sklearndf/classification/__init__.py index 3f96cfca..92100dad 100644 --- a/src/sklearndf/classification/__init__.py +++ b/src/sklearndf/classification/__init__.py @@ -12,4 +12,3 @@ if __parse_version(__sklearn_version__) >= __parse_version("0.23"): from ._classification_v0_23 import * - diff --git a/src/sklearndf/classification/_classification.py b/src/sklearndf/classification/_classification.py index 19780e40..2e55f271 100644 --- a/src/sklearndf/classification/_classification.py +++ b/src/sklearndf/classification/_classification.py @@ -46,7 +46,7 @@ ) from sklearn.neural_network import MLPClassifier from sklearn.semi_supervised import LabelPropagation, LabelSpreading -from sklearn.svm import LinearSVC, NuSVC, SVC +from sklearn.svm import SVC, LinearSVC, NuSVC from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier from .. import ClassifierDF @@ -102,6 +102,8 @@ # # noinspection PyAbstractClass + + @df_estimator(df_wrapper_type=_ClassifierWrapperDF) class DummyClassifierDF(ClassifierDF, DummyClassifier): """ @@ -418,17 +420,6 @@ class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV): pass -# noinspection PyAbstractClass -@df_estimator(df_wrapper_type=_ClassifierWrapperDF) -class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV): - """ - Wraps :class:`sklearn.linear_model.logistic.LogisticRegressionCV`; accepts and - returns data frames. 
- """ - - pass - - # noinspection PyAbstractClass @df_estimator(df_wrapper_type=_ClassifierWrapperDF) class PassiveAggressiveClassifierDF(ClassifierDF, PassiveAggressiveClassifier): diff --git a/src/sklearndf/classification/_classification_v0_22.py b/src/sklearndf/classification/_classification_v0_22.py index 8e812ee4..f666e603 100644 --- a/src/sklearndf/classification/_classification_v0_22.py +++ b/src/sklearndf/classification/_classification_v0_22.py @@ -9,11 +9,7 @@ from sklearn.naive_bayes import CategoricalNB from .. import ClassifierDF -from .._wrapper import ( - _ClassifierWrapperDF, - _StackingClassifierWrapperDF, - df_estimator, -) +from .._wrapper import _ClassifierWrapperDF, _StackingClassifierWrapperDF, df_estimator log = logging.getLogger(__name__) @@ -28,7 +24,9 @@ # # noinspection PyAbstractClass -@df_estimator(df_wrapper_type=_ClassifierWrapperDF,) + + +@df_estimator(df_wrapper_type=_ClassifierWrapperDF) class CategoricalNBDF(ClassifierDF, CategoricalNB): """ Wraps :class:`sklearn.naive_bayes.CategoricalNB`; accepts and returns data frames. @@ -41,7 +39,8 @@ class CategoricalNBDF(ClassifierDF, CategoricalNB): @df_estimator(df_wrapper_type=_StackingClassifierWrapperDF) class StackingClassifierDF(ClassifierDF, StackingClassifier): """ - Wraps :class:`sklearn.ensemble._stacking.StackingClassifier`; accepts and returns data frames. + Wraps :class:`sklearn.ensemble._stacking.StackingClassifier`; + accepts and returns data frames. """ pass diff --git a/src/sklearndf/classification/extra/__init__.py b/src/sklearndf/classification/extra/__init__.py index a2abc5f7..c06faf15 100644 --- a/src/sklearndf/classification/extra/__init__.py +++ b/src/sklearndf/classification/extra/__init__.py @@ -2,4 +2,3 @@ Additional 3rd party classifiers that implement the Scikit-Learn interface. 
""" from ._extra import * - diff --git a/src/sklearndf/pipeline/__init__.py b/src/sklearndf/pipeline/__init__.py index 0b503f74..08cfdc2d 100644 --- a/src/sklearndf/pipeline/__init__.py +++ b/src/sklearndf/pipeline/__init__.py @@ -4,4 +4,3 @@ """ from ._learner_pipeline import * from ._pipeline import * - diff --git a/src/sklearndf/pipeline/_learner_pipeline.py b/src/sklearndf/pipeline/_learner_pipeline.py index 1e0dff77..34a19d5c 100644 --- a/src/sklearndf/pipeline/_learner_pipeline.py +++ b/src/sklearndf/pipeline/_learner_pipeline.py @@ -10,6 +10,7 @@ from sklearn.base import BaseEstimator from pytools.api import inheritdoc + from .. import ClassifierDF, EstimatorDF, LearnerDF, RegressorDF, TransformerDF log = logging.getLogger(__name__) diff --git a/src/sklearndf/regression/__init__.py b/src/sklearndf/regression/__init__.py index 259e9814..652e62bb 100644 --- a/src/sklearndf/regression/__init__.py +++ b/src/sklearndf/regression/__init__.py @@ -12,4 +12,3 @@ if __parse_version(__sklearn_version__) >= __parse_version("0.23"): from ._regression_v0_23 import * - diff --git a/src/sklearndf/regression/_regression.py b/src/sklearndf/regression/_regression.py index a201e199..f75ee7e3 100644 --- a/src/sklearndf/regression/_regression.py +++ b/src/sklearndf/regression/_regression.py @@ -51,7 +51,7 @@ from sklearn.multioutput import MultiOutputRegressor, RegressorChain from sklearn.neighbors import KNeighborsRegressor, RadiusNeighborsRegressor from sklearn.neural_network import MLPRegressor -from sklearn.svm import LinearSVR, NuSVR, SVR +from sklearn.svm import SVR, LinearSVR, NuSVR from sklearn.tree import DecisionTreeRegressor, ExtraTreeRegressor from .. 
import RegressorDF, TransformerDF diff --git a/src/sklearndf/regression/_regression_v0_23.py b/src/sklearndf/regression/_regression_v0_23.py index daa833f8..df87a30e 100644 --- a/src/sklearndf/regression/_regression_v0_23.py +++ b/src/sklearndf/regression/_regression_v0_23.py @@ -36,6 +36,8 @@ # GLM regressors added with v0.23 # # noinspection PyAbstractClass + + @df_estimator(df_wrapper_type=_RegressorWrapperDF) class PoissonRegressorDF(RegressorDF, PoissonRegressor): """ diff --git a/src/sklearndf/transformation/__init__.py b/src/sklearndf/transformation/__init__.py index c076d24c..b82ba4b3 100644 --- a/src/sklearndf/transformation/__init__.py +++ b/src/sklearndf/transformation/__init__.py @@ -12,4 +12,3 @@ if __parse_version(__sklearn_version__) >= __parse_version("0.23"): from ._transformation_v0_23 import * - diff --git a/src/sklearndf/transformation/_transformation.py b/src/sklearndf/transformation/_transformation.py index ffa2df90..c90cfe11 100644 --- a/src/sklearndf/transformation/_transformation.py +++ b/src/sklearndf/transformation/_transformation.py @@ -27,6 +27,8 @@ from sklearn.compose import ColumnTransformer from sklearn.cross_decomposition import PLSSVD from sklearn.decomposition import ( + NMF, + PCA, DictionaryLearning, FactorAnalysis, FastICA, @@ -35,8 +37,6 @@ LatentDirichletAllocation, MiniBatchDictionaryLearning, MiniBatchSparsePCA, - NMF, - PCA, SparseCoder, SparsePCA, TruncatedSVD, @@ -45,9 +45,9 @@ from sklearn.feature_extraction import DictVectorizer, FeatureHasher from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer from sklearn.feature_selection import ( - GenericUnivariateSelect, RFE, RFECV, + GenericUnivariateSelect, SelectFdr, SelectFpr, SelectFromModel, @@ -88,6 +88,8 @@ ) from sklearn.random_projection import GaussianRandomProjection, SparseRandomProjection +from .. 
import TransformerDF +from .._wrapper import _TransformerWrapperDF, df_estimator from ._wrapper import ( _BaseDimensionalityReductionWrapperDF, _BaseMultipleInputsPerOutputTransformerWrapperDF, @@ -96,8 +98,6 @@ _FeatureSelectionWrapperDF, _NComponentsDimensionalityReductionWrapperDF, ) -from .. import TransformerDF -from .._wrapper import _TransformerWrapperDF, df_estimator log = logging.getLogger(__name__) diff --git a/src/sklearndf/transformation/_transformation_v0_22.py b/src/sklearndf/transformation/_transformation_v0_22.py index a515552e..d209d82c 100644 --- a/src/sklearndf/transformation/_transformation_v0_22.py +++ b/src/sklearndf/transformation/_transformation_v0_22.py @@ -21,9 +21,9 @@ from sklearn.impute import KNNImputer -from ._transformation import _ImputerWrapperDF from .. import TransformerDF from .._wrapper import df_estimator +from ._transformation import _ImputerWrapperDF log = logging.getLogger(__name__) @@ -35,7 +35,9 @@ # impute # -# noinspection PyAbstractClass +# noinspection PyAbstractClass + + @df_estimator(df_wrapper_type=_ImputerWrapperDF) class KNNImputerDF(TransformerDF, KNNImputer): """ diff --git a/src/sklearndf/transformation/extra/__init__.py b/src/sklearndf/transformation/extra/__init__.py index 2fea03ca..b991d456 100644 --- a/src/sklearndf/transformation/extra/__init__.py +++ b/src/sklearndf/transformation/extra/__init__.py @@ -2,4 +2,3 @@ Additional 3rd party transformers that implement the Scikit-Learn interface. """ from ._extra import * - diff --git a/src/sklearndf/transformation/extra/_extra.py b/src/sklearndf/transformation/extra/_extra.py index 253f8188..9ea5b94b 100644 --- a/src/sklearndf/transformation/extra/_extra.py +++ b/src/sklearndf/transformation/extra/_extra.py @@ -10,9 +10,9 @@ from boruta import BorutaPy from sklearn.base import BaseEstimator -from .._wrapper import _ColumnSubsetTransformerWrapperDF, _NDArrayTransformerWrapperDF from ... 
import TransformerDF from ..._wrapper import _MetaEstimatorWrapperDF, df_estimator +from .._wrapper import _ColumnSubsetTransformerWrapperDF, _NDArrayTransformerWrapperDF log = logging.getLogger(__name__) diff --git a/test/test/__init__.py b/test/test/__init__.py index e8dfc804..173143ab 100644 --- a/test/test/__init__.py +++ b/test/test/__init__.py @@ -1,7 +1,8 @@ # noinspection PyPackageRequirements +import sklearn import yaml from packaging import version -import sklearn + from test.paths import TEST_CONFIG_YML diff --git a/test/test/conftest.py b/test/test/conftest.py index a51ebb86..4a8be40b 100644 --- a/test/test/conftest.py +++ b/test/test/conftest.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd + # noinspection PyPackageRequirements import pytest import sklearn diff --git a/test/test/sklearndf/pipeline/test_pipeline_df.py b/test/test/sklearndf/pipeline/test_pipeline_df.py index 55d98dc8..70accd9b 100644 --- a/test/test/sklearndf/pipeline/test_pipeline_df.py +++ b/test/test/sklearndf/pipeline/test_pipeline_df.py @@ -23,7 +23,7 @@ from sklearndf import TransformerDF from sklearndf._wrapper import df_estimator -from sklearndf.classification import LogisticRegressionDF, SVCDF +from sklearndf.classification import SVCDF, LogisticRegressionDF from sklearndf.pipeline import PipelineDF from sklearndf.regression import DummyRegressorDF, LassoDF, LinearRegressionDF from sklearndf.transformation import SelectKBestDF, SimpleImputerDF @@ -31,8 +31,8 @@ def test_set_params_nested_pipeline_df() -> None: - """ Test parameter setting for nested pipelines - adapted from - sklearn.tests.test_pipeline """ + """Test parameter setting for nested pipelines - adapted from + sklearn.tests.test_pipeline""" PipelineDF([("b", SimpleImputerDF(strategy="median"))]) @@ -43,8 +43,7 @@ def test_set_params_nested_pipeline_df() -> None: class NoFit(BaseEstimator): - """Small class to test parameter dispatching. 
- """ + """Small class to test parameter dispatching.""" def __init__(self, a: str = None, b: str = None) -> None: self.a = a @@ -114,8 +113,8 @@ class NoTransformerDF(TransformerDF, NoTransformer): def test_pipeline_df_memory( iris_features: pd.DataFrame, iris_target_sr: pd.Series ) -> None: - """ Test memory caching in PipelineDF - taken almost 1:1 from - sklearn.tests.test_pipeline """ + """Test memory caching in PipelineDF - taken almost 1:1 from + sklearn.tests.test_pipeline""" cache_dir = mkdtemp() diff --git a/test/test/sklearndf/test_base.py b/test/test/sklearndf/test_base.py index 1210f7e9..e35e47de 100644 --- a/test/test/sklearndf/test_base.py +++ b/test/test/sklearndf/test_base.py @@ -3,6 +3,7 @@ from abc import ABCMeta import numpy as np + # noinspection PyPackageRequirements import scipy.sparse as sp from numpy.testing import assert_array_equal, assert_raises @@ -12,9 +13,10 @@ from sklearn.pipeline import Pipeline from sklearndf import EstimatorDF + # noinspection PyProtectedMember from sklearndf._wrapper import _EstimatorWrapperDF, df_estimator -from sklearndf.classification import DecisionTreeClassifierDF, SVCDF +from sklearndf.classification import SVCDF, DecisionTreeClassifierDF from sklearndf.pipeline import PipelineDF from sklearndf.transformation import OneHotEncoderDF diff --git a/test/test/sklearndf/test_classification.py b/test/test/sklearndf/test_classification.py index f7c44f42..c879cbcf 100644 --- a/test/test/sklearndf/test_classification.py +++ b/test/test/sklearndf/test_classification.py @@ -99,12 +99,7 @@ def test_wrapped_fit_predict( assert len(predictions) == len(iris_target_sr) assert np.all(predictions.isin(classes)) - # test predict_proba & predict_log_proba only if the root classifier has them: - test_funcs = [ - getattr(classifier, attr) - for attr in ["predict_proba", "predict_log_proba"] - if hasattr(classifier.native_estimator, attr) - ] + # test predict_proba & predict_log_proba: for method_name in ["predict_proba", 
"predict_log_proba"]: method = getattr(classifier, method_name, None) diff --git a/test/test/sklearndf/test_regression.py b/test/test/sklearndf/test_regression.py index fc4b439c..27df231a 100644 --- a/test/test/sklearndf/test_regression.py +++ b/test/test/sklearndf/test_regression.py @@ -8,10 +8,10 @@ from sklearndf import RegressorDF, TransformerDF from sklearndf._wrapper import _EstimatorWrapperDF from sklearndf.regression import ( + SVRDF, IsotonicRegressionDF, LinearRegressionDF, RandomForestRegressorDF, - SVRDF, ) from test.sklearndf import check_expected_not_fitted_error, list_classes diff --git a/test/test/sklearndf/test_sklearn_coverage.py b/test/test/sklearndf/test_sklearn_coverage.py index 3cfaab66..a5b76b16 100644 --- a/test/test/sklearndf/test_sklearn_coverage.py +++ b/test/test/sklearndf/test_sklearn_coverage.py @@ -195,8 +195,8 @@ def test_transformer_coverage(sklearn_transformer_cls: Type[TransformerMixin]) - argnames="sklearn_pipeline_cls", argvalues=sklearn_pipeline_classes() ) def test_pipeline_coverage(sklearn_pipeline_cls: Type) -> None: - """ Check if each sklearn pipeline estimator has - a wrapped sklearndf counterpart. """ + """Check if each sklearn pipeline estimator has + a wrapped sklearndf counterpart.""" sklearn_classes = sklearn_delegate_classes(sklearndf.pipeline) diff --git a/test/test/sklearndf/transformation/test_transformation.py b/test/test/sklearndf/transformation/test_transformation.py index cd1e4b41..31c8d14c 100644 --- a/test/test/sklearndf/transformation/test_transformation.py +++ b/test/test/sklearndf/transformation/test_transformation.py @@ -13,12 +13,12 @@ from sklearndf import TransformerDF from sklearndf.classification import RandomForestClassifierDF from sklearndf.transformation import ( + RFECVDF, + RFEDF, ColumnTransformerDF, KBinsDiscretizerDF, NormalizerDF, OneHotEncoderDF, - RFECVDF, - RFEDF, SelectFromModelDF, SparseCoderDF, )