
Add code checks to the CI pipeline: isort, black, flake8 #37

Merged: 10 commits, Oct 1, 2020
6 changes: 6 additions & 0 deletions .isort.cfg
@@ -0,0 +1,6 @@
[isort]
profile=black
src_paths=src,test
known_local_folder=sklearndf,test
known_first_party=pytools
known_third_party=numpy,pandas,joblib,sklearn,matplot
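
For illustration only (a hedged sketch, not part of this PR), the profile above sorts imports into standard-library, third-party, first-party (pytools), and local-folder (sklearndf, test) sections with black-compatible formatting; the module below is hypothetical and only shows the resulting grouping:

# standard library
import logging

# third-party (known_third_party)
import numpy as np
import pandas as pd

# first-party (known_first_party=pytools)
from pytools.api import inheritdoc

# local folder (known_local_folder=sklearndf)
from sklearndf import TransformerDF
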
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
@@ -1,4 +1,9 @@
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.5.4
hooks:
- id: isort

- repo: https://github.com/psf/black
rev: 20.8b1
hooks:
57 changes: 48 additions & 9 deletions azure-pipelines.yml
@@ -31,12 +31,52 @@ variables:
}}

stages:
# Check code quality first to fail fast (isort, flake8, black)
- stage: code_quality_checks
displayName: 'Code quality'
jobs:
- job:
displayName: 'isort'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install isort==5.5.4
python -m isort --check --diff .
displayName: 'isort'
- job:
displayName: 'black'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install black==20.8b1
python -m black --check .
displayName: 'black'
- job:
displayName: 'flake8'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install flake8 flake8-comprehensions flake8-import-order
python -m flake8 --config flake8.ini -v .
displayName: 'Run flake8'

# detect whether the conda build config was changed -> then we must run a build test
- stage: detect_conda_changes
displayName: 'detect conda changes'
displayName: 'Conda build config'

jobs:

- job: checkout_and_diff
displayName: 'detect changes'
steps:
- checkout: self

@@ -57,14 +57,14 @@


- stage:
displayName: 'simple pytest'
displayName: 'Unit tests'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]

jobs:
- job:
displayName: 'pytest @ develop environment'
displayName: 'in develop environment'
condition: >
and(
ne(variables.master_or_release, 'True'),
@@ -77,8 +77,8 @@
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
displayName: 'Use Python 3.7'
versionSpec: '3.7.*'
displayName: 'use Python 3.7'

- checkout: self
- checkout: pytools
@@ -121,14 +161,14 @@
# - FOR RELEASE BRANCH: 3 BUILD TESTS
# - OTHERWISE: 1 BUILD TEST
- stage:
displayName: 'conda build & pytest'
displayName: 'Conda build & test'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]

jobs:
- job:
displayName: '(single)'
displayName: 'single'
condition: >
and(
ne(variables.master_or_release, 'True'),
@@ -185,7 +225,7 @@
displayName: "build & test conda package"

- job:
displayName: '(matrix)'
displayName: 'matrix'
condition: eq(variables.master_or_release, 'True')

pool:
@@ -225,7 +265,6 @@
FACET_V_BORUTA: '>=0.3'
FACET_V_LGBM: '>=2.2'
FACET_V_JOBLIB: '>=0.13'

steps:
- task: UsePythonVersion@0
inputs:
3 changes: 3 additions & 0 deletions environment.yml
@@ -8,6 +8,9 @@ dependencies:
- conda-build
- conda-verify
- docutils
- flake8 = 3.8.*
- flake8-comprehensions = 3.2.*
- isort = 5.5.*
- joblib = 0.16.*
- jupyter >= 1.0
- lightgbm = 3.0.*
24 changes: 24 additions & 0 deletions flake8.ini
@@ -0,0 +1,24 @@
[flake8]

max-line-length = 88

show-source = true

ignore =
E203, # space before : (needed for how black formats slicing)
W503, # line break before binary operator
W504, # line break after binary operator
E402, # module level import not at top of file
E731, # do not assign a lambda expression, use a def
E741, # ignore not easy to read variables like i l I etc
C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal
S001, # found modulo formatter (incorrect picks up mod operations)

per-file-ignores =
__init__.py: F401, F403, F405

exclude =
.eggs/*.py,
venv/*,
.venv/*,
.git/*
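
As a hedged aside (not part of the diff), E203 and W503 are ignored because black's formatting can conflict with those checks; the snippet below is illustrative only:

# black formats extended slices with spaces around ':', which flake8
# reports as E203 (whitespace before ':') unless the rule is ignored
ham = list(range(20))
lower, offset, upper = 1, 2, 8
sliced = ham[lower + offset : upper + offset]

# wrapping an expression with the operator at the start of the next line
# triggers W503 (line break before binary operator) unless ignored
result = (
    lower
    + upper
)
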
2 changes: 1 addition & 1 deletion sphinx/source/conf.py
@@ -21,7 +21,7 @@
)
)

from conf_base import *
from conf_base import set_config

# ----- custom configuration -----

88 changes: 58 additions & 30 deletions src/sklearndf/__init__.py
@@ -1,61 +1,89 @@
"""
`sklearndf` augments more than 150 scikit-learn estimators for native support of data
`sklearndf` augments more than 150 scikit-learn estimators for
native support of data
frames, while leaving the original API intact.

Augmented scikit-learn classes are named after their native scikit-learn counterparts,
Augmented scikit-learn classes are named after their native
scikit-learn counterparts,
with `DF` added as a suffix:
:class:`.SimpleImputerDF` takes the place of :class:`~sklearn.impute.SimpleImputer`,
:class:`.RandomForestRegressorDF` takes the place of
:class:`~sklearn.ensemble.RandomForestRegressor`, and so on.

For all methods expecting an `X` argument for a feature matrix and potentially a
`y` argument for one or more targets, `sklearndf` estimators expect a pandas
:class:`~pandas.DataFrame` for `X` and a pandas :class:`~pandas.Series` for a
1-dimensional `y`, or a pandas :class:`~pandas.DataFrame` when fitting to multiple
For all methods expecting an `X` argument for a
feature matrix and potentially a
`y` argument for one or more targets, `sklearndf`
estimators expect a pandas
:class:`~pandas.DataFrame` for `X` and a
pandas :class:`~pandas.Series` for a
1-dimensional `y`, or a pandas :class:`~pandas.DataFrame`
when fitting to multiple
targets or outputs.
This includes methods such as :meth:`~EstimatorDF.fit`,
:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`, and so on.
:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`,
and so on.

All estimators enhanced by `sklearndf` also implement an additional attribute
:attr:`~EstimatorDF.features_in_`, keeping track of the column names of the data
All estimators enhanced by `sklearndf` also implement an
additional attribute
:attr:`~EstimatorDF.features_in_`, keeping track of the
column names of the data
frame used to fit the estimator.

`sklearndf` transformers also implement :attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track of the feature names of the
transformed outputs as well as mapping output features back to the input features.
This enables tracing features back to the original inputs even across complex
`sklearndf` transformers also implement
:attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track
of the feature names of the
transformed outputs as well as mapping output features
back to the input features.
This enables tracing features back to the original
inputs even across complex
pipelines (see also :class:`.PipelineDF`)

`sklearndf` classes implement a class hierarchy that follows the taxonomy of
scikit-learn classes (but is only partially reflected via class inheritance in the
`sklearndf` classes implement a class hierarchy that
follows the taxonomy of
scikit-learn classes (but is only partially reflected
via class inheritance in the
original implementation):

- all `sklearndf` transformers are subclasses of :class:`.TransformerDF`, which \
in turn provides the API for all common transformer methods, e.g., \
- all `sklearndf` transformers are subclasses of
:class:`.TransformerDF`, which \
in turn provides the API for all common transformer
methods, e.g., \
:meth:`~TransformerDF.transform`
- all `sklearndf` regressors are subclasses of :class:`.RegressorDF`, which \
in turn provides the API for all common regressor methods, e.g., \
- all `sklearndf` regressors are subclasses
of :class:`.RegressorDF`, which \
in turn provides the API for all common regressor
methods, e.g., \
:meth:`~LearnerDF.predict`
- all `sklearndf` classifiers are subclasses of :class:`.ClassifierDF`, which \
in turn provides the API for all common classifier methods, e.g., \
- all `sklearndf` classifiers are subclasses of
:class:`.ClassifierDF`, which \
in turn provides the API for all common classifier
methods, e.g., \
:meth:`~ClassifierDF.predict_proba`
- all `sklearndf` regressors and classifiers are subclasses of :class:`.LearnerDF`
- all `sklearndf` estimators are subclasses of :class:`.EstimatorDF`
- all `sklearndf` regressors and classifiers are
subclasses of :class:`.LearnerDF`
- all `sklearndf` estimators are subclasses of
:class:`.EstimatorDF`

`sklearndf` introduces two additional pipeline classes, :class:`.RegressorPipelineDF` and
:class:`.ClassifierPipelineDF`, with an abstract base class :class:`.LearnerPipelineDF`,
to allow for easier handling of common types of ML pipelines.
These classes implement pipelines with two steps -- one preprocessing step, followed by
`sklearndf` introduces two additional pipeline classes,
:class:`.RegressorPipelineDF` and
:class:`.ClassifierPipelineDF`, with an abstract base
class :class:`.LearnerPipelineDF`,
to allow for easier handling of common types of ML
pipelines.
These classes implement pipelines with two steps --
one preprocessing step, followed by
a regressor or a classifier.

`sklearndf` also provides data frame support for a selection of custom or 3rd-party
estimators, most notably :class:`.BorutaDF`, :class:`.LGBMRegressorDF`, and
`sklearndf` also provides data frame support for a
selection of custom or 3rd-party
estimators, most notably :class:`.BorutaDF`,
:class:`.LGBMRegressorDF`, and
:class:`.LGBMClassifierDF`.

All `sklearndf` estimators are fully type hinted.
"""

from ._sklearndf import *


__version__ = "1.0.0"
33 changes: 17 additions & 16 deletions src/sklearndf/_wrapper.py
@@ -17,19 +17,19 @@
from abc import ABCMeta, abstractmethod
from functools import update_wrapper
from typing import (
Optional,
Iterable,
Generic,
Mapping,
Sequence,
Any,
Union,
Callable,
TypeVar,
Type,
AnyStr,
Callable,
Dict,
Generic,
Iterable,
List,
Mapping,
Optional,
Sequence,
Type,
TypeVar,
Union,
)

import numpy as np
@@ -43,6 +43,7 @@
)

from pytools.api import inheritdoc

from sklearndf import ClassifierDF, EstimatorDF, LearnerDF, RegressorDF, TransformerDF

log = logging.getLogger(__name__)
@@ -969,13 +970,13 @@ def _update_class_docstring(
df_estimator_type.__doc__ = "\n".join(
[
*tag_line,
f"""
.. note::
This class is a wrapper around class :class:`{estimator_name}`.

It provides enhanced support for pandas data frames, and otherwise replicates
all parameters and behaviours of class :class:`~{estimator_name}`.
""",
(
f".. note::This class is a wrapper around class \n"
f":class:{estimator_name}. It provides enhanced support for "
f"pandas data frames, \n"
f"and otherwise replicates all parameters and \n"
f"behaviours of class :class:~{estimator_name}."
),
*(base_doc_lines if INCLUDE_FULL_SKLEARN_DOCUMENTATION else []),
]
)
1 change: 0 additions & 1 deletion src/sklearndf/classification/__init__.py
@@ -12,4 +12,3 @@

if __parse_version(__sklearn_version__) >= __parse_version("0.23"):
from ._classification_v0_23 import *

15 changes: 3 additions & 12 deletions src/sklearndf/classification/_classification.py
@@ -46,7 +46,7 @@
)
from sklearn.neural_network import MLPClassifier
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

from .. import ClassifierDF
@@ -102,6 +102,8 @@
#

# noinspection PyAbstractClass


@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class DummyClassifierDF(ClassifierDF, DummyClassifier):
"""
@@ -418,17 +420,6 @@ class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV):
pass


# noinspection PyAbstractClass
@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV):
"""
Wraps :class:`sklearn.linear_model.logistic.LogisticRegressionCV`; accepts and
returns data frames.
"""

pass


# noinspection PyAbstractClass
@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class PassiveAggressiveClassifierDF(ClassifierDF, PassiveAggressiveClassifier):