
Add code checks to the CI pipeline: isort, black, flake8 #37

Merged: 10 commits, Oct 1, 2020
6 changes: 6 additions & 0 deletions .isort.cfg
@@ -0,0 +1,6 @@
[isort]
profile=black
src_paths=src,test
known_local_folder=sklearndf,test
known_first_party=pytools
known_third_party=numpy,pandas,joblib,sklearn,matplot
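
For illustration only (a hedged sketch, not part of this PR), the profile above sorts imports into standard-library, third-party, first-party (pytools), and local-folder (sklearndf, test) sections with black-compatible formatting; the module below is hypothetical and only shows the resulting grouping:

# standard library
import logging

# third-party (known_third_party)
import numpy as np
import pandas as pd

# first-party (known_first_party=pytools)
from pytools.api import inheritdoc

# local folder (known_local_folder=sklearndf)
from sklearndf import TransformerDF
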
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
@@ -1,4 +1,9 @@
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.5.4
hooks:
- id: isort

- repo: https://github.com/psf/black
rev: 20.8b1
hooks:
57 changes: 48 additions & 9 deletions azure-pipelines.yml
@@ -31,12 +31,52 @@ variables:
}}

stages:
# Check code quality first to fail fast (isort, flake8, black)
- stage: code_quality_checks
displayName: 'Code quality'
jobs:
- job:
displayName: 'isort'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install isort==5.5.4
python -m isort --check --diff .
displayName: 'isort'
- job:
displayName: 'black'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install black==20.8b1
python -m black --check .
displayName: 'black'
- job:
displayName: 'flake8'
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7.*'
displayName: 'use Python 3.7'
- script: |
python -m pip install flake8 flake8-comprehensions flake8-import-order
python -m flake8 --config flake8.ini -v .
displayName: 'Run flake8'

# detect whether the conda build config was changed -> then we must run a build test
- stage: detect_conda_changes
displayName: 'detect conda changes'
displayName: 'Conda build config'

jobs:

- job: checkout_and_diff
displayName: 'detect changes'
steps:
- checkout: self

@@ -57,14 +57,14 @@


- stage:
displayName: 'simple pytest'
displayName: 'Unit tests'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]

jobs:
- job:
displayName: 'pytest @ develop environment'
displayName: 'in develop environment'
condition: >
and(
ne(variables.master_or_release, 'True'),
@@ -77,8 +77,8 @@
steps:
- task: UsePythonVersion@0
inputs:
versionSpec: '3.7'
displayName: 'Use Python 3.7'
versionSpec: '3.7.*'
displayName: 'use Python 3.7'

- checkout: self
- checkout: pytools
@@ -121,14 +161,14 @@
# - FOR RELEASE BRANCH: 3 BUILD TESTS
# - OTHERWISE: 1 BUILD TEST
- stage:
displayName: 'conda build & pytest'
displayName: 'Conda build & test'
dependsOn: 'detect_conda_changes'
variables:
conda_build_config_changed: $[ stageDependencies.detect_conda_changes.checkout_and_diff.outputs['diff.conda_build_config_changed'] ]

jobs:
- job:
displayName: '(single)'
displayName: 'single'
condition: >
and(
ne(variables.master_or_release, 'True'),
@@ -185,7 +225,7 @@
displayName: "build & test conda package"

- job:
displayName: '(matrix)'
displayName: 'matrix'
condition: eq(variables.master_or_release, 'True')

pool:
@@ -225,7 +265,6 @@
FACET_V_BORUTA: '>=0.3'
FACET_V_LGBM: '>=2.2'
FACET_V_JOBLIB: '>=0.13'

steps:
- task: UsePythonVersion@0
inputs:
3 changes: 3 additions & 0 deletions environment.yml
@@ -8,6 +8,9 @@ dependencies:
- conda-build
- conda-verify
- docutils
- flake8 = 3.8.*
- flake8-comprehensions = 3.2.*
- isort = 5.5.*
- joblib = 0.16.*
- jupyter >= 1.0
- lightgbm = 3.0.*
24 changes: 24 additions & 0 deletions flake8.ini
@@ -0,0 +1,24 @@
[flake8]

max-line-length = 88

show-source = true

ignore =
E203, # space before : (needed for how black formats slicing)
W503, # line break before binary operator
W504, # line break after binary operator
E402, # module level import not at top of file
E731, # do not assign a lambda expression, use a def
E741, # ignore not easy to read variables like i l I etc
C408, # Unnecessary (dict/list/tuple) call - rewrite as a literal
S001, # found modulo formatter (incorrect picks up mod operations)

per-file-ignores =
__init__.py: F401, F403, F405

exclude =
.eggs/*.py,
venv/*,
.venv/*,
.git/*
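
As a hedged aside (not part of the diff), E203 and W503 are ignored because black's formatting can conflict with those checks; the snippet below is illustrative only:

# black formats extended slices with spaces around ':', which flake8
# reports as E203 (whitespace before ':') unless the rule is ignored
ham = list(range(20))
lower, offset, upper = 1, 2, 8
sliced = ham[lower + offset : upper + offset]

# wrapping an expression with the operator at the start of the next line
# triggers W503 (line break before binary operator) unless ignored
result = (
    lower
    + upper
)
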
2 changes: 1 addition & 1 deletion sphinx/source/conf.py
@@ -21,7 +21,7 @@
)
)

from conf_base import *
from conf_base import set_config

# ----- custom configuration -----

88 changes: 58 additions & 30 deletions src/sklearndf/__init__.py
@@ -1,61 +1,89 @@
"""
`sklearndf` augments more than 150 scikit-learn estimators for native support of data
`sklearndf` augments more than 150 scikit-learn estimators for
native support of data
frames, while leaving the original API intact.

Augmented scikit-learn classes are named after their native scikit-learn counterparts,
Augmented scikit-learn classes are named after their native
scikit-learn counterparts,
with `DF` added as a suffix:
:class:`.SimpleImputerDF` takes the place of :class:`~sklearn.impute.SimpleImputer`,
:class:`.RandomForestRegressorDF` takes the place of
:class:`~sklearn.ensemble.RandomForestRegressor`, and so on.

For all methods expecting an `X` argument for a feature matrix and potentially a
`y` argument for one or more targets, `sklearndf` estimators expect a pandas
:class:`~pandas.DataFrame` for `X` and a pandas :class:`~pandas.Series` for a
1-dimensional `y`, or a pandas :class:`~pandas.DataFrame` when fitting to multiple
For all methods expecting an `X` argument for a
feature matrix and potentially a
`y` argument for one or more targets, `sklearndf`
estimators expect a pandas
:class:`~pandas.DataFrame` for `X` and a
pandas :class:`~pandas.Series` for a
1-dimensional `y`, or a pandas :class:`~pandas.DataFrame`
when fitting to multiple
targets or outputs.
This includes methods such as :meth:`~EstimatorDF.fit`,
:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`, and so on.
:meth:`~TransformerDF.transform`, :meth:`~LearnerDF.predict`,
and so on.

All estimators enhanced by `sklearndf` also implement an additional attribute
:attr:`~EstimatorDF.features_in_`, keeping track of the column names of the data
All estimators enhanced by `sklearndf` also implement an
additional attribute
:attr:`~EstimatorDF.features_in_`, keeping track of the
column names of the data
frame used to fit the estimator.

`sklearndf` transformers also implement :attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track of the feature names of the
transformed outputs as well as mapping output features back to the input features.
This enables tracing features back to the original inputs even across complex
`sklearndf` transformers also implement
:attr:`~TransformerDF.features_out_` and
:attr:`~TransformerDF.features_original_`, keeping track
of the feature names of the
transformed outputs as well as mapping output features
back to the input features.
This enables tracing features back to the original
inputs even across complex
pipelines (see also :class:`.PipelineDF`)

`sklearndf` classes implement a class hierarchy that follows the taxonomy of
scikit-learn classes (but is only partially reflected via class inheritance in the
`sklearndf` classes implement a class hierarchy that
follows the taxonomy of
scikit-learn classes (but is only partially reflected
via class inheritance in the
original implementation):

- all `sklearndf` transformers are subclasses of :class:`.TransformerDF`, which \
in turn provides the API for all common transformer methods, e.g., \
- all `sklearndf` transformers are subclasses of
:class:`.TransformerDF`, which \
in turn provides the API for all common transformer
methods, e.g., \
:meth:`~TransformerDF.transform`
- all `sklearndf` regressors are subclasses of :class:`.RegressorDF`, which \
in turn provides the API for all common regressor methods, e.g., \
- all `sklearndf` regressors are subclasses
of :class:`.RegressorDF`, which \
in turn provides the API for all common regressor
methods, e.g., \
:meth:`~LearnerDF.predict`
- all `sklearndf` classifiers are subclasses of :class:`.ClassifierDF`, which \
in turn provides the API for all common classifier methods, e.g., \
- all `sklearndf` classifiers are subclasses of
:class:`.ClassifierDF`, which \
in turn provides the API for all common classifier
methods, e.g., \
:meth:`~ClassifierDF.predict_proba`
- all `sklearndf` regressors and classifiers are subclasses of :class:`.LearnerDF`
- all `sklearndf` estimators are subclasses of :class:`.EstimatorDF`
- all `sklearndf` regressors and classifiers are
subclasses of :class:`.LearnerDF`
- all `sklearndf` estimators are subclasses of
:class:`.EstimatorDF`

`sklearndf` introduces two additional pipeline classes, :class:`.RegressorPipelineDF` and
:class:`.ClassifierPipelineDF`, with an abstract base class :class:`.LearnerPipelineDF`,
to allow for easier handling of common types of ML pipelines.
These classes implement pipelines with two steps -- one preprocessing step, followed by
`sklearndf` introduces two additional pipeline classes,
:class:`.RegressorPipelineDF` and
:class:`.ClassifierPipelineDF`, with an abstract base
class :class:`.LearnerPipelineDF`,
to allow for easier handling of common types of ML
pipelines.
These classes implement pipelines with two steps --
one preprocessing step, followed by
a regressor or a classifier.

`sklearndf` also provides data frame support for a selection of custom or 3rd-party
estimators, most notably :class:`.BorutaDF`, :class:`.LGBMRegressorDF`, and
`sklearndf` also provides data frame support for a
selection of custom or 3rd-party
estimators, most notably :class:`.BorutaDF`,
:class:`.LGBMRegressorDF`, and
:class:`.LGBMClassifierDF`.

All `sklearndf` estimators are fully type hinted.
"""

from ._sklearndf import *


__version__ = "1.0.0"
33 changes: 17 additions & 16 deletions src/sklearndf/_wrapper.py
@@ -17,19 +17,19 @@
from abc import ABCMeta, abstractmethod
from functools import update_wrapper
from typing import (
Optional,
Iterable,
Generic,
Mapping,
Sequence,
Any,
Union,
Callable,
TypeVar,
Type,
AnyStr,
Callable,
Dict,
Generic,
Iterable,
List,
Mapping,
Optional,
Sequence,
Type,
TypeVar,
Union,
)

import numpy as np
@@ -43,6 +43,7 @@
)

from pytools.api import inheritdoc

from sklearndf import ClassifierDF, EstimatorDF, LearnerDF, RegressorDF, TransformerDF

log = logging.getLogger(__name__)
@@ -969,13 +970,13 @@ def _update_class_docstring(
df_estimator_type.__doc__ = "\n".join(
[
*tag_line,
f"""
.. note::
This class is a wrapper around class :class:`{estimator_name}`.

It provides enhanced support for pandas data frames, and otherwise replicates
all parameters and behaviours of class :class:`~{estimator_name}`.
""",
(
f".. note::This class is a wrapper around class \n"
f":class:{estimator_name}. It provides enhanced support for "
f"pandas data frames, \n"
f"and otherwise replicates all parameters and \n"
f"behaviours of class :class:~{estimator_name}."
),
*(base_doc_lines if INCLUDE_FULL_SKLEARN_DOCUMENTATION else []),
]
)
1 change: 0 additions & 1 deletion src/sklearndf/classification/__init__.py
@@ -12,4 +12,3 @@

if __parse_version(__sklearn_version__) >= __parse_version("0.23"):
from ._classification_v0_23 import *

15 changes: 3 additions & 12 deletions src/sklearndf/classification/_classification.py
@@ -46,7 +46,7 @@
)
from sklearn.neural_network import MLPClassifier
from sklearn.semi_supervised import LabelPropagation, LabelSpreading
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.svm import SVC, LinearSVC, NuSVC
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier

from .. import ClassifierDF
@@ -102,6 +102,8 @@
#

# noinspection PyAbstractClass


@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class DummyClassifierDF(ClassifierDF, DummyClassifier):
"""
@@ -418,17 +420,6 @@ class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV):
pass


# noinspection PyAbstractClass
@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class LogisticRegressionCVDF(ClassifierDF, LogisticRegressionCV):
"""
Wraps :class:`sklearn.linear_model.logistic.LogisticRegressionCV`; accepts and
returns data frames.
"""

pass


# noinspection PyAbstractClass
@df_estimator(df_wrapper_type=_ClassifierWrapperDF)
class PassiveAggressiveClassifierDF(ClassifierDF, PassiveAggressiveClassifier):