|
| 1 | +from typing import Any, Dict, Optional, Union |
| 2 | + |
| 3 | +from ConfigSpace.configuration_space import ConfigurationSpace |
| 4 | +from ConfigSpace.hyperparameters import ( |
| 5 | + CategoricalHyperparameter, |
| 6 | + UniformIntegerHyperparameter |
| 7 | +) |
| 8 | + |
| 9 | +import numpy as np |
| 10 | + |
| 11 | +from sklearn.preprocessing import QuantileTransformer as SklearnQuantileTransformer |
| 12 | + |
| 13 | +from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType |
| 14 | +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.base_scaler import BaseScaler |
| 15 | +from autoPyTorch.utils.common import HyperparameterSearchSpace, add_hyperparameter |
| 16 | + |
| 17 | + |
class QuantileTransformer(BaseScaler):
    """
    Transform the features to follow a uniform or a normal distribution
    using quantiles information.

    For more details of each attribute, see:
    https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.QuantileTransformer.html

    Args:
        n_quantiles (int):
            Forwarded as ``n_quantiles`` to the sklearn transformer.
        output_distribution (str):
            Forwarded as ``output_distribution`` to the sklearn transformer;
            the search space below offers "uniform" and "normal".
        random_state (Optional[np.random.RandomState]):
            Forwarded as ``random_state`` to the sklearn transformer so that
            any randomness it uses follows the caller-supplied seed.
    """
    def __init__(
        self,
        n_quantiles: int = 1000,
        output_distribution: str = "normal",  # Literal["normal", "uniform"]
        random_state: Optional[np.random.RandomState] = None
    ):
        super().__init__()
        self.random_state = random_state
        self.n_quantiles = n_quantiles
        self.output_distribution = output_distribution

    def fit(self, X: Dict[str, Any], y: Any = None) -> BaseScaler:
        """
        Create the sklearn transformer and register it under the
        'numerical' key of ``self.preprocessor``.

        The sklearn object is only constructed here; this method does not
        call ``fit`` on it.

        Args:
            X (Dict[str, Any]):
                Fit dictionary; it is passed to ``check_requirements``.
            y (Any):
                Passed to ``check_requirements`` alongside ``X``.

        Returns:
            BaseScaler: this instance.
        """
        self.check_requirements(X, y)

        # random_state must be forwarded explicitly: without it the seed given
        # to __init__ is silently ignored and results are not reproducible.
        self.preprocessor['numerical'] = SklearnQuantileTransformer(
            n_quantiles=self.n_quantiles,
            output_distribution=self.output_distribution,
            random_state=self.random_state,
            copy=False,
        )
        return self

    @staticmethod
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
        n_quantiles: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="n_quantiles",
                                                                           value_range=(10, 2000),
                                                                           default_value=1000,
                                                                           ),
        output_distribution: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="output_distribution",
                                                                                   value_range=("uniform", "normal"),
                                                                                   default_value="normal",
                                                                                   )
    ) -> ConfigurationSpace:
        """
        Build the configuration space of this component.

        Args:
            dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
                Not used by this method.
            n_quantiles (HyperparameterSearchSpace):
                Search space added as a ``UniformIntegerHyperparameter``.
            output_distribution (HyperparameterSearchSpace):
                Search space added as a ``CategoricalHyperparameter``.

        Returns:
            ConfigurationSpace: a space holding the two hyperparameters above.
        """
        cs = ConfigurationSpace()

        # TODO parametrize like the Random Forest as n_quantiles = n_features^param
        add_hyperparameter(cs, n_quantiles, UniformIntegerHyperparameter)
        add_hyperparameter(cs, output_distribution, CategoricalHyperparameter)

        return cs

    @staticmethod
    def get_properties(dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None
                       ) -> Dict[str, Union[str, bool]]:
        """
        Return the static description of this component.

        Args:
            dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
                Not used by this method.

        Returns:
            Dict[str, Union[str, bool]]: the short name, the name and the
            ``handles_sparse`` flag (False) of the component.
        """
        return {
            'shortname': 'QuantileTransformer',
            'name': 'QuantileTransformer',
            'handles_sparse': False
        }
0 commit comments