|
| 1 | +import numpy as np |
| 2 | +from numpy.testing import assert_array_equal |
| 3 | + |
| 4 | + |
| 5 | +from sklearn.base import BaseEstimator |
| 6 | +from sklearn.compose import make_column_transformer |
| 7 | + |
| 8 | +from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.variance_thresholding. \ |
| 9 | + VarianceThreshold import VarianceThreshold |
| 10 | + |
| 11 | + |
def test_variance_threshold():
    """Check the estimator exposed by the VarianceThreshold component.

    The third column equals 1 on every training row (rows 0, 2 and 3), and the
    expected output for the held-out rows keeps only the first two columns.
    """
    data = np.array([
        [1, 2, 1],
        [7, 8, 9],
        [4, 5, 1],
        [11, 12, 1],
        [17, 18, 19],
        [14, 15, 16],
    ])
    train_indices = np.array([0, 2, 3])
    test_indices = np.array([1, 4, 5])
    numerical_columns = [0, 1, 2]

    fit_dictionary = {
        'X_train': data[train_indices],
        'dataset_properties': {
            'categorical_columns': [],
            'numerical_columns': numerical_columns,
        },
    }

    fitted_component = VarianceThreshold().fit(fit_dictionary)
    fit_dictionary = fitted_component.transform(fit_dictionary)
    selector = fit_dictionary['variance_threshold']['numerical']

    # transform() must have added a dict entry holding a scikit-learn estimator
    assert isinstance(fit_dictionary['variance_threshold'], dict)
    assert isinstance(selector, BaseEstimator)

    # wrap the returned estimator in a column transformer, fit it on the
    # training rows and apply it to the held-out rows
    selected_columns = fit_dictionary['dataset_properties']['numerical_columns']
    column_transformer = make_column_transformer(
        (selector, selected_columns),
        remainder='passthrough',
    )
    column_transformer = column_transformer.fit(fit_dictionary['X_train'])
    transformed = column_transformer.transform(data[test_indices])

    expected = np.array([
        [7, 8],
        [17, 18],
        [14, 15],
    ])
    assert_array_equal(transformed, expected)
0 commit comments