1
- from typing import List , Optional , cast
1
+ from typing import List , Optional , Union , cast
2
2
3
3
import numpy as np
4
4
14
14
from sklearn .utils .multiclass import type_of_target
15
15
16
16
from autoPyTorch .data .base_target_validator import BaseTargetValidator , SupportedTargetTypes
17
+ from autoPyTorch .utils .common import SparseMatrixType
17
18
18
19
19
- def _check_and_to_numpy (y : SupportedTargetTypes ) -> np .ndarray :
20
+ ArrayType = Union [np .ndarray , SparseMatrixType ]
21
+
22
+
23
def _check_and_to_array(y: SupportedTargetTypes) -> ArrayType:
    """Validate the targets through scikit-learn and hand back the checked array.

    The call delegates to ``sklearn.utils.check_array`` with all values required
    to be finite, sparse input accepted in CSR format, and no 2D shape enforced.
    """
    validated = sklearn.utils.check_array(
        y,
        accept_sparse='csr',
        ensure_2d=False,
        force_all_finite=True,
    )
    return validated
22
26
23
27
28
+ def _modify_regression_target (y : ArrayType ) -> ArrayType :
29
+ # Regression targets must have numbers after a decimal point.
30
+ # Ref: https://github.com/scikit-learn/scikit-learn/issues/8952
31
+ y_min = np .abs (y ).min ()
32
+ offset = y_min * 1e-16 # Sufficiently small number
33
+ if y_min > 1e15 :
34
+ raise ValueError (
35
+ "The minimum value for the target labels of regression tasks must be smaller than "
36
+ f"1e15 to avoid errors caused by an overflow, but got { y_min } "
37
+ )
38
+
39
+ # Since it is all integer, we can just add a random small number
40
+ if isinstance (y , np .ndarray ):
41
+ y = y .astype (dtype = np .float64 ) + offset
42
+ else :
43
+ y .data = y .data .astype (dtype = np .float64 ) + offset
44
+
45
+ return y
46
+
47
+
24
48
class TabularTargetValidator (BaseTargetValidator ):
25
49
def _fit (
26
50
self ,
@@ -101,7 +125,7 @@ def _fit(
101
125
102
126
def _transform_by_encoder (self , y : SupportedTargetTypes ) -> np .ndarray :
103
127
if self .encoder is None :
104
- return _check_and_to_numpy (y )
128
+ return _check_and_to_array (y )
105
129
106
130
# remove ravel warning from pandas Series
107
131
shape = np .shape (y )
@@ -115,12 +139,9 @@ def _transform_by_encoder(self, y: SupportedTargetTypes) -> np.ndarray:
115
139
else :
116
140
y = self .encoder .transform (np .array (y ).reshape (- 1 , 1 )).reshape (- 1 )
117
141
118
- return _check_and_to_numpy (y )
142
+ return _check_and_to_array (y )
119
143
120
- def transform (
121
- self ,
122
- y : SupportedTargetTypes ,
123
- ) -> np .ndarray :
144
+ def transform (self , y : SupportedTargetTypes ) -> np .ndarray :
124
145
"""
125
146
Validates and fit a categorical encoder (if needed) to the features.
126
147
The supported data types are List, numpy arrays and pandas DataFrames.
@@ -146,24 +167,11 @@ def transform(
146
167
y = np .ravel (y )
147
168
148
169
if not self .is_classification and "continuous" not in type_of_target (y ):
149
- # Regression targets must have numbers after a decimal point.
150
- # Ref: https://github.com/scikit-learn/scikit-learn/issues/8952
151
- y_min = np .abs (y ).min ()
152
- offset = y_min * 1e-16 # Sufficiently small number
153
- if y_min > 1e15 :
154
- raise ValueError (
155
- "The minimum value for the target labels of regression tasks must be smaller than "
156
- f"1e15 to avoid errors caused by an overflow, but got { y_min } "
157
- )
158
-
159
- y = y .astype (dtype = np .float64 ) + offset # Since it is all integer, we can just add a random small number
170
+ y = _modify_regression_target (y )
160
171
161
172
return y
162
173
163
- def inverse_transform (
164
- self ,
165
- y : SupportedTargetTypes ,
166
- ) -> np .ndarray :
174
+ def inverse_transform (self , y : SupportedTargetTypes ) -> np .ndarray :
167
175
"""
168
176
Revert any encoding transformation done on a target array
169
177
@@ -197,10 +205,7 @@ def inverse_transform(
197
205
y = y .astype (self .dtype )
198
206
return y
199
207
200
- def _check_data (
201
- self ,
202
- y : SupportedTargetTypes ,
203
- ) -> None :
208
+ def _check_data (self , y : SupportedTargetTypes ) -> None :
204
209
"""
205
210
Perform dimensionality and data type checks on the targets
206
211
0 commit comments