Skip to content

Commit 4072b6f

Browse files
committed
[fix] Address Ravin's comments and loosen the small number choice
1 parent 33305fb commit 4072b6f

File tree

3 files changed: +10 -12 lines changed

3 files changed

+10
-12
lines changed

autoPyTorch/data/tabular_target_validator.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ def _modify_regression_target(y: ArrayType) -> ArrayType:
2929
# Regression targets must have numbers after a decimal point.
3030
# Ref: https://github.com/scikit-learn/scikit-learn/issues/8952
3131
y_min = np.abs(y).min()
32-
offset = y_min * 1e-16 # Sufficiently small number
33-
if y_min > 1e15:
32+
offset = max(y_min, 1e-13) * 1e-13 # Sufficiently small number
33+
if y_min > 1e12:
3434
raise ValueError(
3535
"The minimum value for the target labels of regression tasks must be smaller than "
36-
f"1e15 to avoid errors caused by an overflow, but got {y_min}"
36+
f"1e12 to avoid errors caused by an overflow, but got {y_min}"
3737
)
3838

3939
# Since the values are all integers, we can just add a random small number

autoPyTorch/datasets/base_dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def _get_output_properties(train_tensors: BaseDatasetInputType) -> Tuple[int, st
6969
target_labels = np.array(train_tensors[1])
7070

7171
output_type: str = type_of_target(target_labels)
72-
if STRING_TO_OUTPUT_TYPES.get(output_type, None) in CLASSIFICATION_OUTPUTS:
72+
if STRING_TO_OUTPUT_TYPES[output_type] in CLASSIFICATION_OUTPUTS:
7373
output_dim = len(np.unique(target_labels))
7474
elif target_labels.ndim > 1:
7575
output_dim = target_labels.shape[-1]

test/test_api/test_api.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -912,23 +912,21 @@ def test_tabular_classification_test_evaluator(openml_id, backend, n_samples):
912912
)
913913
def test_task_inference(ans, task_class, backend):
914914
# Get the data and check that contents of data-manager make sense
915-
X = np.random.random((5, 1))
916-
y = np.array([0, 1, 2, 3, 4]) + 10 ** 15
917-
918-
X_train, _, y_train, _ = sklearn.model_selection.train_test_split(X, y, random_state=42)
915+
X = np.random.random((6, 1))
916+
y = np.array([-10 ** 12, 0, 1, 2, 3, 4], dtype=np.int64) + 10 ** 12
919917

920918
estimator = task_class(
921919
backend=backend,
922920
resampling_strategy=HoldoutValTypes.holdout_validation,
923921
resampling_strategy_args=None,
924922
seed=42,
925923
)
926-
dataset = estimator.get_dataset(X_train, y_train)
924+
dataset = estimator.get_dataset(X, y)
927925
assert dataset.output_type == ans
928926

929-
y_train += 1
927+
y += 10 ** 12 + 10 # Check if the function catches overflow possibilities
930928
if ans == 'continuous':
931929
with pytest.raises(ValueError): # ValueError due to `Too large value`
932-
estimator.get_dataset(X_train, y_train)
930+
estimator.get_dataset(X, y)
933931
else:
934-
estimator.get_dataset(X_train, y_train)
932+
estimator.get_dataset(X, y)

0 commit comments

Comments
 (0)