Skip to content

Commit 532cabb

Browse files
committed
Adds documentation, example and fixes setup.py
1 parent 4b13ae1 commit 532cabb

File tree

6 files changed

+115
-19
lines changed

6 files changed

+115
-19
lines changed

autoPyTorch/api/base_task.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,7 @@ def _search(
701701
precision: int = 32,
702702
disable_file_output: List = [],
703703
load_models: bool = True,
704+
run_greedy_portfolio: bool = False
704705
) -> 'BaseTask':
705706
"""
706707
Search for the best pipeline configuration for the given dataset.
@@ -771,7 +772,12 @@ def _search(
771772
disable_file_output (Union[bool, List]):
772773
load_models (bool), (default=True): Whether to load the
773774
models after fitting AutoPyTorch.
774-
775+
run_greedy_portfolio (bool), (default=False): If True,
776+
runs initial configurations present in
777+
'autoPyTorch/optimizer/greedy_portfolio.json'.
778+
These configurations are the best performing configurations
779+
when search was performed on meta training datasets.
780+
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
775781
Returns:
776782
self
777783
@@ -953,7 +959,8 @@ def _search(
953959
# We do not increase the num_run here, this is something
954960
# smac does internally
955961
start_num_run=self._backend.get_next_num_run(peek=True),
956-
search_space_updates=self.search_space_updates
962+
search_space_updates=self.search_space_updates,
963+
run_greedy_portfolio=run_greedy_portfolio
957964
)
958965
try:
959966
run_history, self.trajectory, budget_type = \

autoPyTorch/api/tabular_classification.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,11 @@ def search(
201201
load_models (bool), (default=True): Whether to load the
202202
models after fitting AutoPyTorch.
203203
run_greedy_portfolio (bool), (default=False): If True,
204-
runs initial
204+
runs initial configurations present in
205+
'autoPyTorch/optimizer/greedy_portfolio.json'.
206+
These configurations are the best performing configurations
207+
when search was performed on meta training datasets.
208+
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
205209
206210
Returns:
207211
self
@@ -248,6 +252,7 @@ def search(
248252
precision=precision,
249253
disable_file_output=disable_file_output,
250254
load_models=load_models,
255+
run_greedy_portfolio=run_greedy_portfolio
251256
)
252257

253258
def predict(

autoPyTorch/api/tabular_regression.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ def search(
123123
precision: int = 32,
124124
disable_file_output: List = [],
125125
load_models: bool = True,
126+
run_greedy_portfolio: bool = False
126127
) -> 'BaseTask':
127128
"""
128129
Search for the best pipeline configuration for the given dataset.
@@ -187,7 +188,12 @@ def search(
187188
disable_file_output (Union[bool, List]):
188189
load_models (bool), (default=True): Whether to load the
189190
models after fitting AutoPyTorch.
190-
191+
run_greedy_portfolio (bool), (default=False): If True,
192+
runs initial configurations present in
193+
'autoPyTorch/optimizer/greedy_portfolio.json'.
194+
These configurations are the best performing configurations
195+
when search was performed on meta training datasets.
196+
For more info refer to `AutoPyTorch Tabular <https://arxiv.org/abs/2006.13799>`_
191197
Returns:
192198
self
193199
@@ -233,6 +239,7 @@ def search(
233239
precision=precision,
234240
disable_file_output=disable_file_output,
235241
load_models=load_models,
242+
run_greedy_portfolio=run_greedy_portfolio
236243
)
237244

238245
def predict(

autoPyTorch/optimizer/smbo.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,8 @@ def __init__(self,
108108
all_supported_metrics: bool = True,
109109
ensemble_callback: typing.Optional[EnsembleBuilderManager] = None,
110110
logger_port: typing.Optional[int] = None,
111-
search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None
111+
search_space_updates: typing.Optional[HyperparameterSearchSpaceUpdates] = None,
112+
run_greedy_portfolio: bool = False
112113
):
113114
"""
114115
Interface to SMAC. This method calls the SMAC optimize method, and allows
@@ -157,7 +158,9 @@ def __init__(self,
157158
Allows to create a user specified SMAC object
158159
ensemble_callback (typing.Optional[EnsembleBuilderManager]):
159160
A callback used in this scenario to start ensemble building subtasks
160-
161+
run_greedy_portfolio (bool), (default=False): If True,
162+
runs initial configurations present in
163+
'autoPyTorch/optimizer/greedy_portfolio.json'.
161164
"""
162165
super(AutoMLSMBO, self).__init__()
163166
# data related
@@ -213,16 +216,18 @@ def __init__(self,
213216
# read and validate initial configurations
214217
initial_configurations = json.load(open(os.path.join(os.path.dirname(__file__), 'greedy_portfolio.json')))
215218

216-
self.initial_configurations: typing.List[Configuration] = list()
217-
for configuration_dict in initial_configurations:
218-
try:
219-
configuration = Configuration(self.config_space, configuration_dict)
220-
self.initial_configurations.append(configuration)
221-
except Exception as e:
222-
self.logger.warning(f"Failed to convert {configuration_dict} into"
223-
f" a Configuration with error {e}. "
224-
f"Therefore, it can't be used as an initial "
225-
f"configuration as it does not match the current config space. ")
219+
self.initial_configurations: typing.Optional[typing.List[Configuration]] = None
220+
if run_greedy_portfolio:
221+
self.initial_configurations = list()
222+
for configuration_dict in initial_configurations:
223+
try:
224+
configuration = Configuration(self.config_space, configuration_dict)
225+
self.initial_configurations.append(configuration)
226+
except Exception as e:
227+
self.logger.warning(f"Failed to convert {configuration_dict} into"
228+
f" a Configuration with error {e}. "
229+
f"Therefore, it can't be used as an initial "
230+
f"configuration as it does not match the current config space. ")
226231

227232
def reset_data_manager(self) -> None:
228233
if self.datamanager is not None:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
============================================
3+
Tabular Classification with Greedy Portfolio
4+
============================================
5+
6+
The following example shows how to fit a sample classification model
7+
with AutoPyTorch using the greedy portfolio.
8+
"""
9+
import os
10+
import tempfile as tmp
11+
import warnings
12+
13+
os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
14+
os.environ['OMP_NUM_THREADS'] = '1'
15+
os.environ['OPENBLAS_NUM_THREADS'] = '1'
16+
os.environ['MKL_NUM_THREADS'] = '1'
17+
18+
warnings.simplefilter(action='ignore', category=UserWarning)
19+
warnings.simplefilter(action='ignore', category=FutureWarning)
20+
21+
import sklearn.datasets
22+
import sklearn.model_selection
23+
24+
from autoPyTorch.api.tabular_classification import TabularClassificationTask
25+
26+
27+
if __name__ == '__main__':
28+
29+
############################################################################
30+
# Data Loading
31+
# ============
32+
X, y = sklearn.datasets.fetch_openml(data_id=40981, return_X_y=True, as_frame=True)
33+
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
34+
X,
35+
y,
36+
random_state=42,
37+
)
38+
39+
############################################################################
40+
# Build and fit a classifier
41+
# ==========================
42+
api = TabularClassificationTask(
43+
seed=42,
44+
)
45+
46+
############################################################################
47+
# Search for an ensemble of machine learning algorithms
48+
# =====================================================
49+
api.search(
50+
X_train=X_train,
51+
y_train=y_train,
52+
X_test=X_test.copy(),
53+
y_test=y_test.copy(),
54+
optimize_metric='accuracy',
55+
total_walltime_limit=300,
56+
func_eval_time_limit_secs=50,
57+
# Setting this option to True
58+
# will make smac run the configurations
59+
# present in 'autoPyTorch/optimizer/greedy_portfolio.json'
60+
run_greedy_portfolio=True
61+
)
62+
63+
############################################################################
64+
# Print the final ensemble performance
65+
# ====================================
66+
print(api.run_history, api.trajectory)
67+
y_pred = api.predict(X_test)
68+
score = api.score(y_pred, y_test)
69+
print(score)
70+
# Print the final ensemble built by AutoPyTorch
71+
print(api.show_models())

setup.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
# noinspection PyInterpreter
1212
setuptools.setup(
1313
name="autoPyTorch",
14-
version="0.0.3",
14+
version="0.1.0",
1515
author="AutoML Freiburg",
1616
author_email="zimmerl@informatik.uni-freiburg.de",
17-
description=("Auto-PyTorch searches neural architectures using BO-HB"),
17+
description=("Auto-PyTorch searches neural architectures using smac"),
1818
long_description=long_description,
1919
url="https://github.com/automl/Auto-PyTorch",
2020
long_description_content_type="text/markdown",
@@ -59,5 +59,6 @@
5959
"docs": ["sphinx", "sphinx-gallery", "sphinx_bootstrap_theme", "numpydoc"],
6060
},
6161
test_suite="pytest",
62-
data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json'])]
62+
data_files=[('configs', ['autoPyTorch/configs/default_pipeline_options.json']),
63+
('portfolio', ['autoPyTorch/optimizer/greedy_portfolio.json'])]
6364
)

0 commit comments

Comments
 (0)