
Commit f9fe056

[ADD] Get incumbent config (#175)
* In progress get_incumbent_results
* [Add] get_incumbent_results to base task, changed additional info in abstract evaluator, and tests
* In progress addressing Francisco's comment
* Proper check for include_traditional
* Fix flake
* Mock search of estimator
* Fixed path of run history test_api
* Addressed comments from Francisco, making better tests
* fix flake
* After rebase fix issues
* fix flake
* Added debug information for API
* filtering only successful runs in get_incumbent_results
* Address comments from Francisco
* Revert changes made to run history assertion in base task #1257
* fix flake issue
1 parent 855c57e commit f9fe056

File tree

7 files changed: +1470 -48 lines changed

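At a glance: `BaseTask.get_incumbent_results` goes from a `pass` stub to a working query over the SMAC run history, evaluators now tag their runs with a `configuration_origin`, and `_search` gains a `tae_func` hook. A minimal usage sketch of the new API follows; the `TabularClassificationTask` entry point, the dataset loading, and the `search()` arguments are illustrative assumptions, not part of this diff — only `get_incumbent_results` is new here.

# Hedged sketch: assumes an autoPyTorch install where TabularClassificationTask
# and these search() arguments exist; only get_incumbent_results is new in this commit.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

from autoPyTorch.api.tabular_classification import TabularClassificationTask

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

api = TabularClassificationTask(seed=42)
api.search(X_train=X_train, y_train=y_train,
           X_test=X_test, y_test=y_test,
           optimize_metric='accuracy',
           total_walltime_limit=300)

# New in this commit: returns the lowest-cost successful run.
# include_traditional=False (the default) drops the traditional ML baselines.
incumbent_config, incumbent_results = api.get_incumbent_results()
print(incumbent_config)               # ConfigSpace Configuration of the best pipeline
print(incumbent_results['opt_loss'])  # losses stored in additional_info by finish_up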

autoPyTorch/api/base_task.py (+53 -35)
@@ -12,7 +12,7 @@
 import unittest.mock
 import warnings
 from abc import abstractmethod
-from typing import Any, Callable, Dict, List, Optional, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast

 from ConfigSpace.configuration_space import Configuration, ConfigurationSpace

@@ -223,9 +223,7 @@ def build_pipeline(self, dataset_properties: Dict[str, Any]) -> BasePipeline:
         """
         raise NotImplementedError

-    def set_pipeline_config(
-            self,
-            **pipeline_config_kwargs: Any) -> None:
+    def set_pipeline_config(self, **pipeline_config_kwargs: Any) -> None:
         """
         Check whether arguments are valid and
         then sets them to the current pipeline

@@ -259,12 +257,6 @@ def get_pipeline_options(self) -> dict:
         """
         return self.pipeline_options

-    # def set_search_space(self, search_space: ConfigurationSpace) -> None:
-    #     """
-    #     Update the search space.
-    #     """
-    #     raise NotImplementedError
-    #
     def get_search_space(self, dataset: BaseDataset = None) -> ConfigurationSpace:
         """
         Returns the current search space as ConfigurationSpace object.

@@ -406,9 +398,9 @@ def _close_dask_client(self) -> None:
         None
         """
         if (
-                hasattr(self, '_is_dask_client_internally_created')
-                and self._is_dask_client_internally_created
-                and self._dask_client
+            hasattr(self, '_is_dask_client_internally_created')
+            and self._is_dask_client_internally_created
+            and self._dask_client
         ):
             self._dask_client.shutdown()
             self._dask_client.close()

@@ -661,10 +653,11 @@ def _do_traditional_prediction(self, time_left: int, func_eval_time_limit_secs:
                 f"Fitting {cls} took {runtime}s, performance:{cost}/{additional_info}")
             configuration = additional_info['pipeline_configuration']
             origin = additional_info['configuration_origin']
+            additional_info.pop('pipeline_configuration')
             run_history.add(config=configuration, cost=cost,
                             time=runtime, status=status, seed=self.seed,
                             starttime=starttime, endtime=starttime + runtime,
-                            origin=origin)
+                            origin=origin, additional_info=additional_info)
         else:
             if additional_info.get('exitcode') == -6:
                 self._logger.error(

@@ -710,6 +703,7 @@ def _search(
         memory_limit: Optional[int] = 4096,
         smac_scenario_args: Optional[Dict[str, Any]] = None,
         get_smac_object_callback: Optional[Callable] = None,
+        tae_func: Optional[Callable] = None,
         all_supported_metrics: bool = True,
         precision: int = 32,
         disable_file_output: List = [],

@@ -777,6 +771,10 @@
                 instances, num_params, runhistory, seed and ta. This is
                 an advanced feature. Use only if you are familiar with
                 [SMAC](https://automl.github.io/SMAC3/master/index.html).
+            tae_func (Optional[Callable]):
+                TargetAlgorithm to be optimised. If None, `eval_function`
+                available in autoPyTorch/evaluation/train_evaluator is used.
+                Must be a child class of AbstractEvaluator.
             all_supported_metrics (bool), (default=True): if True, all
                 metrics supporting current task will be calculated
                 for each pipeline and results will be available via cv_results

@@ -988,7 +986,7 @@
             )
             try:
                 run_history, self.trajectory, budget_type = \
-                    _proc_smac.run_smbo()
+                    _proc_smac.run_smbo(func=tae_func)
                 self.run_history.update(run_history, DataOrigin.INTERNAL)
                 trajectory_filename = os.path.join(
                     self._backend.get_smac_output_directory_for_run(self.seed),

@@ -1042,10 +1040,10 @@
         return self

     def refit(
-        self,
-        dataset: BaseDataset,
-        budget_config: Dict[str, Union[int, str]] = {},
-        split_id: int = 0
+            self,
+            dataset: BaseDataset,
+            budget_config: Dict[str, Union[int, str]] = {},
+            split_id: int = 0
     ) -> "BaseTask":
         """
         Refit all models found with fit to new data.

@@ -1181,10 +1179,10 @@ def fit(self,
         return pipeline

     def predict(
-        self,
-        X_test: np.ndarray,
-        batch_size: Optional[int] = None,
-        n_jobs: int = 1
+            self,
+            X_test: np.ndarray,
+            batch_size: Optional[int] = None,
+            n_jobs: int = 1
     ) -> np.ndarray:
         """Generate the estimator predictions.
         Generate the predictions based on the given examples from the test set.

@@ -1234,9 +1232,9 @@ def predict(
         return predictions

     def score(
-        self,
-        y_pred: np.ndarray,
-        y_test: Union[np.ndarray, pd.DataFrame]
+            self,
+            y_pred: np.ndarray,
+            y_test: Union[np.ndarray, pd.DataFrame]
     ) -> Dict[str, float]:
         """Calculate the score on the test set.
         Calculate the evaluation measure on the test set.

@@ -1277,17 +1275,37 @@ def __del__(self) -> None:
         if hasattr(self, '_backend'):
             self._backend.context.delete_directories(force=False)

-    @typing.no_type_check
     def get_incumbent_results(
-            self
-    ):
-        pass
+            self,
+            include_traditional: bool = False
+    ) -> Tuple[Configuration, Dict[str, Union[int, str, float]]]:
+        """
+        Get the incumbent config and the corresponding results
+        Args:
+            include_traditional: Whether to include results from traditional pipelines

-    @typing.no_type_check
-    def get_incumbent_config(
-            self
-    ):
-        pass
+        Returns:
+
+        """
+        assert self.run_history is not None, "No Run History found, search has not been called."
+        if self.run_history.empty():
+            raise ValueError("Run History is empty. Something went wrong, "
+                             "smac was not able to fit any model?")
+
+        run_history_data = self.run_history.data
+        if not include_traditional:
+            # traditional classifiers have trainer_configuration in their additional info
+            run_history_data = dict(
+                filter(lambda elem: elem[1].additional_info is not None and elem[1].
+                       additional_info['configuration_origin'] != 'traditional',
+                       run_history_data.items()))
+        run_history_data = dict(
+            filter(lambda elem: 'SUCCESS' in str(elem[1].status), run_history_data.items()))
+        sorted_runvalue_by_cost = sorted(run_history_data.items(), key=lambda item: item[1].cost)
+        incumbent_run_key, incumbent_run_value = sorted_runvalue_by_cost[0]
+        incumbent_config = self.run_history.ids_config[incumbent_run_key.config_id]
+        incumbent_results = incumbent_run_value.additional_info
+        return incumbent_config, incumbent_results

     def get_models_with_weights(self) -> List:
         if self.models_ is None or len(self.models_) == 0 or \
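To see what the new selection logic does in isolation, here is a small self-contained sketch that mirrors the filtering in `get_incumbent_results` over fake run-history entries. The `RunKey`/`RunValue` namedtuples below are hypothetical stand-ins for SMAC's run-history records, not its real classes, and the costs and origins are invented.

from collections import namedtuple

# Hypothetical stand-ins for SMAC's run-history key/value records.
RunKey = namedtuple('RunKey', ['config_id'])
RunValue = namedtuple('RunValue', ['cost', 'status', 'additional_info'])

data = {
    RunKey(1): RunValue(0.30, 'StatusType.SUCCESS',
                        {'configuration_origin': 'traditional'}),
    RunKey(2): RunValue(0.25, 'StatusType.SUCCESS',
                        {'configuration_origin': 'Initial design'}),
    RunKey(3): RunValue(0.10, 'StatusType.CRASHED',
                        {'configuration_origin': 'Local search'}),
}

include_traditional = False
runs = list(data.items())
if not include_traditional:
    # Traditional baselines are identified via the new 'configuration_origin' tag.
    runs = [(k, v) for k, v in runs
            if v.additional_info is not None
            and v.additional_info['configuration_origin'] != 'traditional']
# Keep only successful runs, then pick the lowest cost as the incumbent.
runs = [(k, v) for k, v in runs if 'SUCCESS' in str(v.status)]
incumbent_key, incumbent_value = sorted(runs, key=lambda item: item[1].cost)[0]
print(incumbent_key.config_id, incumbent_value.cost)  # -> 2 0.25

Note how the cheapest run (config 3) loses because it crashed, and config 1 is excluded as a traditional baseline, so config 2 becomes the incumbent.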

autoPyTorch/evaluation/abstract_evaluator.py (+5 -5)
@@ -114,7 +114,8 @@ def get_additional_run_info(self) -> Dict[str, Any]:  # pylint: disable=R0201
             Can be found in autoPyTorch/pipeline/components/setup/traditional_ml/classifier_configs
         """
         return {'pipeline_configuration': self.configuration,
-                'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config()}
+                'trainer_configuration': self.pipeline.named_steps['model_trainer'].choice.model.get_config(),
+                'configuration_origin': 'traditional'}

     def get_pipeline_representation(self) -> Dict[str, str]:
         return self.pipeline.get_pipeline_representation()

@@ -178,7 +179,7 @@ def estimator_supports_iterative_fit(self) -> bool:  # pylint: disable=R0201
         return False

     def get_additional_run_info(self) -> Dict:  # pylint: disable=R0201
-        return {}
+        return {'configuration_origin': 'DUMMY'}

     def get_pipeline_representation(self) -> Dict[str, str]:
         return {

@@ -237,7 +238,7 @@ def estimator_supports_iterative_fit(self) -> bool:  # pylint: disable=R0201
         return False

     def get_additional_run_info(self) -> Dict:  # pylint: disable=R0201
-        return {}
+        return {'configuration_origin': 'DUMMY'}

     @staticmethod
     def get_default_pipeline_options() -> Dict[str, Any]:

@@ -611,8 +612,7 @@ def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
         additional_run_info = (
             {} if additional_run_info is None else additional_run_info
         )
-        for metric_name, value in loss.items():
-            additional_run_info[metric_name] = value
+        additional_run_info['opt_loss'] = loss
         additional_run_info['duration'] = self.duration
         additional_run_info['num_run'] = self.num_run
         if train_loss is not None:
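Two points worth noting about the resulting `additional_run_info` shape: losses are no longer flattened into top-level per-metric keys but nested under a single 'opt_loss' entry, and evaluators now report where their configuration came from. A before/after sketch, with made-up metric names and values purely for illustration:

# Illustrative only: the metric names and numbers here are invented.
loss = {'accuracy': 0.05, 'balanced_accuracy': 0.07}

# Before this commit: finish_up copied each metric into the top level.
old_info = {'accuracy': 0.05, 'balanced_accuracy': 0.07,
            'duration': 12.3, 'num_run': 7}

# After this commit: the whole loss dict lives under 'opt_loss', and the
# origin tag from get_additional_run_info ('traditional' for baselines,
# 'DUMMY' for the dummy evaluators above) is what get_incumbent_results
# filters on.
new_info = {'opt_loss': loss,
            'duration': 12.3, 'num_run': 7,
            'configuration_origin': 'DUMMY'}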

autoPyTorch/evaluation/train_evaluator.py (+5 -4)
@@ -172,10 +172,11 @@ def fit_predict_and_loss(self) -> None:

         status = StatusType.SUCCESS

-        self.logger.debug("In train evaluator fit_predict_and_loss, num_run: {} loss:{}".format(
-            self.num_run,
-            loss
-        ))
+        self.logger.debug("In train evaluator fit_predict_and_loss, num_run: {} loss:{},"
+                          " additional run info:{}, status: {}".format(self.num_run,
+                                                                       loss,
+                                                                       additional_run_info,
+                                                                       status))
         self.finish_up(
             loss=loss,
             train_loss=train_loss,
