-
Notifications
You must be signed in to change notification settings - Fork 86
/
Copy pathbuilder.py
363 lines (278 loc) · 14.7 KB
/
builder.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
from __future__ import annotations
from typing import Any, Dict, List, Optional, Type, Union
from golem.core.optimisers.optimizer import GraphOptimizer
from fedot.api.main import Fedot
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.quality_metrics_repository import MetricCallable, MetricsEnum
from fedot.core.repository.tasks import TaskParams
class DefaultParamValue:
def __repr__(self):
return '<default value>' # Visible in the documentation.
DEFAULT_VALUE = DefaultParamValue() # Mock value used to filter out unset parameters.
class FedotBuilder:
""" An alternative FEDOT API version with optional attributes being documented
and separated into groups by meaning.
Each of the groups has corresponding setter method, named starting with :obj:`setup_*`.
Use these methods to set corresponding API attributes:
* :meth:`~FedotBuilder.setup_composition` -> general AutoML parameters
* :meth:`~FedotBuilder.setup_parallelization` -> parameters of computational parallelization by CPU jobs
* :meth:`~FedotBuilder.setup_output` -> parameters of outputs: logging, cache directories, etc.
* :meth:`~FedotBuilder.setup_evolution` -> parameters of ML pipelines evolutionary optimization
* :meth:`~FedotBuilder.setup_pipeline_structure` -> constrains on ML pipeline structure
* :meth:`~FedotBuilder.setup_pipeline_evaluation` -> parameters of ML pipelines quality evaluation
* :meth:`~FedotBuilder.setup_data_preprocessing` -> parameters of input data preprocessing
After all demanded attributes are set, use :meth:`~FedotBuilder.build` to get a parametrized instance of
:class:`~fedot.api.main.Fedot`.
Examples:
Example 1:
.. literalinclude:: ../../../examples/simple/api_builder/classification_with_api_builder.py
Example 2:
.. literalinclude:: ../../../examples/simple/api_builder/multiple_ts_forecasting_tasks.py
Args:
problem: name of a modelling problem to solve.
.. details:: Possible options:
- ``classification`` -> for classification task
- ``regression`` -> for regression task
- ``ts_forecasting`` -> for time series forecasting task
"""
def __init__(self, problem: str):
self.api_params: Dict[Any, Any] = dict(problem=problem)
def __update_params(self, **new_params):
""" Saves all parameters set by user to the dictionary ``self.api_params``. """
new_params = {k: v for k, v in new_params.items() if v != DEFAULT_VALUE}
self.api_params.update(new_params)
def setup_composition(
self,
timeout: Optional[float] = DEFAULT_VALUE,
task_params: TaskParams = DEFAULT_VALUE,
seed: int = DEFAULT_VALUE,
preset: str = DEFAULT_VALUE,
with_tuning: bool = DEFAULT_VALUE,
use_meta_rules: bool = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets general AutoML parameters.
Args:
timeout: time for model design (in minutes): ``None`` or ``-1`` means infinite time.
task_params: additional parameters of a task.
seed: value for a fixed random seed.
preset: name of the preset for model building (e.g. ``best_quality``, ``fast_train``, ``gpu``).
Default value is ``auto``.
.. details:: Possible options:
- ``best_quality`` -> All models that are available for this data type and task are used
- ``fast_train`` -> Models that learn quickly. This includes preprocessing operations
(data operations) that only reduce the dimensionality of the data, but cannot increase it.
For example, there are no polynomial features and one-hot encoding operations
- ``stable`` -> The most reliable preset in which the most stable operations are included
- ``auto`` -> Automatically determine which preset should be used
- ``gpu`` -> Models that use GPU resources for computation
- ``ts`` -> A special preset with models for time series forecasting task
- ``automl`` -> A special preset with only AutoML libraries such as TPOT and H2O as operations
with_tuning: flag for tuning hyperparameters of the final evolved
:class:`~fedot.core.pipelines.pipeline.Pipeline`. Defaults to ``True``.
use_meta_rules: indicates whether to change set parameters according to FEDOT meta rules.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
timeout=timeout,
task_params=task_params,
seed=seed,
preset=preset,
with_tuning=with_tuning,
use_meta_rules=use_meta_rules,
)
return self
def setup_parallelization(
self,
n_jobs: int = DEFAULT_VALUE,
parallelization_mode: str = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets parameters of computational parallelization by CPU jobs.
Args:
n_jobs: num of `jobs` for parallelization (set to ``-1`` to use all cpu's). Defaults to ``-1``.
parallelization_mode: type of evaluation for groups of individuals (``populational`` or
``sequential``). Default value is ``populational``.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
n_jobs=n_jobs,
parallelization_mode=parallelization_mode,
)
return self
def setup_output(
self,
logging_level: int = DEFAULT_VALUE,
show_progress: bool = DEFAULT_VALUE,
keep_history: bool = DEFAULT_VALUE,
history_dir: Optional[str] = DEFAULT_VALUE,
cache_dir: Optional[str] = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets parameters of outputs: logging, cache directories, etc.
Args:
logging_level: logging levels are the same as in
`built-in logging library <https://docs.python.org/3/library/logging.html>`_.
.. details:: Possible options:
- ``50`` -> critical
- ``40`` -> error
- ``30`` -> warning
- ``20`` -> info
- ``10`` -> debug
- ``0`` -> nonset
show_progress: indicates whether to show progress using tqdm/tuner or not. Defaults to ``True``.
keep_history: indicates if the framework should track evolutionary optimization history
for possible further analysis. Defaults to ``True``.
history_dir: relative or absolute path of a folder for composing history.
By default, creates a folder named "FEDOT" in temporary system files of an OS.
A relative path is relative to the default value.
cache_dir: path to a directory containing cache files (if any cache is enabled).
By default, creates a folder named "FEDOT" in temporary system files of an OS.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
logging_level=logging_level,
show_progress=show_progress,
keep_history=keep_history,
history_dir=history_dir,
cache_dir=cache_dir,
)
return self
def setup_evolution(
self,
initial_assumption: Union[Pipeline, List[Pipeline]] = DEFAULT_VALUE,
num_of_generations: Optional[int] = DEFAULT_VALUE,
early_stopping_iterations: int = DEFAULT_VALUE,
early_stopping_timeout: int = DEFAULT_VALUE,
pop_size: int = DEFAULT_VALUE,
keep_n_best: int = DEFAULT_VALUE,
genetic_scheme: str = DEFAULT_VALUE,
use_pipelines_cache: bool = DEFAULT_VALUE,
optimizer: Optional[Type[GraphOptimizer]] = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets parameters of ML pipelines evolutionary optimization.
Args:
initial_assumption: initial assumption(s) for composer.
Can be either a single :class:`Pipeline` or sequence of ones. Default values are task-specific and
selected by the method
:meth:`~fedot.api.api_utils.assumptions.task_assumptions.TaskAssumptions.for_task`.
early_stopping_iterations: composer will stop after ``n`` generation without improving
num_of_generations: number of evolutionary generations for composer. Defaults to ``None`` - no limit.
early_stopping_iterations: composer will stop after ``n`` generation without improving.
early_stopping_timeout: stagnation timeout in minutes: composer will stop after ``n`` minutes
without improving. Defaults to ``10``.
pop_size: size of population (generation) during composing. Defaults to ``20``.
keep_n_best: number of the best individuals in generation that survive during the evolution.
Defaults to ``1``.
genetic_scheme: name of the genetic scheme. Defaults to ``steady_state``.
use_pipelines_cache: indicates whether to use pipeline structures caching. Defaults to ``True``.
optimizer: inherit from :class:`golem.core.optimisers.optimizer.GraphOptimizer`
to specify a custom optimizer.
Default optimizer is :class:`golem.core.optimisers.genetic.gp_optimizer.EvoGraphOptimizer`.
See the :doc:`example </advanced/external_optimizer>`.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
initial_assumption=initial_assumption,
num_of_generations=num_of_generations,
early_stopping_iterations=early_stopping_iterations,
early_stopping_timeout=early_stopping_timeout,
pop_size=pop_size,
keep_n_best=keep_n_best,
genetic_scheme=genetic_scheme,
use_pipelines_cache=use_pipelines_cache,
optimizer=optimizer,
)
return self
def setup_pipeline_structure(
self,
available_operations: List[str] = DEFAULT_VALUE,
max_depth: int = DEFAULT_VALUE,
max_arity: int = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets constrains on ML pipeline structure.
Args:
available_operations: list of model names to use. Pick the names according to operations repository.
max_depth: max depth of a pipeline. Defaults to ``6``.
max_arity: max arity of a pipeline nodes. Defaults to ``3``.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
available_operations=available_operations,
max_depth=max_depth,
max_arity=max_arity,
)
return self
def setup_pipeline_evaluation(
self,
metric: Union[str, MetricCallable, MetricsEnum, List[Union[str, MetricCallable, MetricsEnum]]] = DEFAULT_VALUE,
cv_folds: int = DEFAULT_VALUE,
max_pipeline_fit_time: Optional[int] = DEFAULT_VALUE,
collect_intermediate_metric: bool = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets parameters of ML pipelines quality evaluation.
Args:
metric:
metric for quality calculation during composing, also is used for tuning if ``with_tuning=True``.
.. details:: Default value depends on a given task:
- ``roc_auc`` -> for classification
- ``rmse`` -> for regression & time series forecasting
.. details:: Available metrics are listed in the following enumerations:
- classification -> \
:class:`~fedot.core.repository.quality_metrics_repository.ClassificationMetricsEnum`
- regression -> \
:class:`~fedot.core.repository.quality_metrics_repository.RegressionMetricsEnum`
- time series forcasting -> \
:class:`~fedot.core.repository.quality_metrics_repository.TimeSeriesForecastingMetricsEnum`
- pipeline complexity (task-independent) -> \
:class:`~fedot.core.repository.quality_metrics_repository.ComplexityMetricsEnum`
cv_folds: number of folds for cross-validation.
.. details:: Default value depends on a given ``problem``:
- ``5`` -> for classification and regression tasks
- ``3`` -> for time series forecasting task
max_pipeline_fit_time: time constraint for operation fitting (in minutes).
Once the limit is reached, a candidate pipeline will be dropped. Defaults to ``None`` - no limit.
collect_intermediate_metric: save metrics for intermediate (non-root) nodes in composed
:class:`Pipeline`.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
metric=metric,
cv_folds=cv_folds,
max_pipeline_fit_time=max_pipeline_fit_time,
collect_intermediate_metric=collect_intermediate_metric,
)
return self
def setup_data_preprocessing(
self,
safe_mode: bool = DEFAULT_VALUE,
use_input_preprocessing: bool = DEFAULT_VALUE,
use_preprocessing_cache: bool = DEFAULT_VALUE,
) -> FedotBuilder:
""" Sets parameters of input data preprocessing.
Args:
safe_mode: if set ``True`` it will cut large datasets to prevent memory overflow and use label encoder
instead of one-hot encoder if summary cardinality of categorical features is high.
Default value is ``False``.
use_input_preprocessing: indicates whether to do preprocessing of further given data.
Defaults to ``True``.
use_preprocessing_cache: bool indicating whether to use optional preprocessors caching.
Defaults to ``True``.
Returns:
:class:`FedotBuilder` instance.
"""
self.__update_params(
safe_mode=safe_mode,
use_input_preprocessing=use_input_preprocessing,
use_preprocessing_cache=use_preprocessing_cache,
)
return self
def build(self) -> Fedot:
""" Initializes an instance of :class:`~fedot.api.main.Fedot` with accumulated parameters.
Returns:
:class:`~fedot.api.main.Fedot` instance.
"""
return Fedot(**self.api_params)