automl
diff --git a/‎development/_downloads/3b0b756ccfcac69e6a1673e56f2f543f/example_visualization.ipynb
+1-1 b/‎development/_downloads/3b0b756ccfcac69e6a1673e56f2f543f/example_visualization.ipynb
+1-1
diff --git a/‎development/_downloads/83b132442a977189d27fd09560b2bd34/example_single_configuration.ipynb
+126 b/‎development/_downloads/83b132442a977189d27fd09560b2bd34/example_single_configuration.ipynb
+126
diff --git a/‎development/_downloads/a4083e360cf01f594602cbaf737091b0/example_visualization.py
-15 b/‎development/_downloads/a4083e360cf01f594602cbaf737091b0/example_visualization.py
-15
diff --git a/‎development/_downloads/b71f91b170d4a46b842cd9568511ced2/example_single_configuration.py
+81 b/‎development/_downloads/b71f91b170d4a46b842cd9568511ced2/example_single_configuration.py
+81
diff --git a/‎development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
2.75 KB b/‎development/_downloads/bc82bea3a5dd7bdba60b65220891d9e5/examples_python.zip
2.75 KB
diff --git a/‎development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
4.38 KB b/‎development/_downloads/fb625db3c50d423b1b7881136ffdeec8/examples_jupyter.zip
4.38 KB
diff --git a/‎development/_images/sphx_glr_example_plot_over_time_001.png
-395 Bytes b/‎development/_images/sphx_glr_example_plot_over_time_001.png
-395 Bytes
diff --git a/‎development/_images/sphx_glr_example_plot_over_time_thumb.png
-1.21 KB b/‎development/_images/sphx_glr_example_plot_over_time_thumb.png
-1.21 KB
diff --git a/‎development/_images/sphx_glr_example_single_configuration_thumb.png
26.2 KB b/‎development/_images/sphx_glr_example_single_configuration_thumb.png
26.2 KB
diff --git a/‎development/_images/sphx_glr_example_visualization_001.png
3.11 KB b/‎development/_images/sphx_glr_example_visualization_001.png
3.11 KB
diff --git a/‎development/_images/sphx_glr_example_visualization_002.png
-17.7 KB b/‎development/_images/sphx_glr_example_visualization_002.png
-17.7 KB
diff --git a/‎development/_images/sphx_glr_example_visualization_thumb.png
2.32 KB b/‎development/_images/sphx_glr_example_visualization_thumb.png
2.32 KB
@@ -98,7 +98,7 @@
       },
       "outputs": [],
       "source": [
-        "# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n    if run_value.status != StatusType.SUCCESS:\n        # Ignore crashed runs\n        continue\n    individual_performances.append({\n        'Timestamp': pd.Timestamp(\n            time.strftime(\n                '%Y-%m-%d %H:%M:%S',\n                time.localtime(run_value.endtime)\n            )\n        ),\n        'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n        'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n        accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n    })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n    'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n    'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n    'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n    'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n    ensemble_performance_frame,\n    individual_performance_frame,\n    on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n    
x='Timestamp',\n    kind='line',\n    legend=True,\n    title='Auto-PyTorch accuracy over time',\n    grid=True,\n)\nplt.show()\n\n# We then can understand the importance of each input feature using\n# a permutation importance analysis. This is done as a proof of concept, to\n# showcase that we can leverage of scikit-learn API.\nresult = permutation_importance(estimator, X_train, y_train, n_repeats=5,\n                                scoring='accuracy',\n                                random_state=seed)\nsorted_idx = result.importances_mean.argsort()\n\nfig, ax = plt.subplots()\nax.boxplot(result.importances[sorted_idx].T,\n           vert=False, labels=X_test.columns[sorted_idx])\nax.set_title(\"Permutation Importances (Train set)\")\nfig.tight_layout()\nplt.show()"
+        "# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n    if run_value.status != StatusType.SUCCESS:\n        # Ignore crashed runs\n        continue\n    individual_performances.append({\n        'Timestamp': pd.Timestamp(\n            time.strftime(\n                '%Y-%m-%d %H:%M:%S',\n                time.localtime(run_value.endtime)\n            )\n        ),\n        'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n        'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n        accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n    })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n    'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n    'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n    'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n    'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n    ensemble_performance_frame,\n    individual_performance_frame,\n    on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n    
x='Timestamp',\n    kind='line',\n    legend=True,\n    title='Auto-PyTorch accuracy over time',\n    grid=True,\n)\nplt.show()"
       ]
     }
   ],
 
@@ -0,0 +1,126 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Fit a single configuration\n*Auto-PyTorch* searches for the best combination of machine learning algorithms\nand their hyper-parameter configuration for a given task.\nThis example shows how one can fit one of these pipelines, both with a user-defined\nconfiguration and with one randomly sampled from the configuration space.\nThe pipelines that Auto-PyTorch fits are compatible with the Scikit-Learn API. You can\nget further documentation about Scikit-Learn models here: <https://scikit-learn.org/stable/getting_started.html>\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "import os\nimport tempfile as tmp\nimport warnings\n\nos.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()\nos.environ['OMP_NUM_THREADS'] = '1'\nos.environ['OPENBLAS_NUM_THREADS'] = '1'\nos.environ['MKL_NUM_THREADS'] = '1'\n\nwarnings.simplefilter(action='ignore', category=UserWarning)\nwarnings.simplefilter(action='ignore', category=FutureWarning)\n\nimport sklearn.datasets\nimport sklearn.metrics\nimport sklearn.model_selection  # explicit: train_test_split is used below\n\nfrom autoPyTorch.api.tabular_classification import TabularClassificationTask\nfrom autoPyTorch.datasets.resampling_strategy import HoldoutValTypes"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Data Loading\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)\nX_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(\n    X, y, test_size=0.5, random_state=3\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Define an estimator\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "estimator = TabularClassificationTask(\n    resampling_strategy=HoldoutValTypes.holdout_validation,\n    resampling_strategy_args={'val_share': 0.5},\n)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Get a configuration of the pipeline for current dataset\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "dataset = estimator.get_dataset(X_train=X_train,\n                                y_train=y_train,\n                                X_test=X_test,\n                                y_test=y_test,\n                                dataset_name='kr-vs-kp')\nconfiguration = estimator.get_search_space(dataset).get_default_configuration()\n\nprint(\"Passed Configuration:\", configuration)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Fit the configuration\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset,\n                                                                configuration=configuration,\n                                                                budget_type='epochs',\n                                                                budget=10,\n                                                                run_time_limit_secs=100\n                                                                )\n\n# The fit_pipeline command also returns a named tuple with the pipeline constraints\nprint(run_info)\n\n# The fit_pipeline command also returns a named tuple with train/test performance\nprint(run_value)\n\n# This object complies with Scikit-Learn Pipeline API.\n# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html\nprint(pipeline.named_steps)"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -149,18 +149,3 @@
     grid=True,
 )
 plt.show()
-
-# We then can understand the importance of each input feature using
-# a permutation importance analysis. This is done as a proof of concept, to
-# showcase that we can leverage of scikit-learn API.
-result = permutation_importance(estimator, X_train, y_train, n_repeats=5,
-                                scoring='accuracy',
-                                random_state=seed)
-sorted_idx = result.importances_mean.argsort()
-
-fig, ax = plt.subplots()
-ax.boxplot(result.importances[sorted_idx].T,
-           vert=False, labels=X_test.columns[sorted_idx])
-ax.set_title("Permutation Importances (Train set)")
-fig.tight_layout()
-plt.show()
@@ -0,0 +1,81 @@
+# -*- encoding: utf-8 -*-
+"""
+==========================
+Fit a single configuration
+==========================
+*Auto-PyTorch* searches for the best combination of machine learning algorithms
+and their hyper-parameter configuration for a given task.
+This example shows how one can fit one of these pipelines, both, with a user defined
+configuration, and a randomly sampled one form the configuration space.
+The pipelines that Auto-PyTorch fits are compatible with Scikit-Learn API. You can
+get further documentation about Scikit-Learn models here: <https://scikit-learn.org/stable/getting_started.html`>_
+"""
+import os
+import tempfile as tmp
+import warnings
+
+os.environ['JOBLIB_TEMP_FOLDER'] = tmp.gettempdir()
+os.environ['OMP_NUM_THREADS'] = '1'
+os.environ['OPENBLAS_NUM_THREADS'] = '1'
+os.environ['MKL_NUM_THREADS'] = '1'
+
+warnings.simplefilter(action='ignore', category=UserWarning)
+warnings.simplefilter(action='ignore', category=FutureWarning)
+
+import sklearn.datasets
+import sklearn.metrics
+
+from autoPyTorch.api.tabular_classification import TabularClassificationTask
+from autoPyTorch.datasets.resampling_strategy import HoldoutValTypes
+
+
+############################################################################
+# Data Loading
+# ============
+
+X, y = sklearn.datasets.fetch_openml(data_id=3, return_X_y=True, as_frame=True)
+X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
+    X, y, test_size=0.5, random_state=3
+)
+
+############################################################################
+# Define an estimator
+# ===================
+
+estimator = TabularClassificationTask(
+    resampling_strategy=HoldoutValTypes.holdout_validation,
+    resampling_strategy_args={'val_share': 0.5},
+)
+
+############################################################################
+# Get a configuration of the pipeline for current dataset
+# ===============================================================
+
+dataset = estimator.get_dataset(X_train=X_train,
+                                y_train=y_train,
+                                X_test=X_test,
+                                y_test=y_test,
+                                dataset_name='kr-vs-kp')
+configuration = estimator.get_search_space(dataset).get_default_configuration()
+
+print("Passed Configuration:", configuration)
+###########################################################################
+# Fit the configuration
+# =====================
+
+pipeline, run_info, run_value, dataset = estimator.fit_pipeline(dataset=dataset,
+                                                                configuration=configuration,
+                                                                budget_type='epochs',
+                                                                budget=10,
+                                                                run_time_limit_secs=100
+                                                                )
+
+# The fit_pipeline command also returns a named tuple with the pipeline constraints
+print(run_info)
+
+# The fit_pipeline command also returns a named tuple with train/test performance
+print(run_value)
+
+# This object complies with Scikit-Learn Pipeline API.
+# https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html
+print(pipeline.named_steps)
Original file line number	Diff line number	Diff line change
`@@ -98,7 +98,7 @@`
`98`	`98`	`},`
`99`	`99`	`"outputs": [],`
`100`	`100`	`"source": [`
`101`		- "# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n if run_value.status != StatusType.SUCCESS:\n # Ignore crashed runs\n continue\n individual_performances.append({\n 'Timestamp': pd.Timestamp(\n time.strftime(\n '%Y-%m-%d %H:%M:%S',\n time.localtime(run_value.endtime)\n )\n ),\n 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n 'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n 'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n 'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n 'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n 'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n ensemble_performance_frame,\n individual_performance_frame,\n on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n x='Timestamp',\n kind='line',\n legend=True,\n title='Auto-PyTorch accuracy over time',\n grid=True,\n)\nplt.show()\n\n# We then can 
understand the importance of each input feature using\n# a permutation importance analysis. This is done as a proof of concept, to\n# showcase that we can leverage of scikit-learn API.\nresult = permutation_importance(estimator, X_train, y_train, n_repeats=5,\n scoring='accuracy',\n random_state=seed)\nsorted_idx = result.importances_mean.argsort()\n\nfig, ax = plt.subplots()\nax.boxplot(result.importances[sorted_idx].T,\n vert=False, labels=X_test.columns[sorted_idx])\nax.set_title(\"Permutation Importances (Train set)\")\nfig.tight_layout()\nplt.show()"
	`101`	+ "# We will plot the search incumbent through time.\n\n# Collect the performance of individual machine learning algorithms\n# found by SMAC\nindividual_performances = []\nfor run_key, run_value in estimator.run_history.data.items():\n if run_value.status != StatusType.SUCCESS:\n # Ignore crashed runs\n continue\n individual_performances.append({\n 'Timestamp': pd.Timestamp(\n time.strftime(\n '%Y-%m-%d %H:%M:%S',\n time.localtime(run_value.endtime)\n )\n ),\n 'single_best_optimization_accuracy': accuracy._optimum - run_value.cost,\n 'single_best_test_accuracy': np.nan if run_value.additional_info is None else\n accuracy._optimum - run_value.additional_info['test_loss']['accuracy'],\n })\nindividual_performance_frame = pd.DataFrame(individual_performances)\n\n# Collect the performance of the ensemble through time\n# This ensemble is built from the machine learning algorithms\n# found by SMAC\nensemble_performance_frame = pd.DataFrame(estimator.ensemble_performance_history)\n\n# As we are tracking the incumbent, we are interested in the cummax() performance\nensemble_performance_frame['ensemble_optimization_accuracy'] = ensemble_performance_frame[\n 'train_accuracy'\n].cummax()\nensemble_performance_frame['ensemble_test_accuracy'] = ensemble_performance_frame[\n 'test_accuracy'\n].cummax()\nensemble_performance_frame.drop(columns=['test_accuracy', 'train_accuracy'], inplace=True)\nindividual_performance_frame['single_best_optimization_accuracy'] = individual_performance_frame[\n 'single_best_optimization_accuracy'\n].cummax()\nindividual_performance_frame['single_best_test_accuracy'] = individual_performance_frame[\n 'single_best_test_accuracy'\n].cummax()\n\npd.merge(\n ensemble_performance_frame,\n individual_performance_frame,\n on=\"Timestamp\", how='outer'\n).sort_values('Timestamp').fillna(method='ffill').plot(\n x='Timestamp',\n kind='line',\n legend=True,\n title='Auto-PyTorch accuracy over time',\n grid=True,\n)\nplt.show()"
`102`	`102`	`]`
`103`	`103`	`}`
`104`	`104`	`],`