From d5bdfd93584882e88febe3f6cfb480e841b3f006 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 14 Sep 2021 09:28:09 +0200
Subject: [PATCH 1/9] [doc] Add module doc-string and TODOs to base_task.py

---
 autoPyTorch/api/base_task.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
diff --git a/autoPyTorch/api/base_task.py b/autoPyTorch/api/base_task.py
index 94add94bd..8687b77cd 100644
--- a/autoPyTorch/api/base_task.py
+++ b/autoPyTorch/api/base_task.py
@@ -1,3 +1,18 @@
+"""Base class for tasks to solve
+* The shared components among all the tasks
+* This module provides the optimization given a pipeline
+* This module plays a role of communicating with
+  distributed clients
+
+TODO:
+    * Separate the training procedure into another class and encapsulate it
+    * Separate _do_dummy_prediction and refactor it
+    * Separate _do_traditional_prediction and refactor it
+    * Refactor _search
+    * Reduce unimportant instance variables
+    * Distinguish private variables from public ones by naming them _<var name>
+"""
+
 import copy
 import json
 import logging.handlers

From 13899df6928dfe2e7c19eb5e85a2ec7ad6a76047 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 14 Sep 2021 10:31:24 +0200
Subject: [PATCH 2/9] [doc] Add module doc-string and TODOs to
 base_feature_validator.py

---
 autoPyTorch/data/base_feature_validator.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/autoPyTorch/data/base_feature_validator.py b/autoPyTorch/data/base_feature_validator.py
index 2ef02ceba..6955dff8b 100644
--- a/autoPyTorch/data/base_feature_validator.py
+++ b/autoPyTorch/data/base_feature_validator.py
@@ -1,3 +1,15 @@
+"""Base class for the feature validator given a task
+* A wrapper class of the sklearn.base.BaseEstimator
+* The feature validator for each task inherits this class
+* Check if the provided features can be processed by AutoPyTorch
+
+TODO:
+    * SUPPORTED_FEAT_TYPES --> Enumerator
+    * Describe the shape of X
+    * typing.<type> --> <type>
+    * logging.Logger --> Logger
+"""
+
 import logging
 import typing
 

From f8957453396e3a88a2a5202ed5da0440e72e4a3a Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 14 Sep 2021 10:41:24 +0200
Subject: [PATCH 3/9] [doc] Add module doc-string and TODOs to
 base_target_validator.py

---
 autoPyTorch/data/base_target_validator.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/autoPyTorch/data/base_target_validator.py b/autoPyTorch/data/base_target_validator.py
index 44e73d42a..e3018e839 100644
--- a/autoPyTorch/data/base_target_validator.py
+++ b/autoPyTorch/data/base_target_validator.py
@@ -1,3 +1,19 @@
+"""Base class for the target (or label) validator given a task
+* A wrapper class of the sklearn.base.BaseEstimator
+* The target validator for each task inherits this class
+* Check if the provided targets (or labels) are compatible in both
+  training and test
+
+TODO:
+    * SUPPORTED_FEAT_TYPES --> Enumerator
+    * Describe the shape of y
+    * typing.<type> --> <type>
+    * logging.Logger --> Logger
+    * Rename classes_ --> get_classes
+    * Check the return of classes_
+    * is_single_column_target --> is_target_scalar
+"""
+
 import logging
 import typing
 
@@ -31,12 +47,13 @@ class BaseTargetValidator(BaseEstimator):
     """
     A class to pre-process targets. It validates the data provided during fit (to make sure
     it matches AutoPyTorch expectation) as well as encoding the targets in case of classification
+
     Attributes:
         is_classification (bool):
             A bool that indicates if the validator should operate in classification mode.
             During classification, the targets are encoded.
         encoder (typing.Optional[BaseEstimator]):
-            Host a encoder object if the data requires transformation (for example,
+            Host an encoder object if the data requires transformation (for example,
             if provided a categorical column in a pandas DataFrame)
         enc_columns (typing.List[str])
             List of columns that where encoded
@@ -175,7 +192,7 @@ def classes_(self) -> np.ndarray:
         Complies with scikit learn classes_ attribute,
         which consist of a ndarray of shape (n_classes,)
         where n_classes are the number of classes seen while fitting
-        a encoder to the targets.
+        an encoder to the targets.
         Returns:
             classes_: np.ndarray
                 The unique classes seen during encoding of a classifier

From f3282d9cf63ec37b8d98dc3c234755a394c8721f Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 14 Sep 2021 10:47:43 +0200
Subject: [PATCH 4/9] [doc] Add module doc-string and TODOs to
 base_validator.py

---
 autoPyTorch/data/base_validator.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/autoPyTorch/data/base_validator.py b/autoPyTorch/data/base_validator.py
index 7528d56ab..ff782c526 100644
--- a/autoPyTorch/data/base_validator.py
+++ b/autoPyTorch/data/base_validator.py
@@ -1,3 +1,16 @@
+"""Base class for the input validator given a task
+* A wrapper class of the sklearn.base.BaseEstimator
+* The input validator for each task inherits this class
+* Check if the provided data are compatible with the AutoPyTorch implementation
+* Manage both target_ and feature_validator in this class
+
+TODO:
+    * typing.<type> --> <type>
+    * logging.Logger --> Logger
+    * Inherit feature_validator and target_validator from a child class
+      via super().__init__()
+"""
+
 # -*- encoding: utf-8 -*-
 import logging.handlers
 import typing

From 8f864e6897f746187b48931a9284c496b90abfb4 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Tue, 14 Sep 2021 11:08:10 +0200
Subject: [PATCH 5/9] [doc] Add module doc-string and TODOs to base_dataset.py

---
 autoPyTorch/constants.py             |  7 +++++++
 autoPyTorch/datasets/base_dataset.py | 16 ++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/autoPyTorch/constants.py b/autoPyTorch/constants.py
index 652a546b9..de77f440d 100644
--- a/autoPyTorch/constants.py
+++ b/autoPyTorch/constants.py
@@ -1,3 +1,10 @@
+"""Constant variables in AutoPyTorch
+
+TODO:
+    * Make everything enumerators
+    * Avoid the usage of integers
+"""
+
 TABULAR_CLASSIFICATION = 1
 IMAGE_CLASSIFICATION = 2
 TABULAR_REGRESSION = 3
diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py
index 15a6dedf9..8393140e0 100644
--- a/autoPyTorch/datasets/base_dataset.py
+++ b/autoPyTorch/datasets/base_dataset.py
@@ -1,3 +1,19 @@
+"""Base class of the provided dataset
+* Provide data validation splits based on types of data
+* Provide API to return training and validation splits
+* Store the properties of the dataset which are required
+  by the AutoPyTorch implementation
+
+TODO:
+    * Address: https://github.com/automl/Auto-PyTorch/pull/108/
+    * Make BaseDatasetPropertiesType more informative
+    * Use private variables and public variables properly
+    * Consider more memory-efficient way to store splits
+        ==> Storing them consumes a lot of memory for huge datasets
+    * Check the usage of validation and test because cross validation
+      only uses the training dataset
+"""
+
 import os
 import uuid
 from abc import ABCMeta

From f5765ba509b25689f19e16bef360dd4bb1985aad Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Wed, 15 Sep 2021 09:56:43 +0200
Subject: [PATCH 6/9] [doc] Add module doc-string and TODOs to
 resampling_strategy.py

---
 autoPyTorch/datasets/resampling_strategy.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py
index ac96c934a..0df54866c 100644
--- a/autoPyTorch/datasets/resampling_strategy.py
+++ b/autoPyTorch/datasets/resampling_strategy.py
@@ -1,3 +1,19 @@
+"""Functions for resampling strategy or cross validation
+* Each function is used in BaseDataset to provide dataset splits
+
+TODO:
+    * DEFAULT_RESAMPLING_PARAMETERS --> keyword arguments
+    * documentation strings
+    * Make shuffle and stratified arguments rather than
+      independent methods
+    * Force the instantiation of each splitting method
+        ==> instance variables tell you what kind of splitting
+    * Delete the protocol and enumerator because they are not needed
+      once the splitting methods become classes that require instantiation
+    * resampling_strategy --> splitting_fn
+    * resampling_strategy_args --> splitting_params
+"""
+
 from enum import IntEnum
 from typing import Any, Dict, List, Optional, Tuple, Union
 

From 151055ee5eab8c3898f78abea37eb7edb3203d1e Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Wed, 15 Sep 2021 13:18:49 +0200
Subject: [PATCH 7/9] [doc] Add module doc-string and TODOs to ensemble/ except
 ensemble_builder.py

---
 autoPyTorch/ensemble/abstract_ensemble.py   |  8 +++++++
 autoPyTorch/ensemble/ensemble_selection.py  | 23 +++++++++++++++++++++
 autoPyTorch/ensemble/singlebest_ensemble.py | 12 +++++++++++
 3 files changed, 43 insertions(+)

diff --git a/autoPyTorch/ensemble/abstract_ensemble.py b/autoPyTorch/ensemble/abstract_ensemble.py
index 072b6d260..6c22d5ced 100644
--- a/autoPyTorch/ensemble/abstract_ensemble.py
+++ b/autoPyTorch/ensemble/abstract_ensemble.py
@@ -1,3 +1,11 @@
+"""The abstract class of ensemble classes
+* Provide methods that must be overridden by the child class
+
+TODO:
+    * Add `raise NotImplementedError`
+    * model_identifiers --> List[<NamedTuple with an appropriate name>]
+"""
+
 from abc import ABCMeta, abstractmethod
 from typing import Any, Dict, List, Tuple, Union
 
diff --git a/autoPyTorch/ensemble/ensemble_selection.py b/autoPyTorch/ensemble/ensemble_selection.py
index b8f379e55..607533651 100644
--- a/autoPyTorch/ensemble/ensemble_selection.py
+++ b/autoPyTorch/ensemble/ensemble_selection.py
@@ -1,3 +1,26 @@
+"""Ensemble selection from a library of models  # noqa
+* Select the models that form the ensemble and compute their weights
+* See the references in the TODO below for the underlying method
+
+TODO:
+    * Add the following
+    References:
+        Title: Ensemble Selection from Libraries of Models
+        Authors: Rich Caruana et al.
+        URL: https://www.cs.cornell.edu/~alexn/papers/shotgun.icml04.revised.rev2.pdf
+
+    * `A copy of self` --> check if it is really true
+    * Change `<variable>_` to `_<variable>`
+    * get_models_with_weights --> looks sorted in descending order of weights
+    * soft voting ==> explanation
+    References:
+        Title: Consensus Based Ensembles of Soft Clusterings
+        Authors: Kunal Punera and Joydeep Ghosh
+        URL: https://www.researchgate.net/profile/Joydeep-Ghosh-8/publication/221188694_Consensus_Based_Ensembles_of_Soft_Clusterings/links/02e7e521fe367e06c3000000/Consensus-Based-Ensembles-of-Soft-Clusterings.pdf
+    * _calculate_weights ==> what about np.sum(weights) > 1??
+    * Refactor _fit() and add the shape of predictions
+"""
+
 from collections import Counter
 from typing import Any, Dict, List, Tuple, Union
 
diff --git a/autoPyTorch/ensemble/singlebest_ensemble.py b/autoPyTorch/ensemble/singlebest_ensemble.py
index 881ae5fd2..78c8cd5d7 100644
--- a/autoPyTorch/ensemble/singlebest_ensemble.py
+++ b/autoPyTorch/ensemble/singlebest_ensemble.py
@@ -1,3 +1,15 @@
+"""Backup solution class for a crashed search
+* Provide the best configuration instead of an ensemble
+  with multiple models
+
+TODO:
+    * Change `<variable>_` to `_<variable>`
+    * Add more `raise <Error>` since this class is supposed
+      to be used in very specific situations
+    * Check the contexts where this class is called because
+      self.weights_ and self.indices_ are not clear enough
+"""
+
 import os
 from typing import Any, Dict, List, Tuple, Union
 

From 65bb6724eb24dd11e0d8ac05d6262ba5e01c7e3f Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Thu, 16 Sep 2021 09:54:13 +0200
Subject: [PATCH 8/9] [doc] Add module doc-string and TODOs to
 ensemble_builder.py

---
 autoPyTorch/ensemble/ensemble_builder.py | 27 ++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/autoPyTorch/ensemble/ensemble_builder.py b/autoPyTorch/ensemble/ensemble_builder.py
index a22d413f7..163eb97df 100644
--- a/autoPyTorch/ensemble/ensemble_builder.py
+++ b/autoPyTorch/ensemble/ensemble_builder.py
@@ -1,3 +1,30 @@
+"""The module that builds an ensemble
+* EnsembleBuilderManager serves as a central system that
+  submits an EnsembleBuilder to dask
+* EnsembleBuilder builds an ensemble using pynisher
+  so that we can easily suppress the memory usage and runtime
+* EnsembleBuilder builds an ensemble using the configurations
+  that are observed in HPO
+
+TODO:
+    * Unused arguments in EnsembleBuilderManager.__call__
+    * Remove the argument `unit_test` and separate methods
+    with patch.object(<class name>, '<method name>', side_effect=MemoryError):
+        inst = <class name>(arguments)
+        inst.<method name>()  <== MemoryError
+
+    * Remove unneeded comments
+    * Specify precision in a better way (enum, np.int32 ...)
+    * Separate `raise Error` methods in EnsembleBuilder
+        + run
+        + main
+        + compute_loss_per_model
+        + get_n_best_preds
+    * Separate more general functions from EnsembleBuilder
+        + get_disk_consumption
+        + _read_np_fn
+"""
+
 # -*- encoding: utf-8 -*-
 import glob
 import gzip

From aeedc2f8eed1f1c37754e5bfc5f059b84f78d346 Mon Sep 17 00:00:00 2001
From: nabenabe0928 <shuhei.watanabe.utokyo@gmail.com>
Date: Mon, 27 Sep 2021 20:05:34 +0900
Subject: [PATCH 9/9] [doc/WIP] Add module doc string and ToDos in
 autoPyTorch/evaluation/abstract_evaluator.py

---
 autoPyTorch/evaluation/abstract_evaluator.py | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/autoPyTorch/evaluation/abstract_evaluator.py b/autoPyTorch/evaluation/abstract_evaluator.py
index 0ba588276..893f20a14 100644
--- a/autoPyTorch/evaluation/abstract_evaluator.py
+++ b/autoPyTorch/evaluation/abstract_evaluator.py
@@ -1,3 +1,39 @@
+"""This module provides model estimator pipelines
+This module has the following pipelines:
+    - MyTraditionalTabularClassificationPipeline
+        Wrapper class for traditional ML classification methods
+        such as CatBoost, RandomForest
+    - MyTraditionalTabularRegressionPipeline
+        Wrapper class for traditional ML regression methods
+        such as RandomForest
+    - DummyClassificationPipeline
+        Wrapper class for dummy classifier in sklearn
+    - DummyRegressionPipeline
+        Wrapper class for dummy regressor in sklearn
+    - AbstractEvaluator
+        The interface for the pipeline evaluators
+        to optimize via SMAC
+
+Note: A dummy model is an estimator that uses a very simple rule
+      and serves as the minimum baseline for each task.
+      https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyClassifier.html  # noqa: W291
+      https://scikit-learn.org/stable/modules/generated/sklearn.dummy.DummyRegressor.html  # noqa: W291
+
+TODO:
+    * Describe the definition of sample_weight
+    * import autoPyTorch.pipeline.xxx as shorter names
+    * Describe the shape of returns in predict and predict_proba
+    * Improve the documentation of additional_run_info
+    * Change get_pipeline_representation --> __repr__
+    * delete self.random_state, self.init_params, self.config,
+      self.dataset_properties
+      (because they are not used)
+    * [named_step](https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html)  # noqa: W291
+    * The typing of config in DummyXXXPipeline
+    * Add enumerator for additional_run_info
+    * Rename fit_and_suppress_warnings
+"""
+
 import logging.handlers
 import time
 import warnings