From 29855e33f834664122674f7d9d70049a05be4210 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 18:38:55 +0000
Subject: [PATCH 1/8] Fake message

---
 sklearn_questions.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index fa02e0d..f2c6a61 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -2,6 +2,7 @@
 
 The goal of this assignment is to implement by yourself:
 
+
 - a scikit-learn estimator for the KNearestNeighbors for classification
   tasks and check that it is working properly.
 - a scikit-learn CV splitter where the splits are based on a Pandas

From 0ac1abed84b6fdadbe0210f3cc468b24336e7202 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 18:58:12 +0000
Subject: [PATCH 2/8] Implement KNearestNeighbors and draft MonthlySplit

---
 sklearn_questions.py | 122 +++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 67 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index f2c6a61..791bd0c 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -83,6 +83,11 @@ def fit(self, X, y):
         self : instance of KNearestNeighbors
             The current instance of the classifier
         """
+        X, y = check_X_y(X, y)
+        check_classification_targets(y)
+        self.X_train_ = X
+        self.y_train_ = y
+        self.classes_ = np.unique(y)
         return self
 
     def predict(self, X):
@@ -98,8 +103,18 @@ def predict(self, X):
         y : ndarray, shape (n_test_samples,)
             Predicted class labels for each test data sample.
         """
-        y_pred = np.zeros(X.shape[0])
-        return y_pred
+        check_is_fitted(self)
+        X = check_array(X)
+        
+        y_pred = []
+        for x in X:
+            distances = np.linalg.norm(self.X_train_ - x, axis=1)
+            neighbor_indices = np.argsort(distances)[:self.n_neighbors]
+            neighbor_labels = self.y_train_[neighbor_indices]
+            most_common_label = np.bincount(neighbor_labels).argmax()
+            y_pred.append(most_common_label)
+
+        return np.array(y_pred)
 
     def score(self, X, y):
         """Calculate the score of the prediction.
@@ -116,74 +131,47 @@ def score(self, X, y):
         score : float
             Accuracy of the model computed for the (X, y) pairs.
         """
-        return 0.
+        y_pred = self.predict(X)
 
+        return np.mean(y_pred == y)
 
-class MonthlySplit(BaseCrossValidator):
-    """CrossValidator based on monthly split.
-
-    Split data based on the given `time_col` (or default to index). Each split
-    corresponds to one month of data for the training and the next month of
-    data for the test.
-
-    Parameters
-    ----------
-    time_col : str, defaults to 'index'
-        Column of the input DataFrame that will be used to split the data. This
-        column should be of type datetime. If split is called with a DataFrame
-        for which this column is not a datetime, it will raise a ValueError.
-        To use the index as column just set `time_col` to `'index'`.
-    """
-
-    def __init__(self, time_col='index'):  # noqa: D107
-        self.time_col = time_col
-
-    def get_n_splits(self, X, y=None, groups=None):
-        """Return the number of splitting iterations in the cross-validator.
 
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Training data, where `n_samples` is the number of samples
-            and `n_features` is the number of features.
-        y : array-like of shape (n_samples,)
-            Always ignored, exists for compatibility.
-        groups : array-like of shape (n_samples,)
-            Always ignored, exists for compatibility.
 
-        Returns
-        -------
-        n_splits : int
-            The number of splits.
-        """
-        return 0
-
-    def split(self, X, y, groups=None):
-        """Generate indices to split data into training and test set.
+class MonthlySplit(BaseCrossValidator):
+    """CrossValidator based on monthly split."""
 
-        Parameters
-        ----------
-        X : array-like of shape (n_samples, n_features)
-            Training data, where `n_samples` is the number of samples
-            and `n_features` is the number of features.
-        y : array-like of shape (n_samples,)
-            Always ignored, exists for compatibility.
-        groups : array-like of shape (n_samples,)
-            Always ignored, exists for compatibility.
-
-        Yields
-        ------
-        idx_train : ndarray
-            The training set indices for that split.
-        idx_test : ndarray
-            The testing set indices for that split.
-        """
+    def __init__(self, time_col='index'):
+        self.time_col = time_col
 
-        n_samples = X.shape[0]
-        n_splits = self.get_n_splits(X, y, groups)
-        for i in range(n_splits):
-            idx_train = range(n_samples)
-            idx_test = range(n_samples)
-            yield (
-                idx_train, idx_test
-            )
+    def get_n_splits(self, X, y=None, groups=None):
+        """Return the number of splitting iterations in the cross-validator."""
+        if self.time_col == 'index':
+            dates = X.index
+        else:
+            dates = X[self.time_col]
+
+        if not pd.api.types.is_datetime64_any_dtype(dates):
+            raise ValueError("The column used for time-based splitting should be of datetime type.")
+        
+        months = dates.to_period("M").unique()
+        return len(months) - 1
+
+    def split(self, X, y=None, groups=None):
+        """Generate indices to split data into training and test set."""
+        if self.time_col == 'index':
+            dates = X.index
+        else:
+            dates = X[self.time_col]
+
+        if not pd.api.types.is_datetime64_any_dtype(dates):
+            raise ValueError("The column used for time-based splitting should be of datetime type.")
+
+        # Ensure the index is of type Int64Index
+        # if isinstance(dates, pd.RangeIndex):
+        #     dates = dates.astype('int64')
+
+        months = dates.to_period("M").unique()
+        for i in range(len(months) - 1):
+            train_idx = dates[dates.to_period("M") == months[i]].index
+            test_idx = dates[dates.to_period("M") == months[i + 1]].index
+            yield train_idx, test_idx
\ No newline at end of file

From 4df5b3b582cb5bc81c3200f9b3c0fb40bb73ff61 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 22:56:32 +0000
Subject: [PATCH 3/8] Use pairwise_distances in predict and refactor MonthlySplit

---
 sklearn_questions.py | 185 +++++++++++++++++++++++++------------------
 1 file changed, 108 insertions(+), 77 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 791bd0c..25033a2 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -61,117 +61,148 @@
 from sklearn.utils.multiclass import check_classification_targets
 from sklearn.metrics.pairwise import pairwise_distances
 
-
 class KNearestNeighbors(BaseEstimator, ClassifierMixin):
     """KNearestNeighbors classifier."""
 
-    def __init__(self, n_neighbors=1):  # noqa: D107
+    def __init__(self, n_neighbors=1):
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
-        """Fitting function.
-
-         Parameters
-        ----------
-        X : ndarray, shape (n_samples, n_features)
-            Data to train the model.
-        y : ndarray, shape (n_samples,)
-            Labels associated with the training data.
-
-        Returns
-        ----------
-        self : instance of KNearestNeighbors
-            The current instance of the classifier
-        """
+        """Fit the model using X as training data and y as target values."""
         X, y = check_X_y(X, y)
         check_classification_targets(y)
         self.X_train_ = X
         self.y_train_ = y
         self.classes_ = np.unique(y)
+        self.n_features_in_ = X.shape[1]  # Set the n_features_in_ attribute
         return self
 
     def predict(self, X):
-        """Predict function.
-
-        Parameters
-        ----------
-        X : ndarray, shape (n_test_samples, n_features)
-            Data to predict on.
-
-        Returns
-        ----------
-        y : ndarray, shape (n_test_samples,)
-            Predicted class labels for each test data sample.
-        """
-        check_is_fitted(self)
+        """Predict the class labels for the provided data."""
+        check_is_fitted(self, attributes=["X_train_", "y_train_", "classes_", "n_features_in_"])
         X = check_array(X)
         
-        y_pred = []
-        for x in X:
-            distances = np.linalg.norm(self.X_train_ - x, axis=1)
-            neighbor_indices = np.argsort(distances)[:self.n_neighbors]
+        if X.shape[1] != self.n_features_in_:
+            raise ValueError(f"Number of features of the input must be {self.n_features_in_}, but the input has {X.shape[1]} features.")
+
+        distances = pairwise_distances(X, self.X_train_)
+        y_pred = np.zeros(X.shape[0], dtype=int)
+
+        for i in range(X.shape[0]):
+            neighbor_indices = np.argsort(distances[i])[:self.n_neighbors]
             neighbor_labels = self.y_train_[neighbor_indices]
             most_common_label = np.bincount(neighbor_labels).argmax()
-            y_pred.append(most_common_label)
-
-        return np.array(y_pred)
+            y_pred[i] = most_common_label
+        
+        return self.classes_[y_pred]
 
     def score(self, X, y):
-        """Calculate the score of the prediction.
-
-        Parameters
-        ----------
-        X : ndarray, shape (n_samples, n_features)
-            Data to score on.
-        y : ndarray, shape (n_samples,)
-            target values.
-
-        Returns
-        ----------
-        score : float
-            Accuracy of the model computed for the (X, y) pairs.
-        """
+        """Return the mean accuracy on the given test data and labels."""
+        y = check_array(y, ensure_2d=False, dtype=int)
         y_pred = self.predict(X)
-
         return np.mean(y_pred == y)
 
 
-
 class MonthlySplit(BaseCrossValidator):
-    """CrossValidator based on monthly split."""
+    """CrossValidator based on monthly split.
+
+    Split data based on the given `time_col` (or default to index). Each split
+    corresponds to one month of data for the training and the next month of
+    data for the test.
+
+    Parameters
+    ----------
+    time_col : str, defaults to 'index'
+        Column of the input DataFrame that will be used to split the data. This
+        column should be of type datetime. If split is called with a DataFrame
+        for which this column is not a datetime, it will raise a ValueError.
+        To use the index as column just set `time_col` to `'index'`.
+    """
 
     def __init__(self, time_col='index'):
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
-        """Return the number of splitting iterations in the cross-validator."""
-        if self.time_col == 'index':
-            dates = X.index
-        else:
-            dates = X[self.time_col]
+        """Return the number of splitting iterations in the cross-validator.
 
-        if not pd.api.types.is_datetime64_any_dtype(dates):
-            raise ValueError("The column used for time-based splitting should be of datetime type.")
-        
-        months = dates.to_period("M").unique()
-        return len(months) - 1
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+        y : array-like of shape (n_samples,)
+            Always ignored, exists for compatibility.
+        groups : array-like of shape (n_samples,)
+            Always ignored, exists for compatibility.
 
-    def split(self, X, y=None, groups=None):
-        """Generate indices to split data into training and test set."""
+        Returns
+        -------
+        n_splits : int
+            The number of splits.
+        """
+        _, time_col_unique = self.get_n_splits_col(X)
+        return len(time_col_unique) - 1
+
+    def get_n_splits_col(self, X):
+        """Get the time column and unique values of the time column.
+
+        Parameters
+        ----------
+        X : DataFrame
+            Data to split.
+
+        Returns
+        -------
+        time_col : pd.DatetimeIndex
+            The time column of the input data.
+        time_col_unique : pd.PeriodIndex
+            Unique values of the time column.
+        """
         if self.time_col == 'index':
-            dates = X.index
+            if not isinstance(X.index, pd.DatetimeIndex):
+                raise TypeError(
+                    f"The column '{self.time_col}' is not a datetime."
+                )
+            time_col = X.index
         else:
-            dates = X[self.time_col]
+            if not pd.api.types.is_datetime64_any_dtype(X[self.time_col]):
+                raise ValueError(
+                    f"The column '{self.time_col}' is not a datetime."
+                )
+            time_col = pd.to_datetime(X[self.time_col])
 
-        if not pd.api.types.is_datetime64_any_dtype(dates):
-            raise ValueError("The column used for time-based splitting should be of datetime type.")
+        if not isinstance(time_col, pd.DatetimeIndex):
+            time_col = pd.DatetimeIndex(time_col)
+        time_col_unique = time_col.to_period("M").unique()
+        return time_col, time_col_unique
 
-        # Ensure the index is of type Int64Index
-        # if isinstance(dates, pd.RangeIndex):
-        #     dates = dates.astype('int64')
+    def split(self, X, y=None, groups=None):
+        """Generate indices to split data into training and test set.
 
-        months = dates.to_period("M").unique()
-        for i in range(len(months) - 1):
-            train_idx = dates[dates.to_period("M") == months[i]].index
-            test_idx = dates[dates.to_period("M") == months[i + 1]].index
-            yield train_idx, test_idx
\ No newline at end of file
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+        y : array-like of shape (n_samples,)
+            Always ignored, exists for compatibility.
+        groups : array-like of shape (n_samples,)
+            Always ignored, exists for compatibility.
+
+        Yields
+        ------
+        idx_train : ndarray
+            The training set indices for that split.
+        idx_test : ndarray
+            The testing set indices for that split.
+        """
+        time_col, time_col_unique = self.get_n_splits_col(X)
+        n_splits = self.get_n_splits(X)
+        time_col_unique = sorted(time_col_unique)
+        for i in range(n_splits):
+            train_mask = time_col.to_period('M').isin([time_col_unique[i]])
+            idx_train = np.where(train_mask)[0]
+            test_mask = time_col.to_period('M').isin([time_col_unique[i + 1]])
+            idx_test = np.where(test_mask)[0]
+
+            yield idx_train, idx_test
\ No newline at end of file

From 72b0bc67dffe43aee528f3e39fd163e99bef31c4 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 23:12:30 +0000
Subject: [PATCH 4/8] Validate with validate_data, encode labels, rewrite MonthlySplit

---
 sklearn_questions.py | 198 +++++++++++++++++++++++++++----------------
 1 file changed, 125 insertions(+), 73 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 25033a2..7d64865 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -2,7 +2,6 @@
 
 The goal of this assignment is to implement by yourself:
 
-
 - a scikit-learn estimator for the KNearestNeighbors for classification
   tasks and check that it is working properly.
 - a scikit-learn CV splitter where the splits are based on a Pandas
@@ -48,6 +47,7 @@
 
 to compute distances between 2 sets of samples.
 """
+
 import numpy as np
 import pandas as pd
 
@@ -55,52 +55,98 @@
 from sklearn.base import ClassifierMixin
 
 from sklearn.model_selection import BaseCrossValidator
+from sklearn.preprocessing import LabelEncoder
+
 
 from sklearn.utils.validation import check_X_y, check_is_fitted
-from sklearn.utils.validation import check_array
-from sklearn.utils.multiclass import check_classification_targets
+from sklearn.utils.validation import validate_data
+from sklearn.utils.multiclass import unique_labels
 from sklearn.metrics.pairwise import pairwise_distances
 
-class KNearestNeighbors(BaseEstimator, ClassifierMixin):
+
+class KNearestNeighbors(ClassifierMixin, BaseEstimator):
     """KNearestNeighbors classifier."""
 
-    def __init__(self, n_neighbors=1):
+    def __init__(self, n_neighbors=1): 
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
-        """Fit the model using X as training data and y as target values."""
-        X, y = check_X_y(X, y)
-        check_classification_targets(y)
-        self.X_train_ = X
-        self.y_train_ = y
-        self.classes_ = np.unique(y)
-        self.n_features_in_ = X.shape[1]  # Set the n_features_in_ attribute
+        """Fitting function.
+
+         Parameters
+        ----------
+        X : ndarray, shape (n_samples, n_features)
+            Data to train the model.
+        y : ndarray, shape (n_samples,)
+            Labels associated with the training data.
+
+        Returns
+        ----------
+        self : instance of KNearestNeighbors
+            The current instance of the classifier
+        """
+        self.classes_ = unique_labels(y)
+        X, y = validate_data(self, X, y, reset=True)
+
+        self.label_encoder_ = LabelEncoder()
+        self.X_ = X
+
+        self.y_ = self.label_encoder_.fit_transform(y)
+        self.is_fitted_ = True
+
         return self
 
     def predict(self, X):
-        """Predict the class labels for the provided data."""
-        check_is_fitted(self, attributes=["X_train_", "y_train_", "classes_", "n_features_in_"])
-        X = check_array(X)
-        
-        if X.shape[1] != self.n_features_in_:
-            raise ValueError(f"Number of features of the input must be {self.n_features_in_}, but the input has {X.shape[1]} features.")
-
-        distances = pairwise_distances(X, self.X_train_)
+        """Predict function.
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_test_samples, n_features)
+            Data to predict on.
+
+        Returns
+        ----------
+        y : ndarray, shape (n_test_samples,)
+            Predicted class labels for each test data sample.
+        """
+        check_is_fitted(self)
+        X = validate_data(self, X, reset=False, dtype=float)
+
         y_pred = np.zeros(X.shape[0], dtype=int)
 
-        for i in range(X.shape[0]):
-            neighbor_indices = np.argsort(distances[i])[:self.n_neighbors]
-            neighbor_labels = self.y_train_[neighbor_indices]
-            most_common_label = np.bincount(neighbor_labels).argmax()
-            y_pred[i] = most_common_label
-        
-        return self.classes_[y_pred]
+        distance_mat = pairwise_distances(X, self.X_).argsort(axis=1)
+
+        index_min_dist = distance_mat[:, : self.n_neighbors]
+
+        for ind, row in enumerate(index_min_dist):
+            val = self.y_[row] 
+            nearest_neigh = np.bincount(val).argmax()
+            y_pred[ind] = nearest_neigh
+
+        y_pred = self.label_encoder_.inverse_transform(y_pred)
+
+        return y_pred
 
     def score(self, X, y):
-        """Return the mean accuracy on the given test data and labels."""
-        y = check_array(y, ensure_2d=False, dtype=int)
+        """Calculate the score of the prediction.
+
+        Parameters
+        ----------
+        X : ndarray, shape (n_samples, n_features)
+            Data to score on.
+        y : ndarray, shape (n_samples,)
+            target values.
+
+        Returns
+        ----------
+        score : float
+            Accuracy of the model computed for the (X, y) pairs.
+        """
+        X, y = check_X_y(X, y)
+
         y_pred = self.predict(X)
-        return np.mean(y_pred == y)
+        acc = (y_pred == y).sum() / len(y)
+        return acc
 
 
 class MonthlySplit(BaseCrossValidator):
@@ -119,7 +165,7 @@ class MonthlySplit(BaseCrossValidator):
         To use the index as column just set `time_col` to `'index'`.
     """
 
-    def __init__(self, time_col='index'):
+    def __init__(self, time_col="index"): 
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
@@ -140,43 +186,20 @@ def get_n_splits(self, X, y=None, groups=None):
         n_splits : int
             The number of splits.
         """
-        _, time_col_unique = self.get_n_splits_col(X)
-        return len(time_col_unique) - 1
 
-    def get_n_splits_col(self, X):
-        """Get the time column and unique values of the time column.
+        if not self.time_col == "index":
+            if np.dtype(X[self.time_col]) != np.dtype("datetime64[ns]"):
+                raise ValueError("Time column should be a datetime object")
+            X_mem = X.set_index(self.time_col).copy()
+        else:
+            X_mem = X.copy()
+            if X_mem.index.dtype != np.dtype("datetime64[ns]"):
+                raise ValueError("Time column should be a datetime object")
 
-        Parameters
-        ----------
-        X : DataFrame
-            Data to split.
+        n_split = len(X_mem.resample("ME")) - 1
+        return n_split
 
-        Returns
-        -------
-        time_col : pd.DatetimeIndex
-            The time column of the input data.
-        time_col_unique : pd.PeriodIndex
-            Unique values of the time column.
-        """
-        if self.time_col == 'index':
-            if not isinstance(X.index, pd.DatetimeIndex):
-                raise TypeError(
-                    f"The column '{self.time_col}' is not a datetime."
-                )
-            time_col = X.index
-        else:
-            if not pd.api.types.is_datetime64_any_dtype(X[self.time_col]):
-                raise ValueError(
-                    f"The column '{self.time_col}' is not a datetime."
-                )
-            time_col = pd.to_datetime(X[self.time_col])
-
-        if not isinstance(time_col, pd.DatetimeIndex):
-            time_col = pd.DatetimeIndex(time_col)
-        time_col_unique = time_col.to_period("M").unique()
-        return time_col, time_col_unique
-
-    def split(self, X, y=None, groups=None):
+    def split(self, X, y, groups=None):
         """Generate indices to split data into training and test set.
 
         Parameters
@@ -196,13 +219,42 @@ def split(self, X, y=None, groups=None):
         idx_test : ndarray
             The testing set indices for that split.
         """
-        time_col, time_col_unique = self.get_n_splits_col(X)
-        n_splits = self.get_n_splits(X)
-        time_col_unique = sorted(time_col_unique)
+        if isinstance(X, pd.Series):
+            X = pd.DataFrame(X)
+
+        n_splits = self.get_n_splits(X, y, groups)
+
+        if not self.time_col == "index":
+            if np.dtype(X[self.time_col]) != np.dtype("datetime64[ns]"):
+                raise ValueError("Time column should be a datetime object")
+            X_ = X.set_index(self.time_col).copy()
+        else:
+            X_ = X.copy()
+            if X_.index.dtype != np.dtype("datetime64[ns]"):
+                raise ValueError("Time column should be a datetime object")
+
+        month_split = pd.unique(X_.to_period("M").index)
+        month_split = pd.Series(month_split)
+
+        month_split = month_split.apply(
+            lambda x: "{}-{}".format(x.year, str(x.month).zfill(2))
+        )
+
+        month_split.sort_values(inplace=True, ignore_index=True)
+
+
+        X_mem = X_.copy().sort_index()
+
+        X_.reset_index(names="date", inplace=True)
+
         for i in range(n_splits):
-            train_mask = time_col.to_period('M').isin([time_col_unique[i]])
-            idx_train = np.where(train_mask)[0]
-            test_mask = time_col.to_period('M').isin([time_col_unique[i + 1]])
-            idx_test = np.where(test_mask)[0]
+            mem_id_train = X_mem[: month_split[i]].index
+            
+            X_mem.drop(mem_id_train, inplace=True)
+
+            mem_id_test = X_mem[: month_split[i + 1]].index
+
+            idx_train = X_.index[(X_["date"].isin(mem_id_train))].to_list()
+            idx_test = X_.index[(X_["date"].isin(mem_id_test))].to_list()
 
-            yield idx_train, idx_test
\ No newline at end of file
+            yield (idx_train, idx_test)
\ No newline at end of file

From 9a49e420f2d79d4b3edb7179294b6d2fda07bbe5 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 23:13:27 +0000
Subject: [PATCH 5/8] Strip trailing whitespace and add final newline

---
 sklearn_questions.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 7d64865..5b14d43 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -67,7 +67,7 @@
 class KNearestNeighbors(ClassifierMixin, BaseEstimator):
     """KNearestNeighbors classifier."""
 
-    def __init__(self, n_neighbors=1): 
+    def __init__(self, n_neighbors=1):
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
@@ -119,7 +119,7 @@ def predict(self, X):
         index_min_dist = distance_mat[:, : self.n_neighbors]
 
         for ind, row in enumerate(index_min_dist):
-            val = self.y_[row] 
+            val = self.y_[row]
             nearest_neigh = np.bincount(val).argmax()
             y_pred[ind] = nearest_neigh
 
@@ -165,7 +165,7 @@ class MonthlySplit(BaseCrossValidator):
         To use the index as column just set `time_col` to `'index'`.
     """
 
-    def __init__(self, time_col="index"): 
+    def __init__(self, time_col="index"):
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
@@ -242,14 +242,13 @@ def split(self, X, y, groups=None):
 
         month_split.sort_values(inplace=True, ignore_index=True)
 
-
         X_mem = X_.copy().sort_index()
 
         X_.reset_index(names="date", inplace=True)
 
         for i in range(n_splits):
             mem_id_train = X_mem[: month_split[i]].index
-            
+
             X_mem.drop(mem_id_train, inplace=True)
 
             mem_id_test = X_mem[: month_split[i + 1]].index
@@ -257,4 +256,4 @@ def split(self, X, y, groups=None):
             idx_train = X_.index[(X_["date"].isin(mem_id_train))].to_list()
             idx_test = X_.index[(X_["date"].isin(mem_id_test))].to_list()
 
-            yield (idx_train, idx_test)
\ No newline at end of file
+            yield (idx_train, idx_test)

From 3caa47678e9793507c31d1d8b8a3cd2752a5199d Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 23:16:50 +0000
Subject: [PATCH 6/8] Add placeholder __init__ docstrings

---
 sklearn_questions.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 5b14d43..90a7733 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -68,6 +68,11 @@ class KNearestNeighbors(ClassifierMixin, BaseEstimator):
     """KNearestNeighbors classifier."""
 
     def __init__(self, n_neighbors=1):
+        """Fitting function.
+
+         Dummy
+        """
+        
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
@@ -85,6 +90,7 @@ def fit(self, X, y):
         self : instance of KNearestNeighbors
             The current instance of the classifier
         """
+        
         self.classes_ = unique_labels(y)
         X, y = validate_data(self, X, y, reset=True)
 
@@ -109,6 +115,7 @@ def predict(self, X):
         y : ndarray, shape (n_test_samples,)
             Predicted class labels for each test data sample.
         """
+        
         check_is_fitted(self)
         X = validate_data(self, X, reset=False, dtype=float)
 
@@ -142,6 +149,7 @@ def score(self, X, y):
         score : float
             Accuracy of the model computed for the (X, y) pairs.
         """
+        
         X, y = check_X_y(X, y)
 
         y_pred = self.predict(X)
@@ -166,6 +174,11 @@ class MonthlySplit(BaseCrossValidator):
     """
 
     def __init__(self, time_col="index"):
+        """Fitting function.
+
+         Dummy
+        """
+        
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):

From cec2b1cd2ed087dbd67a401542793b0c6b9816f4 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 23:21:17 +0000
Subject: [PATCH 7/8] Collapse __init__ docstrings and remove stray blank lines

---
 sklearn_questions.py | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index 90a7733..f82e982 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -68,11 +68,7 @@ class KNearestNeighbors(ClassifierMixin, BaseEstimator):
     """KNearestNeighbors classifier."""
 
     def __init__(self, n_neighbors=1):
-        """Fitting function.
-
-         Dummy
-        """
-        
+        """Fitting function.Dummy"""
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
@@ -90,7 +86,6 @@ def fit(self, X, y):
         self : instance of KNearestNeighbors
             The current instance of the classifier
         """
-        
         self.classes_ = unique_labels(y)
         X, y = validate_data(self, X, y, reset=True)
 
@@ -115,7 +110,6 @@ def predict(self, X):
         y : ndarray, shape (n_test_samples,)
             Predicted class labels for each test data sample.
         """
-        
         check_is_fitted(self)
         X = validate_data(self, X, reset=False, dtype=float)
 
@@ -149,7 +143,6 @@ def score(self, X, y):
         score : float
             Accuracy of the model computed for the (X, y) pairs.
         """
-        
         X, y = check_X_y(X, y)
 
         y_pred = self.predict(X)
@@ -174,11 +167,7 @@ class MonthlySplit(BaseCrossValidator):
     """
 
     def __init__(self, time_col="index"):
-        """Fitting function.
-
-         Dummy
-        """
-        
+        """Fitting function.Dummy"""
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):
@@ -199,7 +188,6 @@ def get_n_splits(self, X, y=None, groups=None):
         n_splits : int
             The number of splits.
         """
-
         if not self.time_col == "index":
             if np.dtype(X[self.time_col]) != np.dtype("datetime64[ns]"):
                 raise ValueError("Time column should be a datetime object")

From 0d2b1b61b5e441911eb4b0ddeb1e0bbdcb4b4da7 Mon Sep 17 00:00:00 2001
From: Kshitij-Ambilduke <kshitijambilduke1@gmail.com>
Date: Fri, 20 Dec 2024 23:22:33 +0000
Subject: [PATCH 8/8] Fix __init__ docstrings

---
 sklearn_questions.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn_questions.py b/sklearn_questions.py
index f82e982..9759b09 100644
--- a/sklearn_questions.py
+++ b/sklearn_questions.py
@@ -68,7 +68,7 @@ class KNearestNeighbors(ClassifierMixin, BaseEstimator):
     """KNearestNeighbors classifier."""
 
     def __init__(self, n_neighbors=1):
-        """Fitting function.Dummy"""
+        """Store the number of neighbors used for prediction."""
         self.n_neighbors = n_neighbors
 
     def fit(self, X, y):
@@ -167,7 +167,7 @@ class MonthlySplit(BaseCrossValidator):
     """
 
     def __init__(self, time_col="index"):
-        """Fitting function.Dummy"""
+        """Store the name of the datetime column to split on."""
         self.time_col = time_col
 
     def get_n_splits(self, X, y=None, groups=None):