Algorithms made as modules, notebooks reworked, resolves #26

waico · Sep 24, 2023 · c031ef6 · c031ef6
1 parent 80bac4a
commit c031ef6
Show file tree

Hide file tree

Showing 25 changed files with 6,863 additions and 10,684 deletions.
diff --git a/.gitignore b/.gitignore
@@ -8,7 +8,7 @@ notebooks/*.h5
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
-src/__pycache__/
+algorithms/__pycache__/
 notebooks/__pycache__/
 *.py[cod]
 *$py.class
@@ -61,7 +61,7 @@ coverage.xml
 # Jupyter Notebook
 .ipynb_checkpoints
 notebooks/.ipynb_checkpoints
-src/.ipynb_checkpoints
+algorithms/.ipynb_checkpoints
 
 # IPython
 profile_default/

diff --git a/algorithms/Conv_AE.py b/algorithms/Conv_AE.py
@@ -0,0 +1,111 @@
+from tensorflow.keras.layers import Input, Conv1D, Dropout, Conv1DTranspose
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.optimizers import Adam
+from tensorflow.keras.callbacks import EarlyStopping
+import tensorflow as tf
+
+class Conv_AE:
+    """
+    A reconstruction convolutional autoencoder model to detect anomalies in timeseries data using reconstruction error as an anomaly score.
+
+    Parameters
+    ----------
+    No parameters are required for initializing the class.
+
+    Attributes
+    ----------
+    shape : tuple
+        Shape of the array passed to the most recent ``fit`` call.
+    model : Sequential
+        The trained convolutional autoencoder model (created by ``fit``).
+
+    Notes
+    -----
+    The network downsamples twice with stride 2 and then upsamples twice with
+    stride 2, so the window length (``data.shape[1]``) is expected to be a
+    multiple of 4 for the reconstruction to have the same length as the input.
+
+    Examples
+    --------
+    >>> from Conv_AE import Conv_AE
+    >>> CAutoencoder = Conv_AE()
+    >>> CAutoencoder.fit(train_data)
+    >>> prediction = CAutoencoder.predict(test_data)
+    """
+
+    def __init__(self):
+        # Seed every random number generator as soon as the object exists.
+        self._Random(0)
+
+    def _Random(self, seed_value):
+        """
+        Seed the Python, NumPy and TensorFlow random number generators.
+
+        Parameters
+        ----------
+        seed_value : int
+            Seed handed to every generator.
+        """
+
+        import os
+        import random
+
+        import numpy as np
+        import tensorflow as tf
+
+        # NOTE(review): changing PYTHONHASHSEED at runtime presumably only
+        # affects interpreters started afterwards, not this process - confirm.
+        os.environ['PYTHONHASHSEED'] = str(seed_value)
+        random.seed(seed_value)
+        np.random.seed(seed_value)
+        tf.random.set_seed(seed_value)
+
+    def _build_model(self):
+        """
+        Build and compile the autoencoder for inputs shaped like ``self.shape``.
+
+        Returns
+        -------
+        Sequential
+            The compiled, untrained model.
+        """
+
+        # Re-seed right before the weights are created so that repeated calls
+        # to ``fit`` start from the same initialisation, which is what the
+        # other algorithm classes in this package do in their ``_build_model``.
+        self._Random(0)
+
+        n_steps, n_features = self.shape[1], self.shape[2]
+        # Settings shared by every strided (down/up-sampling) layer.
+        strided = dict(kernel_size=7, padding="same", strides=2, activation="relu")
+
+        model = Sequential(
+            [
+                Input(shape=(n_steps, n_features)),
+                # Encoder: two stride-2 convolutions.
+                Conv1D(filters=32, **strided),
+                Dropout(rate=0.2),
+                Conv1D(filters=16, **strided),
+                # Decoder: two stride-2 transposed convolutions.
+                Conv1DTranspose(filters=16, **strided),
+                Dropout(rate=0.2),
+                Conv1DTranspose(filters=32, **strided),
+                # One output channel per input feature, so that multivariate
+                # windows are reconstructed feature by feature instead of
+                # relying on the loss broadcasting a single channel.
+                Conv1DTranspose(filters=n_features, kernel_size=7, padding="same"),
+            ]
+        )
+        model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")
+
+        return model
+
+    def fit(self, data):
+        """
+        Train the convolutional autoencoder model on the provided data.
+
+        The model is rebuilt on every call and trained to reproduce its own
+        input for at most 100 epochs (batch size 32), holding out 10% of the
+        data for validation and stopping early once ``val_loss`` has not
+        improved for 5 epochs.
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            Input data for training the autoencoder model. It must be a 3-D
+            array; its last two dimensions define the model input shape.
+        """
+
+        self.shape = data.shape
+        self.model = self._build_model()
+
+        self.model.fit(
+            data,
+            data,
+            epochs=100,
+            batch_size=32,
+            validation_split=0.1,
+            verbose=0,
+            callbacks=[
+                EarlyStopping(monitor="val_loss", patience=5, mode="min", verbose=0)
+            ],
+        )
+
+    def predict(self, data):
+        """
+        Generate predictions using the trained convolutional autoencoder model.
+
+        ``fit`` has to be called first, because it is what creates ``self.model``.
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            Input data for generating predictions.
+
+        Returns
+        -------
+        numpy.ndarray
+            Predicted output data (the reconstruction of ``data``).
+        """
+
+        return self.model.predict(data)
diff --git a/algorithms/Isolation_Forest.py b/algorithms/Isolation_Forest.py
@@ -0,0 +1,88 @@
+from sklearn.ensemble import IsolationForest
+import tensorflow as tf
+
+class Isolation_Forest:
+    """
+    Thin wrapper around scikit-learn's ``IsolationForest``.
+
+    Isolation Forest (iForest) builds an ensemble of iTrees for a given data set; anomalies are the instances with short average path lengths on the iTrees.
+
+    Parameters
+    ----------
+    params : list
+        Three values, in this order: random_state, n_jobs, contamination.
+
+    Attributes
+    ----------
+    params : list
+        The list received by the constructor.
+    random_state : int
+        The random seed used for reproducibility.
+    n_jobs : int
+        The number of CPU cores to use for parallelism.
+    contamination : float
+        The expected proportion of anomalies in the dataset.
+    model : IsolationForest
+        The fitted estimator (available after ``fit``).
+
+    Examples
+    --------
+    >>> from Isolation_Forest import Isolation_Forest
+    >>> PARAMS = [random_state, n_jobs, contamination]
+    >>> model = Isolation_Forest(PARAMS)
+    >>> model.fit(X_train)
+    >>> predictions = model.predict(test_data)
+    """
+
+    def __init__(self, params):
+        self.params = params
+        # Positional layout of ``params``: [random_state, n_jobs, contamination].
+        self.random_state = params[0]
+        self.n_jobs = params[1]
+        self.contamination = params[2]
+
+    def _Random(self, seed_value):
+        """
+        Seed the Python, NumPy and TensorFlow random number generators.
+
+        Parameters
+        ----------
+        seed_value : int
+            Seed handed to every generator.
+        """
+
+        import os
+        import random
+
+        import numpy as np
+        import tensorflow as tf
+
+        os.environ['PYTHONHASHSEED'] = str(seed_value)
+        random.seed(seed_value)
+        np.random.seed(seed_value)
+        tf.random.set_seed(seed_value)
+
+    def _build_model(self):
+        """
+        Create an unfitted ``IsolationForest`` from the stored settings.
+
+        Returns
+        -------
+        IsolationForest
+            The configured, unfitted estimator.
+        """
+
+        # The global generators are always seeded with 0 here; the estimator
+        # itself receives ``self.random_state``.
+        self._Random(0)
+
+        return IsolationForest(
+            random_state=self.random_state,
+            n_jobs=self.n_jobs,
+            contamination=self.contamination,
+        )
+
+    def fit(self, X):
+        """
+        Train the Isolation Forest model on the provided data.
+
+        A new estimator is built on every call and stored in ``self.model``.
+
+        Parameters
+        ----------
+        X : numpy.ndarray
+            Input data for training the model.
+        """
+
+        forest = self._build_model()
+        self.model = forest
+        forest.fit(X)
+
+    def predict(self, data):
+        """
+        Generate predictions using the trained Isolation Forest model.
+
+        ``fit`` has to be called first, because it is what creates ``self.model``.
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            Input data for generating predictions.
+
+        Returns
+        -------
+        numpy.ndarray
+            Predicted output data, exactly as returned by the underlying
+            estimator's ``predict``.
+        """
+
+        labels = self.model.predict(data)
+        return labels
diff --git a/algorithms/LSTM_AE.py b/algorithms/LSTM_AE.py
@@ -0,0 +1,109 @@
+from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed
+from tensorflow.keras import Model
+from tensorflow.keras.callbacks import EarlyStopping
+import tensorflow as tf
+
+class LSTM_AE:
+    """
+    A reconstruction sequence-to-sequence (LSTM-based) autoencoder model to detect anomalies in timeseries data using reconstruction error as an anomaly score.
+
+    Parameters
+    ----------
+    params : list
+        A list of hyperparameters for the model, containing the following elements:
+        - EPOCHS : int
+            The number of training epochs.
+        - BATCH_SIZE : int
+            The batch size for training.
+        - VAL_SPLIT : float
+            The validation split ratio during training.
+
+    Attributes
+    ----------
+    params : list
+        The hyperparameters for the model.
+    shape : tuple
+        Shape of the array passed to the most recent ``fit`` call.
+    model : Model
+        The trained autoencoder (created by ``fit``).
+
+    Examples
+    --------
+    >>> from LSTM_AE import LSTM_AE
+    >>> PARAMS = [EPOCHS, BATCH_SIZE, VAL_SPLIT]
+    >>> model = LSTM_AE(PARAMS)
+    >>> model.fit(train_data)
+    >>> predictions = model.predict(test_data)
+    """
+
+    def __init__(self, params):
+        self.params = params
+
+    def _Random(self, seed_value):
+        """
+        Seed the Python, NumPy and TensorFlow random number generators.
+
+        Parameters
+        ----------
+        seed_value : int
+            Seed handed to every generator.
+        """
+
+        import os
+        import random
+
+        import numpy as np
+        import tensorflow as tf
+
+        os.environ['PYTHONHASHSEED'] = str(seed_value)
+        random.seed(seed_value)
+        np.random.seed(seed_value)
+        tf.random.set_seed(seed_value)
+
+    def _build_model(self):
+        """
+        Build and compile the autoencoder for inputs shaped like ``self.shape``.
+
+        Returns
+        -------
+        Model
+            The compiled, untrained autoencoder.
+        """
+
+        # Seed first so the weights are initialised identically on every build.
+        self._Random(0)
+
+        n_steps, n_features = self.shape[1], self.shape[2]
+
+        # Encoder: compress each window into a single 100-dimensional vector.
+        inputs = Input(shape=(n_steps, n_features))
+        encoded = LSTM(100, activation='relu')(inputs)
+
+        # Decoder: repeat that vector once per time step and map every step
+        # back to the original number of features.
+        decoded = RepeatVector(n_steps)(encoded)
+        decoded = LSTM(100, activation='relu', return_sequences=True)(decoded)
+        decoded = TimeDistributed(Dense(n_features))(decoded)
+
+        # Only the full autoencoder is needed; the separate encoder-only
+        # model that used to be built here was never used or returned.
+        model = Model(inputs, decoded)
+        model.compile(optimizer='adam', loss='mae', metrics=["mse"])
+
+        return model
+
+    def fit(self, X):
+        """
+        Train the sequence-to-sequence (LSTM-based) autoencoder model on the provided data.
+
+        The model is rebuilt on every call and trained to reproduce its own
+        input, without shuffling, with early stopping (patience of 5 epochs).
+
+        Parameters
+        ----------
+        X : numpy.ndarray
+            Input data for training the model. It must be a 3-D array; its
+            last two dimensions define the model input shape.
+        """
+
+        self.shape = X.shape
+        self.model = self._build_model()
+
+        # Positional layout of ``params``: [EPOCHS, BATCH_SIZE, VAL_SPLIT].
+        epochs, batch_size, val_split = (
+            self.params[0],
+            self.params[1],
+            self.params[2],
+        )
+
+        # No ``monitor`` is given, so the callback's default metric is used.
+        early_stopping = EarlyStopping(patience=5, verbose=0)
+
+        self.model.fit(
+            X,
+            X,
+            validation_split=val_split,
+            epochs=epochs,
+            batch_size=batch_size,
+            verbose=0,
+            shuffle=False,
+            callbacks=[early_stopping],
+        )
+
+    def predict(self, data):
+        """
+        Generate predictions using the trained sequence-to-sequence (LSTM-based) autoencoder model.
+
+        ``fit`` has to be called first, because it is what creates ``self.model``.
+
+        Parameters
+        ----------
+        data : numpy.ndarray
+            Input data for generating predictions.
+
+        Returns
+        -------
+        numpy.ndarray
+            Predicted output data (the reconstruction of ``data``).
+        """
+
+        return self.model.predict(data)