
Commit

Algorithms made into modules, notebooks reworked, resolves #26
IgorMozolin committed Sep 24, 2023
1 parent 80bac4a commit c031ef6
Showing 25 changed files with 6,863 additions and 10,684 deletions.
4 changes: 2 additions & 2 deletions .gitignore
@@ -8,7 +8,7 @@ notebooks/*.h5

# Byte-compiled / optimized / DLL files
__pycache__/
-src/__pycache__/
+algorithms/__pycache__/
notebooks/__pycache__/
*.py[cod]
*$py.class
@@ -61,7 +61,7 @@ coverage.xml
# Jupyter Notebook
.ipynb_checkpoints
notebooks/.ipynb_checkpoints
-src/.ipynb_checkpoints
+algorithms/.ipynb_checkpoints

# IPython
profile_default/
111 changes: 111 additions & 0 deletions algorithms/Conv_AE.py
@@ -0,0 +1,111 @@
from tensorflow.keras.layers import Input, Conv1D, Dropout, Conv1DTranspose
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping


class Conv_AE:
    """
    A reconstruction convolutional autoencoder model that detects anomalies in
    time-series data, using the reconstruction error as an anomaly score.

    Parameters
    ----------
    No parameters are required for initializing the class.

    Attributes
    ----------
    model : Sequential
        The trained convolutional autoencoder model.

    Examples
    --------
    >>> from Conv_AE import Conv_AE
    >>> CAutoencoder = Conv_AE()
    >>> CAutoencoder.fit(train_data)
    >>> prediction = CAutoencoder.predict(test_data)
    """

    def __init__(self):
        self._Random(0)

    def _Random(self, seed_value):
        # Seed every source of randomness for reproducible runs.
        import os
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        # Symmetric encoder/decoder: two strided Conv1D layers halve the
        # sequence length twice; two strided Conv1DTranspose layers restore it.
        model = Sequential(
            [
                Input(shape=(self.shape[1], self.shape[2])),
                Conv1D(
                    filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
                ),
                Dropout(rate=0.2),
                Conv1D(
                    filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
                ),
                Conv1DTranspose(
                    filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
                ),
                Dropout(rate=0.2),
                Conv1DTranspose(
                    filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
                ),
                Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
            ]
        )
        model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")

        return model

    def fit(self, data):
        """
        Train the convolutional autoencoder model on the provided data.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for training the autoencoder model.
        """
        self.shape = data.shape
        self.model = self._build_model()

        # The model learns to reconstruct its own input; early stopping
        # halts training once the validation loss stops improving.
        self.model.fit(
            data,
            data,
            epochs=100,
            batch_size=32,
            validation_split=0.1,
            verbose=0,
            callbacks=[
                EarlyStopping(monitor="val_loss", patience=5, mode="min", verbose=0)
            ],
        )

    def predict(self, data):
        """
        Generate predictions using the trained convolutional autoencoder model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Reconstructed output data.
        """
        return self.model.predict(data)
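
Usage note: because both Conv1D encoder layers use strides=2, the window length fed to Conv_AE should be divisible by 4 so the Conv1DTranspose stack can restore the original shape. A minimal sketch of scoring anomalies with this class follows; the window arrays and the 0.99-quantile threshold are illustrative assumptions, not part of this commit.

import numpy as np
from Conv_AE import Conv_AE

# Hypothetical inputs: sliding windows of a univariate series,
# shaped (n_windows, timesteps, 1) with timesteps divisible by 4.
train_windows = np.random.rand(500, 32, 1)
test_windows = np.random.rand(100, 32, 1)

model = Conv_AE()
model.fit(train_windows)

# Mean absolute reconstruction error per window is the anomaly score.
reconstructed = model.predict(test_windows)
scores = np.mean(np.abs(test_windows - reconstructed), axis=(1, 2))

# Illustrative threshold: flag scores above the 99th percentile of
# the training-set reconstruction errors.
train_scores = np.mean(np.abs(train_windows - model.predict(train_windows)), axis=(1, 2))
threshold = np.quantile(train_scores, 0.99)
anomalies = scores > threshold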
88 changes: 88 additions & 0 deletions algorithms/Isolation_Forest.py
@@ -0,0 +1,88 @@
from sklearn.ensemble import IsolationForest


class Isolation_Forest:
    """
    Isolation Forest (iForest) builds an ensemble of isolation trees for a
    given data set; anomalies are the instances with short average path
    lengths on those trees.

    Parameters
    ----------
    params : list
        A list containing three parameters: random_state, n_jobs, and contamination.

    Attributes
    ----------
    random_state : int
        The random seed used for reproducibility.
    n_jobs : int
        The number of CPU cores to use for parallelism.
    contamination : float
        The expected proportion of anomalies in the dataset.

    Examples
    --------
    >>> from Isolation_Forest import Isolation_Forest
    >>> PARAMS = [random_state, n_jobs, contamination]
    >>> model = Isolation_Forest(PARAMS)
    >>> model.fit(X_train)
    >>> predictions = model.predict(test_data)
    """

    def __init__(self, params):
        self.params = params
        self.random_state = self.params[0]
        self.n_jobs = self.params[1]
        self.contamination = self.params[2]

    def _Random(self, seed_value):
        # Seed every source of randomness for reproducible runs.
        import os
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        self._Random(0)

        model = IsolationForest(random_state=self.random_state,
                                n_jobs=self.n_jobs,
                                contamination=self.contamination)
        return model

    def fit(self, X):
        """
        Train the Isolation Forest model on the provided data.

        Parameters
        ----------
        X : numpy.ndarray
            Input data for training the model.
        """
        self.model = self._build_model()
        self.model.fit(X)

    def predict(self, data):
        """
        Generate predictions using the trained Isolation Forest model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Predicted labels: 1 for inliers, -1 for anomalies.
        """
        return self.model.predict(data)
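
A minimal usage sketch for this wrapper; the parameter values and the feature matrices are illustrative assumptions, not part of this commit.

import numpy as np
from Isolation_Forest import Isolation_Forest

# Hypothetical 2-D feature matrices: rows are observations.
X_train = np.random.rand(1000, 5)
X_test = np.random.rand(200, 5)

# PARAMS order is fixed by the class: [random_state, n_jobs, contamination].
# n_jobs=-1 uses all CPU cores; contamination=0.01 expects ~1% anomalies.
PARAMS = [0, -1, 0.01]
model = Isolation_Forest(PARAMS)
model.fit(X_train)

labels = model.predict(X_test)           # 1 = inlier, -1 = anomaly
anomaly_idx = np.where(labels == -1)[0]  # indices of flagged rows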
109 changes: 109 additions & 0 deletions algorithms/LSTM_AE.py
@@ -0,0 +1,109 @@
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed
from tensorflow.keras import Model
from tensorflow.keras.callbacks import EarlyStopping


class LSTM_AE:
    """
    A reconstruction sequence-to-sequence (LSTM-based) autoencoder model that
    detects anomalies in time-series data, using the reconstruction error as
    an anomaly score.

    Parameters
    ----------
    params : list
        A list of hyperparameters for the model, containing the following elements:
        - EPOCHS : int
            The number of training epochs.
        - BATCH_SIZE : int
            The batch size for training.
        - VAL_SPLIT : float
            The validation split ratio during training.

    Attributes
    ----------
    params : list
        The hyperparameters for the model.

    Examples
    --------
    >>> from LSTM_AE import LSTM_AE
    >>> PARAMS = [EPOCHS, BATCH_SIZE, VAL_SPLIT]
    >>> model = LSTM_AE(PARAMS)
    >>> model.fit(train_data)
    >>> predictions = model.predict(test_data)
    """

    def __init__(self, params):
        self.params = params

    def _Random(self, seed_value):
        # Seed every source of randomness for reproducible runs.
        import os
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        self._Random(0)

        # Encoder: compress each window into a single 100-dimensional vector.
        inputs = Input(shape=(self.shape[1], self.shape[2]))
        encoded = LSTM(100, activation='relu')(inputs)

        # Decoder: repeat the encoding for every timestep and unroll it back
        # into a sequence with the original length and feature count.
        decoded = RepeatVector(self.shape[1])(encoded)
        decoded = LSTM(100, activation='relu', return_sequences=True)(decoded)
        decoded = TimeDistributed(Dense(self.shape[2]))(decoded)

        model = Model(inputs, decoded)
        model.compile(optimizer='adam', loss='mae', metrics=["mse"])

        return model

    def fit(self, X):
        """
        Train the sequence-to-sequence (LSTM-based) autoencoder model on the provided data.

        Parameters
        ----------
        X : numpy.ndarray
            Input data for training the model.
        """
        self.shape = X.shape
        self.model = self._build_model()

        early_stopping = EarlyStopping(patience=5, verbose=0)

        self.model.fit(X, X,
                       validation_split=self.params[2],
                       epochs=self.params[0],
                       batch_size=self.params[1],
                       verbose=0,
                       shuffle=False,
                       callbacks=[early_stopping])

    def predict(self, data):
        """
        Generate predictions using the trained sequence-to-sequence (LSTM-based) autoencoder model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Reconstructed output data.
        """
        return self.model.predict(data)
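
A minimal usage sketch; the window construction and the PARAMS values are illustrative assumptions, not part of this commit.

import numpy as np
from LSTM_AE import LSTM_AE

# Hypothetical sliding windows over a 3-feature series:
# shape (n_windows, timesteps, n_features).
train_windows = np.random.rand(400, 20, 3)
test_windows = np.random.rand(100, 20, 3)

# PARAMS order is fixed by the class: [EPOCHS, BATCH_SIZE, VAL_SPLIT]
PARAMS = [100, 32, 0.1]
model = LSTM_AE(PARAMS)
model.fit(train_windows)

# Mean absolute reconstruction error per window as the anomaly score.
reconstructed = model.predict(test_windows)
scores = np.mean(np.abs(test_windows - reconstructed), axis=(1, 2))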
