-
Notifications
You must be signed in to change notification settings - Fork 55
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Algorithms made as modules, notebooks reworked, resolves #26
- Loading branch information
1 parent
80bac4a
commit c031ef6
Showing
25 changed files
with
6,863 additions
and
10,684 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from tensorflow.keras.layers import Input, Conv1D, Dropout, Conv1DTranspose | ||
from tensorflow.keras.models import Sequential | ||
from tensorflow.keras.optimizers import Adam | ||
from tensorflow.keras.callbacks import EarlyStopping | ||
import tensorflow as tf | ||
|
||
class Conv_AE:
    """
    A reconstruction convolutional autoencoder model to detect anomalies in
    timeseries data using reconstruction error as an anomaly score.

    Parameters
    ----------
    No parameters are required for initializing the class.

    Attributes
    ----------
    model : Sequential or None
        The compiled Keras model created by ``fit``; ``None`` until ``fit``
        has been called.
    shape : tuple
        Shape of the array passed to the most recent ``fit`` call (only set
        once ``fit`` has run).

    Examples
    --------
    >>> from Conv_AE import Conv_AE
    >>> CAutoencoder = Conv_AE()
    >>> CAutoencoder.fit(train_data)
    >>> prediction = CAutoencoder.predict(test_data)
    """

    def __init__(self):
        # Declared up front so ``predict`` can recognise an unfitted instance.
        self.model = None
        self._Random(0)

    def _Random(self, seed_value):
        """
        Seed the random number generators used during model construction.

        Parameters
        ----------
        seed_value : int
            Seed applied to ``random``, NumPy and TensorFlow, and written to
            the ``PYTHONHASHSEED`` environment variable.
        """
        import os
        # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this
        # assignment is only inherited by child processes; it does not change
        # string hashing in the process that is already running.
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        """
        Build and compile the convolutional autoencoder.

        Reads ``self.shape`` (assigned by ``fit``); ``shape[1]`` and
        ``shape[2]`` form the Keras input shape.

        Returns
        -------
        Sequential
            Model compiled with the Adam optimizer (learning rate 0.001) and
            MSE loss.
        """
        # Re-seed right before the layers are created so that every call to
        # ``fit`` starts from the same generator state, not only the first one
        # after construction. This resets the process-wide generators.
        self._Random(0)

        encoder_layers = [
            Conv1D(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            Dropout(rate=0.2),
            Conv1D(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
        ]
        decoder_layers = [
            Conv1DTranspose(
                filters=16, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            Dropout(rate=0.2),
            Conv1DTranspose(
                filters=32, kernel_size=7, padding="same", strides=2, activation="relu"
            ),
            # NOTE(review): the output layer has a single filter, so the
            # reconstruction has one channel regardless of ``self.shape[2]``.
            # Confirm this is intended when the input has several channels.
            Conv1DTranspose(filters=1, kernel_size=7, padding="same"),
        ]

        model = Sequential(
            [Input(shape=(self.shape[1], self.shape[2]))]
            + encoder_layers
            + decoder_layers
        )
        model.compile(optimizer=Adam(learning_rate=0.001), loss="mse")

        return model

    def fit(self, data):
        """
        Train the convolutional autoencoder model on the provided data.

        A fresh model is built on every call. Training uses at most 100
        epochs, a batch size of 32 and a validation split of 0.1, and stops
        early once ``val_loss`` has not improved for 5 epochs.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for training the autoencoder model. It is used both as
            the input and as the reconstruction target, and its ``shape``
            must have at least three entries.
        """
        self.shape = data.shape
        self.model = self._build_model()

        stop_on_plateau = EarlyStopping(
            monitor="val_loss", patience=5, mode="min", verbose=0
        )
        self.model.fit(
            data,
            data,
            epochs=100,
            batch_size=32,
            validation_split=0.1,
            verbose=0,
            callbacks=[stop_on_plateau],
        )

    def predict(self, data):
        """
        Generate predictions using the trained convolutional autoencoder model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Predicted output data, as returned by the underlying model's
            ``predict``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet. The exception type matches
            what the unguarded attribute access raised before, so existing
            handlers keep working; only the message is more explicit.
        """
        # ``getattr`` also covers instances created without ``__init__``.
        model = getattr(self, "model", None)
        if model is None:
            raise AttributeError(
                "Conv_AE has no trained model yet; call fit() before predict()."
            )
        return model.predict(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
from sklearn.ensemble import IsolationForest | ||
import tensorflow as tf | ||
|
||
class Isolation_Forest:
    """
    Isolation Forest or iForest builds an ensemble of iTrees for a given data
    set, then anomalies are those instances which have short average path
    lengths on the iTrees.

    Parameters
    ----------
    params : list
        A list containing three parameters, read by position:
        ``[random_state, n_jobs, contamination]``.

    Attributes
    ----------
    params : list
        The list passed to the constructor.
    random_state : int
        The random seed used for reproducibility (``params[0]``).
    n_jobs : int
        The number of CPU cores to use for parallelism (``params[1]``).
    contamination : float
        The expected proportion of anomalies in the dataset (``params[2]``).
    model : IsolationForest or None
        The estimator created by ``fit``; ``None`` until ``fit`` has been
        called.

    Examples
    --------
    >>> from Isolation_Forest import Isolation_Forest
    >>> PARAMS = [random_state, n_jobs, contamination]
    >>> model = Isolation_Forest(PARAMS)
    >>> model.fit(X_train)
    >>> predictions = model.predict(test_data)
    """

    def __init__(self, params):
        self.params = params
        self.random_state = self.params[0]
        self.n_jobs = self.params[1]
        self.contamination = self.params[2]
        # Declared up front so ``predict`` can recognise an unfitted instance.
        self.model = None

    def _Random(self, seed_value):
        """
        Seed the process-wide random number generators.

        Parameters
        ----------
        seed_value : int
            Seed applied to ``random``, NumPy and TensorFlow, and written to
            the ``PYTHONHASHSEED`` environment variable.
        """
        import os
        # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this
        # assignment is only inherited by child processes; it does not change
        # string hashing in the process that is already running.
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        # NOTE(review): TensorFlow is seeded here although the estimator
        # built by this class comes from scikit-learn only - confirm whether
        # this dependency is still wanted.
        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        """
        Create an unfitted ``IsolationForest`` from the stored parameters.

        Returns
        -------
        IsolationForest
            Estimator configured with ``random_state``, ``n_jobs`` and
            ``contamination``.
        """
        # The global generators are always seeded with 0; the user-supplied
        # ``random_state`` is forwarded to the estimator itself.
        self._Random(0)

        model = IsolationForest(
            random_state=self.random_state,
            n_jobs=self.n_jobs,
            contamination=self.contamination,
        )
        return model

    def fit(self, X):
        """
        Train the Isolation Forest model on the provided data.

        A fresh estimator is built on every call.

        Parameters
        ----------
        X : numpy.ndarray
            Input data for training the model.
        """
        self.model = self._build_model()
        self.model.fit(X)

    def predict(self, data):
        """
        Generate predictions using the trained Isolation Forest model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Predicted output data, as returned by the underlying estimator's
            ``predict``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet. The exception type matches
            what the unguarded attribute access raised before, so existing
            handlers keep working; only the message is more explicit.
        """
        # ``getattr`` also covers instances created without ``__init__``.
        model = getattr(self, "model", None)
        if model is None:
            raise AttributeError(
                "Isolation_Forest has no trained model yet; "
                "call fit() before predict()."
            )
        return model.predict(data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
from tensorflow.keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed | ||
from tensorflow.keras import Model | ||
from tensorflow.keras.callbacks import EarlyStopping | ||
import tensorflow as tf | ||
|
||
class LSTM_AE:
    """
    A reconstruction sequence-to-sequence (LSTM-based) autoencoder model to
    detect anomalies in timeseries data using reconstruction error as an
    anomaly score.

    Parameters
    ----------
    params : list
        A list of hyperparameters for the model, read by position:
            - EPOCHS : int
                The number of training epochs (``params[0]``).
            - BATCH_SIZE : int
                The batch size for training (``params[1]``).
            - VAL_SPLIT : float
                The validation split ratio during training (``params[2]``).

    Attributes
    ----------
    params : list
        The hyperparameters for the model.
    model : Model or None
        The compiled Keras model created by ``fit``; ``None`` until ``fit``
        has been called.
    shape : tuple
        Shape of the array passed to the most recent ``fit`` call (only set
        once ``fit`` has run).

    Examples
    --------
    >>> from LSTM_AE import LSTM_AE
    >>> PARAMS = [EPOCHS, BATCH_SIZE, VAL_SPLIT]
    >>> model = LSTM_AE(PARAMS)
    >>> model.fit(train_data)
    >>> predictions = model.predict(test_data)
    """

    def __init__(self, params):
        self.params = params
        # Declared up front so ``predict`` can recognise an unfitted instance.
        self.model = None

    def _Random(self, seed_value):
        """
        Seed the random number generators used during model construction.

        Parameters
        ----------
        seed_value : int
            Seed applied to ``random``, NumPy and TensorFlow, and written to
            the ``PYTHONHASHSEED`` environment variable.
        """
        import os
        # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this
        # assignment is only inherited by child processes; it does not change
        # string hashing in the process that is already running.
        os.environ['PYTHONHASHSEED'] = str(seed_value)

        import random
        random.seed(seed_value)

        import numpy as np
        np.random.seed(seed_value)

        import tensorflow as tf
        tf.random.set_seed(seed_value)

    def _build_model(self):
        """
        Build and compile the LSTM autoencoder.

        Reads ``self.shape`` (assigned by ``fit``); ``shape[1]`` and
        ``shape[2]`` form the Keras input shape, ``shape[1]`` is also the
        repeat count of the encoded vector and ``shape[2]`` the width of the
        output ``Dense`` layer.

        Returns
        -------
        Model
            Model compiled with the ``adam`` optimizer, MAE loss and MSE as
            an additional metric.
        """
        # Seed right before the layers are created so every ``fit`` starts
        # from the same generator state.
        self._Random(0)

        n_steps, n_channels = self.shape[1], self.shape[2]

        inputs = Input(shape=(n_steps, n_channels))
        encoded = LSTM(100, activation='relu')(inputs)

        # The single encoded vector is repeated once per step and decoded
        # back into a full sequence.
        decoded = RepeatVector(n_steps)(encoded)
        decoded = LSTM(100, activation='relu', return_sequences=True)(decoded)
        decoded = TimeDistributed(Dense(n_channels))(decoded)

        # Only the full autoencoder is needed; the stand-alone encoder model
        # that used to be created here was never used and has been dropped.
        model = Model(inputs, decoded)
        model.compile(optimizer='adam', loss='mae', metrics=["mse"])

        return model

    def fit(self, X):
        """
        Train the sequence-to-sequence (LSTM-based) autoencoder model on the
        provided data.

        A fresh model is built on every call. Epochs, batch size and
        validation split are taken from ``self.params``; training uses an
        ``EarlyStopping`` callback with a patience of 5 and ``shuffle=False``.

        Parameters
        ----------
        X : numpy.ndarray
            Input data for training the model. It is used both as the input
            and as the reconstruction target, and its ``shape`` must have at
            least three entries.
        """
        self.shape = X.shape
        self.model = self._build_model()

        early_stopping = EarlyStopping(patience=5, verbose=0)

        self.model.fit(
            X,
            X,
            validation_split=self.params[2],
            epochs=self.params[0],
            batch_size=self.params[1],
            verbose=0,
            shuffle=False,
            callbacks=[early_stopping],
        )

    def predict(self, data):
        """
        Generate predictions using the trained sequence-to-sequence
        (LSTM-based) autoencoder model.

        Parameters
        ----------
        data : numpy.ndarray
            Input data for generating predictions.

        Returns
        -------
        numpy.ndarray
            Predicted output data, as returned by the underlying model's
            ``predict``.

        Raises
        ------
        AttributeError
            If ``fit`` has not been called yet. The exception type matches
            what the unguarded attribute access raised before, so existing
            handlers keep working; only the message is more explicit.
        """
        # ``getattr`` also covers instances created without ``__init__``.
        model = getattr(self, "model", None)
        if model is None:
            raise AttributeError(
                "LSTM_AE has no trained model yet; call fit() before predict()."
            )
        return model.predict(data)
Oops, something went wrong.