forked from numenta/NAB
-
Notifications
You must be signed in to change notification settings - Fork 3
/
nyc_taxi.py
68 lines (58 loc) · 2.06 KB
/
nyc_taxi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
""" Run HTM.Core's anomaly detector on the NYC-Taxicab dataset. """
from nab.corpus import DataFile
from nab.detectors.htmcore.htmcore_detector import HtmcoreDetector
import argparse
import datetime
import matplotlib.pyplot as plt
import numpy as np
# Command-line interface. The module docstring doubles as the description
# shown by --help.
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "-f",
    "--fast",
    action="store_true",
    help="Truncate dataset to first 1000 records",
)
# Parsed once at start-up; `args.fast` is consulted when the dataset is loaded.
args = parser.parse_args()
def parse_timestamp(
    ts: str,
    *,
    fmt: str = "%Y-%m-%d %H:%M:%S",
) -> datetime.datetime:
    """Convert a timestamp string into a naive ``datetime.datetime``.

    Args:
        ts: Timestamp text, e.g. ``"2014-10-30 15:30:00"``.
        fmt: ``strptime`` format describing ``ts``. Defaults to the
            ``YYYY-MM-DD HH:MM:SS`` layout used by the anomaly windows in
            this script.

    Returns:
        The parsed ``datetime.datetime`` (no timezone attached).

    Raises:
        ValueError: If ``ts`` does not match ``fmt``.
    """
    return datetime.datetime.strptime(ts, fmt)
# Relative path to the NYC taxi CSV that is handed to DataFile.
srcPath = "data/realKnownCause/nyc_taxi.csv"
dataSet = DataFile(srcPath)

# Honor the --fast switch: keep only the first 1000 records.
if args.fast:
    dataSet.data = dataSet.data[:1000]

# The two columns that are plotted at the end of the script.
ts = dataSet.data["timestamp"]
val = dataSet.data["value"]
# Ground-truth anomaly windows for this dataset, as (start, end) timestamp
# string pairs. The values were copied from "NAB/labels/combined_windows.json",
# the file that holds the ground-truth labels for all of the NAB datasets.
anomaly_windows = [
    ("2014-10-30 15:30:00", "2014-11-03 22:30:00"),
    ("2014-11-25 12:00:00", "2014-11-29 19:00:00"),
    ("2014-12-23 11:30:00", "2014-12-27 18:30:00"),
    ("2014-12-29 21:30:00", "2015-01-03 04:30:00"),
    ("2015-01-24 20:30:00", "2015-01-29 03:30:00"),
]
# Construct, initialize and run the detector over the loaded dataset.
model = HtmcoreDetector(dataSet=dataSet, probationaryPercent=0.15)
model.initialize()
results = model.run()

# Pull out the two score series that get plotted.
raw, anom = results["raw_score"], results["anomaly_score"]

# Print the detector's encoder, spatial pooler and temporal memory info,
# preceded by a blank line.
print()
for title, attribute in (
    ("Encoder", "enc_info"),
    ("Spatial Pooler", "sp_info"),
    ("Temporal Memory", "tm_info"),
):
    print(title, getattr(model, attribute))
# Plot the results.
# Scale the taxi values by their maximum so they can share an axis with the
# anomaly scores. The division is done out of place on purpose: `val` was taken
# from the dataset's "value" column, and `val /= ...` would rescale that object
# in place (in-place true division also raises for integer NumPy arrays).
val = val / np.max(val)

plt.figure('NYC Taxi')
plt.title('NYC Taxi Anomaly Detection with HTM.Core')
plt.plot(ts, val, 'k', label='NYC Taxi (scaled)')
plt.plot(ts, raw, 'b', label='Raw Anomaly')
plt.plot(ts, anom, 'r', label='Anomaly Likelihood')

# Shade each labeled ground-truth window as a vertical band.
for start, end in anomaly_windows:
    plt.axvspan(parse_timestamp(start), parse_timestamp(end),
                facecolor='y', alpha=0.5)
plt.xlabel('Time')

# The shaded bands are drawn without a label, so plot an empty series whose
# only purpose is to give them an entry in the legend.
plt.plot([], [], 'sy', alpha=0.5, label="Labeled Anomaly Window")
plt.legend()
plt.show()