-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathproject3data.py
62 lines (55 loc) · 2.4 KB
/
project3data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import os
import pandas as pd
import numpy as np
from project3utils import get_features_from_url
from project3sql_helpers import remote_sql_session, local_sql_session
# Module-level slot for the combined training DataFrame. None means nothing
# has been loaded yet; get_training_data() checks this before rebuilding.
training_data = None
def _build_labeled_features(urls, is_phishing):
    """Featurize each URL in *urls* and tag every row with *is_phishing*."""
    # NOTE(review): assumes get_features_from_url returns one record of
    # numeric features per URL (the frame is built with dtype float64) --
    # confirm against project3utils.
    features = pd.DataFrame(list(urls.apply(get_features_from_url)), dtype=np.float64)
    features['is_phishing'] = is_phishing
    return features


def reload_training_data():
    """Rebuild the training set from disk and store it in the module cache.

    Reads the first 10000 URLs of the PhishTank CSV (label 1) and every URL
    in 'data/scraped_urls.txt' (label 0), featurizes them with
    get_features_from_url and concatenates both frames with a fresh index.

    Returns:
        The combined DataFrame, which is also assigned to the module-level
        ``training_data`` so later get_training_data() calls can reuse it.
    """
    # Without this declaration the assignment below would create a local
    # variable and the module-level cache would stay None forever.
    global training_data

    phishing_urls = pd.read_csv('data/phishtank2018-05-02_verified_online.csv').head(10000).url
    nonphishing_urls = pd.read_csv('data/scraped_urls.txt', header=None, names=['url']).url

    phishing_features = _build_labeled_features(phishing_urls, 1)
    nonphishing_features = _build_labeled_features(nonphishing_urls, 0)

    training_data = pd.concat([phishing_features, nonphishing_features], ignore_index=True)
    return training_data
def get_training_data():
    """Return the module-level training data, loading it if it is unset.

    When the module-level ``training_data`` is not None it is returned as is;
    otherwise the result of reload_training_data() is returned.
    """
    current = training_data
    if current is not None:
        return current
    return reload_training_data()
def run_migrations(session):
    """Apply every not-yet-recorded SQL migration found in 'db_migrations'.

    Ensures the ``migrations`` bookkeeping table exists, then walks the
    migration files in sorted filename order. A file whose id (the part of
    the name before the first underscore) is already recorded is skipped;
    otherwise its SQL and the bookkeeping INSERT run between an explicit
    BEGIN and COMMIT.

    Args:
        session: object exposing ``execute(sql)`` whose result provides
            ``fetchone()``.

    Raises:
        ValueError: if a filename contains no underscore.
        Exception: whatever ``session.execute`` raises while applying a
            migration; a ROLLBACK is issued before re-raising.
    """
    session.execute(
        """
        CREATE TABLE IF NOT EXISTS migrations (
            migrated VARCHAR(255)
        );
        """
    )
    # Assuming that 'db_migrations' contains only sql migration files
    # and that the files follow the naming convention YYYYMMDDHH_description.sql
    completed_migration_count = 0
    for filename in sorted(os.listdir('db_migrations')):
        # Split on the first underscore only: descriptions such as
        # 'create_users_table' contain underscores themselves.
        migration_id, _ = filename.split('_', 1)
        # NOTE(review): SQL is built by string formatting. The id comes from
        # a local filename, not user input; switch to bound parameters once
        # the session's parameter style is confirmed.
        already_run = session.execute(
            "SELECT * FROM migrations WHERE migrated = '{}'".format(migration_id)
        ).fetchone()
        if already_run:
            print('skipping migration {}'.format(filename))
            continue

        print('starting migration {}'.format(filename))
        # Read the file up front so it is not held open during the transaction.
        with open('db_migrations/{}'.format(filename), 'r') as sql_file:
            migration_sql = sql_file.read()

        session.execute('BEGIN')
        try:
            session.execute(migration_sql)
            session.execute("INSERT INTO migrations (migrated) VALUES ('{}')".format(migration_id))
            session.execute('COMMIT')
        except Exception:
            # Do not leave a half-applied migration in an open transaction.
            session.execute('ROLLBACK')
            raise
        print('completed migration {}'.format(filename))
        completed_migration_count += 1
    print('completed {} pending migration(s)'.format(completed_migration_count))
# NOTE(review): local_sql_session presumably supplies the `session` argument
# for the local database -- confirm in project3sql_helpers.
@local_sql_session
def run_local_migrations(session):
    """Announce the local run, then apply pending migrations via run_migrations."""
    print('starting local migrations')
    run_migrations(session)
# NOTE(review): remote_sql_session presumably supplies the `session` argument
# for the remote database -- confirm in project3sql_helpers.
@remote_sql_session
def run_remote_migrations(session):
    """Announce the remote run, then apply pending migrations via run_migrations."""
    print('starting remote migrations')
    run_migrations(session)