# RandForest.py
import codecs
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
# GRABS DATA FROM THE STOCK API AND PUTS IT IN A DATAFRAME
stock = "AMZN"
url = "https://api.iextrading.com/1.0/stock/" + stock + "/chart/5y?filter=date,close"
urlInfo = "https://api.iextrading.com/1.0/stock/" + stock + "/company"
data = pd.DataFrame(pd.read_json(url, orient="columns"))
dates = list(data["date"])
closingprices = list(data["close"])
# REVERSES SO THE LATEST PRICE IS ON TOP OF THE LIST
cp = list(reversed(closingprices))
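# The chart endpoint with filter=date,close is expected to return a JSON array
# of objects such as {"date": "2018-11-02", "close": 1665.53} (illustrative
# values only), which pd.read_json turns into the "date" and "close" columns
# used above. Note that the legacy api.iextrading.com service has since been
# retired, so this URL may need to point at IEX Cloud or another provider.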
# READS FROM TRENDS.TXT TO OBTAIN GOOGLE TRENDS SCORES (FROM THE NODEJS FILE)
with open("trends.txt") as f:
    raw = f.read()
int_scores = [int(score) for score in raw.strip().split(",") if score.strip()]
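# A minimal sketch of the assumed trends.txt layout (the NodeJS exporter is not
# part of this file): a single line of comma-separated integer Google Trends
# scores, e.g. "12,34,56". How these scores line up with the reversed price
# series is an assumption carried over into the pairing below.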
# CONVERTS THE LISTS FOR EACH DATASET TO NUMPY ARRAYS
DATES = np.array(dates)
CLOSINGPRICES = np.array(cp)
TRENDSCORES = np.array(int_scores)
# PUTS ALL THE COLUMNS TOGETHER IN ONE DATAFRAME
# (all three arrays are expected to have one entry per trading day)
completeDF = pd.DataFrame({
    "Date": DATES,
    "ClosingPrice": CLOSINGPRICES,
    "GoogleTrendsScore": TRENDSCORES,
})
# print(completeDF)
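# Quick look at the assembled frame (an added diagnostic, not part of the
# original flow): prints the shape and first few rows so a length mismatch or
# obviously misaligned column is visible before any model is fit.
print(completeDF.shape)
print(completeDF.head())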
## OBTAINS SAMPLE DATA FOR TRAINING AND TESTING MODELS
# Holds out the most recent 252 rows (roughly one trading year) as a test set.
# sklearn expects X as a 2-D (n_samples, n_features) array and y as 1-D.
tempDF = completeDF
lm = LinearRegression()
train_x = tempDF["ClosingPrice"][:-252].values.reshape(-1, 1)
test_x = tempDF["ClosingPrice"][-252:].values.reshape(-1, 1)
train_y = tempDF["GoogleTrendsScore"][:-252].values
test_y = tempDF["GoogleTrendsScore"][-252:].values
## OG GRAPH: RAW SCATTER OF STOCK PRICE VS. GOOGLE TREND SCORE
plt.scatter(train_x, train_y, color="black")
plt.xlabel("Stock Price")
plt.ylabel("Google Trend Score")
plt.show()
# LINEAR REGRESSION
lm.fit(train_x, train_y)
# Fits a degree-1 polynomial to the full series to get the slope (m) and
# intercept (b) of the trend line, then saves its string form to graph.json.
coeffs = np.polyfit(tempDF["ClosingPrice"], tempDF["GoogleTrendsScore"], 1)
m, b = coeffs[0], coeffs[1]
equation = str(np.poly1d(coeffs))
with codecs.open("graph.json", "w", encoding="utf-8") as out:
    json.dump(equation, out, sort_keys=True, indent=4)
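# Added sketch: sklearn.metrics is imported above but the fit is never scored,
# so this evaluates the trained line on the held-out final year. The choice of
# R^2 and mean absolute error as the reported metrics is an assumption.
test_pred = lm.predict(test_x)
print("Held-out R^2:", metrics.r2_score(test_y, test_pred))
print("Held-out MAE:", metrics.mean_absolute_error(test_y, test_pred))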
## LINEAR REGRESSION GRAPH
plt.scatter(train_x, train_y, color="black")
# Draws the fitted trend line over the sorted, de-duplicated closing prices.
x_line = np.unique(tempDF["ClosingPrice"])
plt.plot(x_line, np.poly1d(coeffs)(x_line))
plt.xlabel("Stock Price")
plt.ylabel("Google Trend Score")
print(x_line, np.poly1d(coeffs)(x_line))
plt.show()
# EQUATION (poly1d object for the fitted line)
equation = np.poly1d(coeffs)
# PREDICTIONS ON THE TRAINING SET
plt.scatter(train_x, lm.predict(train_x))
plt.xlabel("Stock Price")
plt.ylabel("Google Trend Score")
plt.show()
# RE-SPLITS THE TRAINING DATA INTO A SMALLER TRAIN/TEST SET
train_x, test_x, train_y, test_y = train_test_split(train_x, train_y, test_size=0.2, random_state=0)
print(train_x.shape, test_x.shape, train_y.shape, test_y.shape)
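# Added sketch: the file is named RandForest.py but everything above fits a
# linear model, so this shows one way the intended random forest could be run
# on the same features. RandomForestRegressor and its parameters here are
# assumptions about the intended model, not part of the original script.
from sklearn.ensemble import RandomForestRegressor

rf = RandomForestRegressor(n_estimators=100, random_state=0)
rf.fit(train_x, train_y)
rf_pred = rf.predict(test_x)
print("Random forest MAE:", metrics.mean_absolute_error(test_y, rf_pred))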