-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathEmoteModel.py
60 lines (44 loc) · 2 KB
/
EmoteModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
#from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas
import numpy
import TweetProcessor
from sklearn.linear_model import LogisticRegression
def init_model(dataset_path='newset.csv', test_size=0.1):
    """Train a bag-of-words logistic-regression sentiment classifier.

    Reads the CSV at ``dataset_path``, runs every ``content`` entry through
    ``TweetProcessor.process``, integer-encodes the ``sentiment`` column,
    holds out a stratified validation split, fits a ``CountVectorizer`` and a
    ``LogisticRegression`` on the training split, and prints the validation
    accuracy.

    Args:
        dataset_path: CSV file with ``content`` and ``sentiment`` columns.
        test_size: Share of the rows held out for validation.

    Returns:
        A ``(logreg, count_vectors)`` tuple: the fitted classifier and the
        fitted vectorizer. The label encoder is not returned, so the
        classifier predicts the encoder's integer codes rather than the
        original sentiment strings.
    """
    dataset = pandas.read_csv(dataset_path)
    dataset['content'] = dataset['content'].apply(TweetProcessor.process)
    print("loaded dataset...")

    label_encoder = preprocessing.LabelEncoder()
    y = label_encoder.fit_transform(dataset.sentiment.values)
    X_train, X_val, y_train, y_val = train_test_split(
        dataset.content.values,
        y,
        stratify=y,
        random_state=42,
        test_size=test_size,
        shuffle=True,
    )

    # Learn the vocabulary from the training split only, so held-out text
    # cannot shape the feature space the model is evaluated on.
    count_vectors = CountVectorizer()
    XtCount = count_vectors.fit_transform(X_train)
    XvCount = count_vectors.transform(X_val)
    print("processed dataset for logistic regression")

    logreg = LogisticRegression(C=1, max_iter=1000, tol=0.0001)
    logreg.fit(XtCount, y_train)

    y_pred = logreg.predict(XvCount)
    # accuracy_score takes (y_true, y_pred).
    print('accuracy: %s' % accuracy_score(y_val, y_pred))
    return logreg, count_vectors
# Sample tweets used when the caller supplies none. Stored as a tuple so the
# defaults can never be altered between calls.
_DEFAULT_TWEETS = (
    "@NerdIndian Take that back. I am insulted.",
    "AAA IM SO ANGRY GRR",
    "i just wanna dance",
    "I'm feeling very well today!",
    "I am sad.",
    "im so mad",
    "I am depressed",
    "I am angry",
    "this is so cool",
    "greatest feeling ever",
    "overjoyed by this right now",
    "i dont know what to do right now",
)


def make_prediction(logreg, count_vectors, tweets=None):
    """Predict a label for each tweet.

    Args:
        logreg: Fitted classifier exposing ``predict``.
        count_vectors: Fitted vectorizer exposing ``transform``.
        tweets: Tweet strings to classify. When omitted (``None``), a
            built-in list of sample tweets is classified instead.

    Returns:
        Whatever ``logreg.predict`` returns for the vectorized tweets, one
        entry per tweet.
    """
    # NOTE(review): init_model passes training text through
    # TweetProcessor.process, but the tweets here are vectorized as given --
    # confirm whether callers are expected to pre-process them.
    if tweets is None:
        # Fresh list per call: nothing downstream can mutate the defaults.
        tweets = list(_DEFAULT_TWEETS)
    tweet_count = count_vectors.transform(tweets)
    return logreg.predict(tweet_count)
# Disabled manual smoke test: the driver below sits inside a bare string
# literal, so it is never executed, and the call to it is commented out too.
'''
def main():
logreg, count_vectors = init_model()
print(make_prediction(logreg, count_vectors))
'''
#main()