-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWord2VecModel.py
61 lines (38 loc) · 1.48 KB
/
Word2VecModel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
## Data
# https://github.com/RaRe-Technologies/gensim/blob/2a70e3a726404cd4230542a35cfd2dc4d63da6f1/gensim/models/wrappers/fasttext.py#L246
# https://rare-technologies.com/fasttext-and-gensim-word-embeddings/
import logging
from gensim.models import KeyedVectors
from gensim.models import word2vec
class W2V_Model(object):
    """Thin convenience wrapper around a gensim word-embedding model.

    The wrapped model is stored in ``self.model``. It is ``None`` until
    :meth:`load` or :meth:`train` assigns it, and the query methods
    (:meth:`similarity`, :meth:`find_concepts`, :meth:`intruder`) delegate
    straight to it.
    """

    def __init__(self):
        # No model yet; load() or train() fills this in.
        self.model = None

    def load(self, modelfile, binary=True):
        """Load vectors stored in word2vec format from the ``Data/`` folder.

        Args:
            modelfile: File name of the vectors, relative to ``Data/``.
            binary: Forwarded to ``KeyedVectors.load_word2vec_format``;
                pass ``False`` for a text-format file.
        """
        self.model = KeyedVectors.load_word2vec_format(
            'Data/' + modelfile, binary=binary)

    def train(self, sentences):
        """Train a new Word2Vec model unless vectors are already loaded.

        If ``self.model`` is already a ``KeyedVectors`` instance (which is
        what :meth:`load` assigns), a message is printed and the model is
        left untouched. Otherwise a ``Word2Vec`` model is built from
        ``sentences`` and stored in ``self.model``.

        Args:
            sentences: Training corpus, passed as-is to ``Word2Vec``.
        """
        # Use the imported ``KeyedVectors`` name directly: the module never
        # binds the bare name ``gensim``, so spelling out the dotted path
        # ``gensim.models.keyedvectors.KeyedVectors`` raised NameError.
        if isinstance(self.model, KeyedVectors):
            print("You have already trained a model, you can't train a new one")
            return
        # NOTE(review): ``size`` is the gensim < 4 keyword; gensim 4 renamed
        # it to ``vector_size`` -- confirm which version this project pins.
        self.model = word2vec.Word2Vec(sentences, size=200)

    def similarity(self, words, top_n=20):
        """Return the ``top_n`` words most similar to ``words``.

        Args:
            words: Passed as the first positional argument of the model's
                ``most_similar``.
            top_n: Number of neighbours to request.

        Returns:
            A list of words in the order the model returned them, with the
            similarity scores dropped.
        """
        results = self.model.most_similar(words, topn=top_n)
        return [word for word, _score in results]

    def find_concepts(self, positive, negative, top_n=20):
        """Return the ``top_n`` words for a positive/negative analogy query.

        Args:
            positive: Forwarded as ``positive`` to the model's ``most_similar``.
            negative: Forwarded as ``negative`` to the model's ``most_similar``.
            top_n: Number of results to request.

        Returns:
            A list of words in the order the model returned them, with the
            similarity scores dropped.
        """
        results = self.model.most_similar(
            positive=positive, negative=negative, topn=top_n)
        return [word for word, _score in results]

    def intruder(self, words):
        """Return whatever the model's ``doesnt_match`` yields for ``words``."""
        return self.model.doesnt_match(words)
if __name__ == '__main__':
    # Demo run: load a text-format vectors file from Data/ and print the
    # single best answer to the analogy  rey - hombre + mujer.
    vectors_name = 'SBW-vectors-300-min5.txt'
    demo = W2V_Model()
    demo.load(vectors_name, binary=False)
    best_match = demo.model.most_similar(
        positive=['rey', 'mujer'],
        negative=['hombre'],
        topn=1,
    )
    print(best_match)