importance.py
import os
import torch
from models import InferSent

# Paths and settings for the pretrained InferSent encoder and word vectors.
CWD = os.getcwd()
VERSION = 2
USE_CUDA = torch.cuda.is_available()
MODEL_PATH = f"{CWD}/encoder/infersent{VERSION}.pkl"
VECTOR_GLOVE = f"{CWD}/vectors/glove.840B.300d.txt"
VECTOR_FASTTEXT = f"{CWD}/vectors/crawl-300d-2M.vec"
# InferSent v1 was trained with GloVe vectors, v2 with fastText.
VECTOR_PATH = VECTOR_GLOVE if VERSION == 1 else VECTOR_FASTTEXT
VOCAB_SIZE = 100000
sentences = [
    'Yesterday, Muhyiddin Yassin announced that the Cabinet had agreed to appoint Khairy as the minister in charge of the Covid-19 immunisation programme.',
    'The DAP political education director said Khairy appears to be “much more” competent than Adham.'
]
def init_models(vocab_size: int = VOCAB_SIZE):
    # Build the InferSent encoder with the standard hyperparameters for the
    # pretrained models, load its weights, and prepare the word-vector vocabulary.
    model = InferSent({
        'bsize': 64,
        'word_emb_dim': 300,
        'enc_lstm_dim': 2048,
        'pool_type': 'max',
        'dpout_model': 0.0,
        'version': VERSION
    })
    model.load_state_dict(torch.load(MODEL_PATH))
    model = model.cuda() if USE_CUDA else model
    model.set_w2v_path(VECTOR_PATH)
    model.build_vocab_k_words(K=vocab_size)
    return model
if __name__ == "__main__":
    model = init_models()
    for sentence in sentences:
        # get_importance returns a pair; the second element holds the
        # top-ranked words for the sentence, which is all we print here.
        _, top_words = model.get_importance(sentence, tokenize=True)
        print(top_words)
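
# Optional sketch (not part of the original script): the upstream InferSent
# implementation also exposes an `encode` method that maps sentences to
# 4096-dimensional embeddings (2 x 2048 BiLSTM states with max pooling).
# Assuming this fork keeps that method, the same model could be reused for
# sentence similarity, e.g.:
#
#     import numpy as np
#     embeddings = model.encode(sentences, bsize=64, tokenize=True)
#     a, b = embeddings
#     cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
#     print(f"cosine similarity: {cosine:.3f}")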