-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmeteor_score.py
102 lines (78 loc) · 3.57 KB
/
meteor_score.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from nltk.corpus import WordNetCorpusReader, wordnet
from nltk.stem.api import StemmerI
from nltk.stem.porter import PorterStemmer
class meteor:
    """Sentence-level METEOR-style score between a prediction and references.

    Words of the prediction are aligned to words of a reference in three
    successive stages, each working only on the words left unmatched by the
    previous one:

    1. exact (lower-cased) word match,
    2. match on the stem produced by ``self.stemmer``,
    3. match through the synonyms (lemma names) reported by ``self.wordnet``.

    The score is ``fscore * (1 - penalty)`` with
    ``fscore = 10 * P * R / (9 * P + R)`` and
    ``penalty = 0.5 * (chunks / matches) ** 3``.
    """

    # Weight and exponent of the fragmentation penalty.
    _PENALTY_WEIGHT = 0.5
    _PENALTY_EXPONENT = 3

    def __init__(self, stemmer=None, wordnet_reader=None):
        """Create a scorer.

        Args:
            stemmer: object exposing ``stem(word) -> str``. Defaults to a new
                ``PorterStemmer``.
            wordnet_reader: object exposing ``synsets(word)`` whose items
                provide ``lemmas()`` with a ``name()`` each. Defaults to the
                NLTK ``wordnet`` corpus reader.
        """
        self.stemmer = PorterStemmer() if stemmer is None else stemmer
        self.wordnet = wordnet if wordnet_reader is None else wordnet_reader

    def metor_score(self, refs, pred):
        """Return the best score of ``pred`` against any reference in ``refs``.

        Args:
            refs: iterable of reference sentences (whitespace-separated words).
            pred: predicted sentence (whitespace-separated words).

        Returns:
            The maximum single-reference score.

        Raises:
            ValueError: if ``refs`` is empty (nothing to take the maximum of).
        """
        return max([self._metor_single_refs_pred(ref, pred) for ref in refs])

    # Correctly spelled alias; ``metor_score`` is kept for existing callers.
    meteor_score = metor_score

    def _metor_single_refs_pred(self, ref, pred):
        """Score ``pred`` against one reference sentence ``ref``.

        Returns ``0.0`` when no word could be aligned (this includes an empty
        reference or an empty prediction).
        """
        ref_words = self._create_num(ref)
        pred_words = self._create_num(pred)
        # Lengths must be taken now: the matching stages pop from the lists.
        ref_length = len(ref_words)
        pred_length = len(pred_words)

        exact, ref_words, pred_words = self._matching_word_by_word(
            ref_words, pred_words
        )
        stemmed, ref_words, pred_words = self._stemmer_matching_word_by_word(
            ref_words, pred_words
        )
        synonyms, ref_words, pred_words = self._synonymm_matching_word_by_word(
            ref_words, pred_words
        )

        # Order the alignment by position in the prediction for chunk counting.
        matches = sorted(exact + stemmed + synonyms, key=lambda pair: pair[0])
        if not matches:
            # Without matches precision and recall are both zero (or
            # undefined for empty input); the score is defined as zero.
            return 0.0

        match_count = len(matches)
        precision = match_count / pred_length
        recall = match_count / ref_length
        fscore = (precision * recall * 10) / ((9 * precision) + recall)
        frag_frac = self._chunks(matches) / match_count
        penalty = self._PENALTY_WEIGHT * (frag_frac ** self._PENALTY_EXPONENT)
        return fscore * (1 - penalty)

    def _matching_word_by_word(self, ref_num_word, pred_num_word):
        """Align identical words between the two enumerated word lists.

        Both lists hold ``(position, word)`` tuples and are scanned from the
        end. Matched entries are removed from the lists IN PLACE.

        Returns:
            ``(matches, ref_num_word, pred_num_word)`` where ``matches`` is a
            list of ``(pred_position, ref_position)`` tuples and the two lists
            are the (mutated) inputs holding the still-unmatched words.
        """
        matches = []
        # Reverse iteration keeps the remaining indices valid after pop().
        for pred_pos in reversed(range(len(pred_num_word))):
            for ref_pos in reversed(range(len(ref_num_word))):
                if pred_num_word[pred_pos][1] == ref_num_word[ref_pos][1]:
                    matches.append(
                        (pred_num_word[pred_pos][0], ref_num_word[ref_pos][0])
                    )
                    ref_num_word.pop(ref_pos)
                    pred_num_word.pop(pred_pos)
                    break
        return matches, ref_num_word, pred_num_word

    def _stemmer_matching_word_by_word(self, ref_num_word, pred_num_word):
        """Align words whose stems (per ``self.stemmer``) are identical.

        The returned unmatched lists are new lists containing the STEMMED
        words, not the original surface forms.
        """
        stem = self.stemmer.stem
        stemmed_ref = [(position, stem(word)) for position, word in ref_num_word]
        stemmed_pred = [(position, stem(word)) for position, word in pred_num_word]
        return self._matching_word_by_word(stemmed_ref, stemmed_pred)

    def _synonymm_matching_word_by_word(self, ref_num_word, pred_num_word):
        """Align a prediction word with a reference word that is its synonym.

        The synonym set of a prediction word is the word itself plus every
        single-token lemma name (no ``_``) of its synsets in ``self.wordnet``.
        Matched entries are removed from the input lists IN PLACE.

        Returns:
            ``(matches, ref_num_word, pred_num_word)`` in the same format as
            ``_matching_word_by_word``.
        """
        matches = []
        for pred_pos in reversed(range(len(pred_num_word))):
            if not ref_num_word:
                # Nothing left to align with; skip the remaining lookups.
                break
            pred_word = pred_num_word[pred_pos][1]
            candidates = {pred_word}
            for synset in self.wordnet.synsets(pred_word):
                for lemma in synset.lemmas():
                    lemma_name = lemma.name()
                    # Multi-word lemmas are joined by "_" and can never equal
                    # a single whitespace-delimited token.
                    if "_" not in lemma_name:
                        candidates.add(lemma_name)
            for ref_pos in reversed(range(len(ref_num_word))):
                # Compare the WORD (index 1), not the (position, word) tuple.
                if ref_num_word[ref_pos][1] in candidates:
                    matches.append(
                        (pred_num_word[pred_pos][0], ref_num_word[ref_pos][0])
                    )
                    ref_num_word.pop(ref_pos)
                    pred_num_word.pop(pred_pos)
                    break
        return matches, ref_num_word, pred_num_word

    def _chunks(self, matches):
        """Count runs of matches that are adjacent in both sentences.

        ``matches`` is a list of ``(pred_position, ref_position)`` tuples
        sorted by prediction position. A new chunk starts whenever the next
        match does not advance both positions by exactly one. Returns ``1``
        for an empty or single-element list.
        """
        chunk = 1
        for (pred_a, ref_a), (pred_b, ref_b) in zip(matches, matches[1:]):
            if pred_b != pred_a + 1 or ref_b != ref_a + 1:
                chunk += 1
        return chunk

    def _create_num(self, text):
        """Lower-case ``text``, split on whitespace and enumerate the words.

        Returns a list of ``(position, word)`` tuples.
        """
        return list(enumerate(text.lower().split()))