-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutils.py
70 lines (57 loc) · 1.78 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from voc import Tag
from nltk.stem.porter import PorterStemmer
import tensorflow as tf
# Module-level shortcut to the ``stem`` method of a single PorterStemmer
# instance; get_stem() applies it to every word of a phrase.
stem = PorterStemmer().stem
# Example usage: s = stem(u'symbolic' + u'execution')
def get_stem(setlist):
    """
    stem every word of every phrase
    setlist: iterable of phrase strings
    return: list of stemmed phrases, in the same order as setlist

    Each phrase is split on whitespace, every word is passed through the
    module-level ``stem`` callable, and the stemmed words are joined back
    together with single spaces.
    """
    stemmed_phrases = []
    for phrase in setlist:
        stemmed_words = map(stem, phrase.split())
        stemmed_phrases.append(' '.join(stemmed_words))
    return stemmed_phrases
def get_chunk(x, y):
    """
    get the chunks of a tagged sentence
    x: sentence list (one token per position)
    y: the label of the sentence list, aligned with x position by position
    return: the chunk list of x sentence; tokens inside a multi-token chunk
            are joined with a single space

    Tag meaning as used here:
      'B' opens a new multi-token chunk,
      'M' continues the open chunk,
      'E' closes the open chunk and emits it,
      'S' emits the token as a chunk on its own,
      any other tag (e.g. 'O') belongs to no chunk.

    Malformed sequences are handled leniently instead of raising:
      * a 'B', 'S' or other tag that arrives while a chunk is still open
        discards the unfinished chunk, so a chunk is always built from
        contiguous tokens;
      * an 'M' or 'E' with no open chunk starts one implicitly, so a lone
        'E' still emits its token (without a leading space).
    If x and y differ in length, the extra trailing entries are ignored.
    """
    chunk_list = []
    pending = []  # tokens of the chunk currently being assembled
    for token, tag in zip(x, y):
        if tag == 'B':
            # Start afresh so an unterminated earlier chunk cannot leak in.
            pending = [token]
        elif tag == 'M':
            pending.append(token)
        elif tag == 'E':
            pending.append(token)
            chunk_list.append(' '.join(pending))
            pending = []
        else:
            # Anything that is not M/E breaks contiguity of an open chunk.
            pending = []
            if tag == 'S':
                chunk_list.append(token)
    return chunk_list
def get_phrase(sentence, y_predict, y_true):
    """
    extract the predicted and the reference chunks of one sentence
    sentence: the tokens of the sentence
    y_predict: predicted labels, either tag strings or tag ids
    y_true: reference labels, in the same representation as y_predict
    return: (chunks from y_predict, chunks from y_true), each produced by
            get_chunk

    The representation is detected from the first predicted label only: if
    it is not one of 'B', 'M', 'S', 'E', 'O', both label sequences are
    treated as ids and their first len(sentence) entries are mapped through
    Tag().idx2tag. The inputs are copied first, so the caller's sequences
    are never modified.
    """
    tag = Tag()
    id_to_tag = tag.idx2tag
    sentence = list(sentence)
    y_predict = list(y_predict)
    y_true = list(y_true)
    # Guard the probe of y_predict[0]: an empty prediction has nothing to
    # convert and must not raise IndexError.
    if y_predict and y_predict[0] not in ('B', 'M', 'S', 'E', 'O'):
        for i in range(len(sentence)):
            y_predict[i] = id_to_tag[y_predict[i]]
            y_true[i] = id_to_tag[y_true[i]]
    return get_chunk(sentence, y_predict), get_chunk(sentence, y_true)
def get_prf_num(predict_labels, true_labels):
    """
    count the raw numbers behind precision / recall / F1
    predict_labels: predicted phrases
    true_labels: reference phrases
    return: (hit_num, pred_num, true_num) where
            hit_num  = number of distinct stemmed phrases found in both,
            pred_num = number of distinct stemmed predicted phrases,
            true_num = number of distinct stemmed reference phrases

    Both inputs are stemmed with get_stem and de-duplicated before counting.
    """
    predicted = set(get_stem(predict_labels))
    gold = set(get_stem(true_labels))
    return len(gold & predicted), len(predicted), len(gold)