evaluator.py
import numpy as np
import tensorflow as tf
from scipy.sparse import lil_matrix


class RecallEvaluator(object):
    def __init__(self, model, train_user_item_matrix, test_user_item_matrix):
        """
        Create an evaluator for top-K recommendation metrics (recall, precision, hit ratio, NDCG)
        :param model: the model we are going to evaluate
        :param train_user_item_matrix: the user-item pairs used in the training set. These pairs will be ignored
               in the recall calculation
        :param test_user_item_matrix: the held-out user-item pairs we make predictions against
        """
        self.model = model
        self.train_user_item_matrix = lil_matrix(train_user_item_matrix)
        self.test_user_item_matrix = lil_matrix(test_user_item_matrix)
        n_users = train_user_item_matrix.shape[0]
        # map each user to the set of held-out (test) items, skipping users with no test items
        self.user_to_test_set = {u: set(self.test_user_item_matrix.rows[u])
                                 for u in range(n_users) if self.test_user_item_matrix.rows[u]}
        if self.train_user_item_matrix is not None:
            # map each user to the set of training items so they can be excluded from the ranking
            self.user_to_train_set = {u: set(self.train_user_item_matrix.rows[u])
                                      for u in range(n_users) if self.train_user_item_matrix.rows[u]}
            self.max_train_count = max(len(row) for row in self.train_user_item_matrix.rows)
        else:
            self.user_to_train_set = {}
            self.max_train_count = 0

    def eval(self, sess, users, k=10):
        """
        Compute the top-K metrics for the given users based on the predicted item scores
        :param sess: the TensorFlow session used to run the model
        :param users: the users to evaluate
        :param k: compute the metrics over the top K recommended items
        :return: recall@K, NDCG@K, hit ratio@K and precision@K, one value per user
        """
        # retrieve k + max_train_count items so that at least k candidates remain
        # after filtering out the items already seen during training
        _, user_tops = sess.run(tf.nn.top_k(self.model.item_scores, k + self.max_train_count),
                                {self.model.score_user_ids: users})
        recalls = []
        precisions = []
        hit_ratios = []
        ndcgs = []
        for user_id, tops in zip(users, user_tops):
            train_set = self.user_to_train_set.get(user_id, set())
            test_set = self.user_to_test_set.get(user_id, set())
            top_n_items = 0
            hits = 0
            dcg = 0.0
            idcg = 0.0
            # ideal DCG: all relevant items ranked at the top positions
            for i in range(min(len(test_set), k)):
                idcg += 1 / np.log2(i + 2)
            for i in tops:
                # ignore items seen in the training set
                if i in train_set:
                    continue
                elif i in test_set:
                    dcg += 1 / np.log2(top_n_items + 2)
                    hits += 1
                top_n_items += 1
                if top_n_items == k:
                    break
            recalls.append(hits / float(len(test_set)))
            precisions.append(hits / float(k))
            hit_ratios.append(1.0 if hits > 0 else 0.0)
            ndcgs.append(dcg / idcg)
        return recalls, ndcgs, hit_ratios, precisions
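

# A minimal usage sketch (not from the original project): it wires up a toy
# stand-in model exposing the `item_scores` and `score_user_ids` tensors that
# RecallEvaluator expects, purely to show how eval() is called under TF 1.x.
# The random matrices and the _ToyModel class below are illustrative assumptions.
if __name__ == "__main__":
    n_users, n_items = 50, 200
    rng = np.random.RandomState(0)
    # toy binary interaction matrices standing in for real train/test splits
    train = lil_matrix(rng.binomial(1, 0.05, size=(n_users, n_items)))
    test = lil_matrix(rng.binomial(1, 0.02, size=(n_users, n_items)))

    class _ToyModel(object):
        def __init__(self):
            # user ids to score; RecallEvaluator feeds this placeholder in eval()
            self.score_user_ids = tf.placeholder(tf.int32, [None])
            # random user-by-item scores; a real model would compute these
            all_scores = tf.constant(rng.rand(n_users, n_items), dtype=tf.float32)
            self.item_scores = tf.gather(all_scores, self.score_user_ids)

    evaluator = RecallEvaluator(_ToyModel(), train, test)
    with tf.Session() as sess:
        test_users = list(evaluator.user_to_test_set.keys())
        recalls, ndcgs, hit_ratios, precisions = evaluator.eval(sess, test_users, k=10)
        print("recall@10 %.4f | ndcg@10 %.4f | hr@10 %.4f | precision@10 %.4f"
              % (np.mean(recalls), np.mean(ndcgs), np.mean(hit_ratios), np.mean(precisions)))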