-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathutils.py
100 lines (85 loc) · 3.03 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import matplotlib
import numpy as np
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from random import shuffle
from collections import defaultdict
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import inv
class Graph(object):
def __init__(self, base_dir, feature_file, edge_file, cluster_file, alpha, lambda_):
self.init(base_dir, feature_file, edge_file, cluster_file, alpha, lambda_)
def init(self, base_dir, feature_file, edge_file, cluster_file, alpha, lambda_):
feature = []
with open(feature_file) as f:
for line in f:
feature.append(np.array(list(map(int, line.rstrip().split(',')))))
self.feature = np.array(feature)
cluster = []
with open(cluster_file) as f:
for line in f:
cluster.append(np.array(list(map(int, line.rstrip().split(',')))))
self.cluster = np.array(cluster)
edges = defaultdict(set)
with open(edge_file) as f:
for line in f:
tuple = list(map(int, line.rstrip().split(',')))
assert len(tuple) == 2
edges[tuple[0]].add(tuple[1])
edges[tuple[1]].add(tuple[0])
for v, ns in edges.items():
if v not in ns:
edges[v].add(v)
indices = []
T_values, RI_values, RW_values = [], [], []
for v, ns in edges.items():
indices.append(np.array([v, v]))
T_values.append(1.0 / len(ns))
RI_values.append(1.0 - alpha / len(ns))
RW_values.append(1.0 - (1.0 - lambda_) / len(ns))
for n in ns:
if v != n:
indices.append(np.array([v, n]))
T_values.append(1.0 / len(ns))
RI_values.append(-alpha / len(ns))
RW_values.append(-(1.0 - lambda_) / len(ns))
self.indices = np.array(indices)
self.T_values = np.asarray(T_values, dtype=np.float32)
try:
self.RI = np.load(base_dir + 'RI.npy')
except:
self.RI = inv(csc_matrix((RI_values, (self.indices[:, 1], self.indices[:, 0])),
shape=(len(edges), len(edges)))).todense()
np.save(base_dir + 'RI.npy', self.RI.astype(np.float32))
try:
self.RW = np.load(base_dir + 'RW.npy')
except:
self.RW = lambda_ * inv(csc_matrix((RW_values, (self.indices[:, 0], self.indices[:, 1])),
shape=(len(edges), len(edges)))).todense()
self.RW /= np.sum(self.RW, axis=0)
np.save(base_dir + 'RW.npy', self.RW.astype(np.float32))
def load_graph(base_dir, feature_file, graph_file, cluster_file, alpha, lambda_):
return Graph(base_dir, feature_file, graph_file, cluster_file, alpha, lambda_)
def scatter(data, cluster, plot_file):
colors = []
choices = [0.0, 1.0 / 3.0, 2.0 / 3.0, 1.0]
for i in choices:
for j in choices:
for k in choices:
colors.append((i, j, k))
shuffle(colors)
assert len(set(cluster)) <= len(colors)
points_set = []
for cl in range(len(set(cluster))):
points_set.append(np.array([p for p, c in zip(data, cluster) if c == cl]))
plt.figure()
for i, points in enumerate(points_set):
if len(points) > 0:
plt.scatter(points[:, 0], points[:, 1], c=colors[i])
plt.savefig(plot_file)
plt.close()
def plot(data, plot_file):
plt.figure()
plt.plot(range(len(data)), data)
plt.savefig(plot_file)
plt.close()