utils.py
import torch
import numpy as np
import time
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans


# Transformation utils

# construct a dense adjacency matrix from edge_index
# TODO: should consider GPU/CPU conversion
def convert_edge2adj(edge_index, num_nodes):
    # float type, symmetric (edges are treated as undirected)
    mat = torch.zeros((num_nodes, num_nodes))
    for i in range(edge_index.shape[1]):
        x, y = edge_index[:, i]
        mat[x, y] = mat[y, x] = 1
    return mat

def normalize(adj):
    # symmetric normalization: D^{-1/2} * A * D^{-1/2}
    inv_sqrt_degree = 1. / torch.sqrt(adj.sum(dim=1, keepdim=False))
    # isolated nodes have zero degree; zero out the resulting infs
    inv_sqrt_degree[inv_sqrt_degree == float("Inf")] = 0
    return inv_sqrt_degree[:, None] * adj * inv_sqrt_degree[None, :]

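# A minimal usage sketch for the two helpers above (illustrative only, not
# part of the original module; the toy edge_index is an assumption):
#   edge_index = torch.tensor([[0, 1], [1, 2]])  # 2 x num_edges
#   adj = convert_edge2adj(edge_index, num_nodes=3)
#   norm_adj = normalize(adj)  # D^{-1/2} * A * D^{-1/2}
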
# Clustering utils
# Note: code modified from https://github.com/google/active-learning/blob/master/sampling_methods/kcenter_greedy.py

def kcenter_choose(features, num_points, prev_index_list, n):
    # greedy k-center: repeatedly add the point farthest from the current set
    prev_index_len = len(prev_index_list)
    # print('DEBUG n: {}'.format(n))
    if prev_index_len == 0:
        # one-point random initialization
        prev_index_list = [np.random.randint(n)]
        prev_index_len = len(prev_index_list)
    # distance from every point to its nearest already-selected center
    # print('DEBUG min_distances shape', pairwise_distances(features, features[prev_index_list]).shape)
    min_distances = np.min(pairwise_distances(features, features[prev_index_list]), axis=1)
    # select num_points - prev_index_len new indices
    new_index_list = []
    for _ in range(num_points - prev_index_len):
        ind = np.argmax(min_distances)
        # print('DEBUG ind', ind)
        assert ind not in prev_index_list
        new_index_list.append(ind)
        # update distances to account for the newly added center
        new_distances = pairwise_distances(features, features[ind].reshape(1, -1)).reshape(-1)
        min_distances = np.minimum(min_distances, new_distances)
    # binary mask of length n marking the selected indices
    indices = torch.LongTensor(np.concatenate((prev_index_list, new_index_list)))
    ret_tensor = torch.zeros((n), dtype=torch.uint8)
    ret_tensor[indices] = 1
    return ret_tensor

def kmeans_choose(features, num_points, prev_index_list, n):
    kmeans = KMeans(n_clusters=num_points).fit(features)
    center_dist = pairwise_distances(kmeans.cluster_centers_, features)  # num_points x n
    # snap each cluster center to its nearest real data point
    full_new_index_list = np.argmin(center_dist, axis=1)
    # TODO: difference of in_order when implementing coreset
    ret_tensor = combine_new_old(full_new_index_list, prev_index_list,
                                 num_points, n, in_order=True)
    return ret_tensor

def kmedoids_choose(features, num_points, prev_index_list, n):
    from pyclustering.cluster.kmedoids import kmedoids
    from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer

    start_time = time.time()
    # Prepare initial centers using the k-means++ method: num_points x feature_dim
    initial_centers = kmeans_plusplus_initializer(features, num_points).initialize()
    # Snap each initial center to its nearest data point to obtain medoid indices.
    distances = pairwise_distances(features, initial_centers, n_jobs=-1)  # parallel computing, n x num_points
    initial_medoids = np.argmin(distances, axis=0)
    print('Medoids number', len(initial_medoids))
    # Create an instance of the K-Medoids algorithm.
    kmedoids_instance = kmedoids(features, initial_medoids)
    # Run cluster analysis and obtain the resulting medoid indices.
    kmedoids_instance.process()
    print('K-Medoids clustering time', time.time() - start_time)
    full_new_index_list = kmedoids_instance.get_medoids()
    # TODO: difference of in_order when implementing coreset
    ret_tensor = combine_new_old(full_new_index_list, prev_index_list,
                                 num_points, n, in_order=True)
    return ret_tensor

def combine_new_old(full_new_index_list, prev_index_list, num_points, n, in_order=True):
    prev_index_len = len(prev_index_list)
    if in_order:
        # in-order difference: keep candidates in their given order,
        # skipping those that were already selected
        new_index_list = []
        added_num = 0
        for ind in full_new_index_list:
            if ind not in prev_index_list:
                added_num += 1
                new_index_list.append(ind)
            if added_num == num_points - prev_index_len:
                break
    else:
        # unordered set difference
        diff_list = np.asarray(list(set(full_new_index_list).difference(set(prev_index_list))))
        new_index_list = diff_list[:num_points - prev_index_len]
    # binary mask of length n marking old and new indices together
    indices = torch.LongTensor(np.concatenate((prev_index_list, new_index_list)))
    ret_tensor = torch.zeros((n), dtype=torch.uint8)
    ret_tensor[indices] = 1
    return ret_tensor
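

if __name__ == "__main__":
    # Minimal smoke test / usage sketch for the clustering-based selectors.
    # This block is illustrative only and not part of the original module: the
    # random feature matrix and sizes are assumptions, and kmedoids_choose is
    # skipped because it needs the optional pyclustering dependency.
    rng = np.random.RandomState(0)
    features = rng.rand(100, 16)  # n = 100 points with 16-dim features
    # fresh selection of 10 points with greedy k-center
    kcenter_mask = kcenter_choose(features, num_points=10, prev_index_list=[], n=100)
    # grow a previous selection (pretend index 0 was chosen earlier) to 10 points
    kmeans_mask = kmeans_choose(features, num_points=10, prev_index_list=[0], n=100)
    print('k-center picked', int(kcenter_mask.sum()), 'points,',
          'k-means picked', int(kmeans_mask.sum()), 'points')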