This repository has been archived by the owner on May 31, 2024. It is now read-only.
forked from kellymarchisio/euc-v-graph-bli
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathclustering.py
65 lines (61 loc) · 2.42 KB
/
clustering.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import numpy as np
from collections import defaultdict
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torch
def cluster_graph(directed_adj_matrix: np.array, theta: float, k: int, dictionary_form: bool = True) -> dict:
    """Collect, for each row index, the column indices whose weight exceeds ``theta``.

    For every power ``m`` in ``1..k`` the ``m``-th matrix power of
    ``directed_adj_matrix`` is computed, and every entry ``(i, j)`` that is
    strictly greater than ``theta`` is stored as ``clusters[i][j]``. When the
    same ``(i, j)`` qualifies for several powers, the value from the highest
    such power wins because later powers overwrite earlier ones.

    Args:
        directed_adj_matrix: Square matrix accepted by
            ``np.linalg.matrix_power``.
        theta: Exclusive lower bound an entry must exceed to be recorded.
        k: Highest matrix power to inspect.
        dictionary_form: If true, each row maps column index -> weight.
            If false, each row is reduced to a view of its column indices.

    Returns:
        ``None`` when ``k <= 0``. Otherwise a dict keyed by row index. Rows
        that never had an entry above ``theta`` are absent from the result
        (in both output forms).
    """
    if k <= 0:
        # Kept as-is for backward compatibility: callers receive None here,
        # not an empty dict.
        return None

    clusters = defaultdict(dict)
    for power in range(1, k + 1):
        matrix = np.linalg.matrix_power(directed_adj_matrix, power)
        # argwhere yields the qualifying (row, col) pairs in row-major order;
        # tolist() turns the indices into plain Python ints for use as keys.
        for i, j in np.argwhere(matrix > theta).tolist():
            clusters[i][j] = matrix[i, j]
    clusters = dict(clusters)

    if not dictionary_form:
        # Only convert rows that actually exist; indexing every i in range(n)
        # raised KeyError for rows without any entry above theta.
        return {i: row.keys() for i, row in clusters.items()}
    return clusters
def dict_to_tensor(clusters: dict) -> torch.Tensor:
    """Convert a nested ``{row: {col: weight}}`` mapping into a square float tensor.

    The side length is the larger of the number of rows in ``clusters`` and
    one more than the largest row or column index that appears in it, so no
    recorded entry is dropped when some row indices are missing from the
    mapping. (Sizing by ``len(clusters)`` alone silently truncated such
    inputs.)

    Args:
        clusters: Mapping from integer row index to a dict mapping integer
            column index to a numeric weight.

    Returns:
        A ``float32`` tensor of shape ``(size, size)``. A row whose index is
        present in ``clusters`` holds its recorded weights and zeros
        elsewhere; a row whose index is absent is filled with ones.
    """
    size = len(clusters)
    for i, row in clusters.items():
        size = max(size, i + 1, *(j + 1 for j in row))

    tensor = torch.zeros([size, size], dtype=torch.float32)
    for i in range(size):
        if i not in clusters:
            # Rows without any cluster information are set to all ones.
            tensor[i] = 1.0
            continue
        for j, weight in clusters[i].items():
            # Negative keys were never written by the index-based loop;
            # guard so they cannot wrap around to the last columns.
            if 0 <= j < size:
                tensor[i, j] = weight
    return tensor
def class_cluster(input: np.array, n_classes: int, theta: float, k: int) -> torch.Tensor:
    """Assign every column of the cluster tensor to one of ``n_classes`` rows.

    The weight tensor is built with ``dict_to_tensor(cluster_graph(...))``.
    The ``n_classes`` rows with the most non-zero entries are selected, and
    each column is then assigned to whichever selected row holds the largest
    weight in that column.

    Args:
        input: Adjacency matrix forwarded to ``cluster_graph``.
        n_classes: Number of rows to keep as classes; ``topk`` raises if this
            exceeds the number of rows in the weight tensor.
        theta: Threshold forwarded to ``cluster_graph``.
        k: Highest matrix power forwarded to ``cluster_graph``.

    Returns:
        A tensor of shape ``(n_classes, n)`` with the weight tensor's dtype,
        where each column is one-hot: entry ``(c, i)`` is 1 when column ``i``
        is assigned to class ``c``.
    """
    weights = dict_to_tensor(cluster_graph(input, theta, k, True))

    # Rank rows by how many non-zero entries they have. The ranking must be
    # taken over `counts`; calling topk on the 2-D weight tensor returns 2-D
    # indices and turns the row selection below into a 3-D gather.
    counts = torch.count_nonzero(weights, dim=1)
    selected_rows = counts.topk(n_classes).indices
    scores = weights[selected_rows]

    # For each column pick the selected row with the largest weight, then
    # expand that choice into a one-hot column.
    winners = torch.argmax(scores, dim=0)
    one_hot = torch.nn.functional.one_hot(winners, scores.size(0))
    return one_hot.t().contiguous().to(scores.dtype)
def plot_clusters(input: np.array, theta: float, k: int, n_clusters: int, embeddings: torch.Tensor, n_components: int = 2) -> None:
    """Scatter-plot a t-SNE projection of ``embeddings``, one series per cluster.

    Cluster membership comes from ``class_cluster(input, n_clusters, theta, k)``;
    row ``i`` of that result is compared against 1 to select the points of
    cluster ``i``. Each non-empty cluster is drawn as its own series labelled
    with its index, together with a black marker at the mean of its projected
    points.

    Args:
        input: Adjacency matrix forwarded to ``class_cluster``.
        theta: Threshold forwarded to ``class_cluster``.
        k: Highest matrix power forwarded to ``class_cluster``.
        n_clusters: Number of clusters to request and to plot.
        embeddings: Tensor projected with t-SNE. Assumed to have one row per
            column of the ``class_cluster`` result so the membership mask
            lines up with the projected points (confirm against callers).
        n_components: Dimensionality of the projection. Points are drawn for
            2 (current axes) and 3 (a new 3-D figure); for any other value
            only ``plt.legend()`` and ``plt.show()`` are called.
    """
    z = class_cluster(input, n_clusters, theta, k).numpy()
    tsne = TSNE(n_components=n_components)
    # detach()/cpu() so tensors that track gradients or live on an
    # accelerator can still be converted to NumPy.
    tsne_results = tsne.fit_transform(embeddings.detach().cpu().numpy())

    if n_components == 2:
        ax = plt.gca()
    elif n_components == 3:
        fig = plt.figure(figsize=(12, 12))
        ax = fig.add_subplot(projection='3d')
    else:
        ax = None

    if ax is not None:
        # n_clusters is an int, so iterate over range(n_clusters), and use
        # row i of the membership matrix for cluster i (not always row 0).
        for i in range(n_clusters):
            members = tsne_results[z[i] == 1]
            if len(members) == 0:
                # An empty cluster has no centroid; skip it rather than
                # plotting a NaN mean.
                continue
            ax.scatter(*members.T, label=i)
            ax.scatter(*members.mean(axis=0), s=80, color='k')
    plt.legend()
    plt.show()