import math

import numpy as np
import scipy as sp
import scipy.sparse  # `import scipy` alone does not guarantee that `sp.sparse` is loaded


class KernelMatrix(object):
"""
implement the Graph Neural Tangent Kernel
"""

    def __init__(self, num_layers, num_mlp_layers, jk, scale):
        """
        num_layers: number of layers in the neural network (including the input layer)
        num_mlp_layers: number of MLP layers per block
        jk: whether to add jumping knowledge (sum the NTK contributions of all layers)
        scale: the scaling used to aggregate neighbors, one of ['uniform', 'degree']
        """
        self.num_layers = num_layers
        self.num_mlp_layers = num_mlp_layers
        self.jk = jk
        self.scale = scale
        assert scale in ['uniform', 'degree']
        self.sigma_w2 = 2.    # weight variance sigma_w^2 of the dense layers
        self.sigma_b2 = 0.01  # bias variance sigma_b^2 (unused in the current updates)

    def __adj_diag_nngp(self, S, adj_block, N, scale_mat):
        """
        go through one adj layer for the NNGP covariance, for the diagonal block
        """
        return adj_block.dot(S.reshape(-1)).reshape(N, N) * scale_mat

    def __next_diag_nngp(self, S):
        """
        go through one dense + ReLU layer for the NNGP covariance, for the
        diagonal block; returns the new covariance and the sqrt of the
        diagonal of the input covariance
        """
        diag = np.sqrt(np.diag(S))
        sqrt_xx_yy = diag[:, None] * diag[None, :]
        # normalize to a correlation matrix; the dense-layer variance
        # sigma_w2 is applied in the closed form below
        S = S / sqrt_xx_yy
        S = np.clip(S, -1, 1)  # guard against floating-point overshoot
        theta = np.arccos(S)
        # ReLU arc-cosine kernel of order 1
        S = self.sigma_w2 * sqrt_xx_yy * (np.sin(theta) + (np.pi - theta) * np.cos(theta)) / 2. / np.pi
        return S, diag

    def __adj_nngp(self, S, adj_block, N1, N2, scale_mat):
        """
        go through one adj layer for the NNGP covariance, for all node pairs
        """
        return adj_block.dot(S.reshape(-1)).reshape(N1, N2) * scale_mat

    def __next_nngp(self, S, diag1, diag2):
        """
        go through one dense + ReLU layer for the NNGP covariance, for all
        node pairs, normalizing with the stored per-graph diagonals
        """
        sqrt_xx_yy = diag1[:, None] * diag2[None, :]
        S = S / sqrt_xx_yy
        S = np.clip(S, -1, 1)  # guard against floating-point overshoot
        theta = np.arccos(S)
        # ReLU arc-cosine kernel of order 1; sigma_w2 is the dense-layer variance
        S = self.sigma_w2 * sqrt_xx_yy * (np.sin(theta) + (np.pi - theta) * np.cos(theta)) / 2. / np.pi
        return S

    def __next_diag(self, S):
        """
        go through one normal (dense + ReLU) layer, for the diagonal block
        S: covariance of the last layer
        """
        diag = np.sqrt(np.diag(S))  # sqrt of the diagonal of S, used below to normalize S to a correlation matrix
        S = S / diag[:, None] / diag[None, :]
        S = np.clip(S, -1, 1)  # guard against floating-point overshoot
        DS = (math.pi - np.arccos(S)) / math.pi  # dot_sigma of the next MLP layer
        S = (S * (math.pi - np.arccos(S)) + np.sqrt(1 - S * S)) / np.pi  # sigma of the next MLP layer
        S = S * diag[:, None] * diag[None, :]
        return S, DS, diag
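
    # The two closed forms above are the ReLU arc-cosine kernels scaled by
    # c_sigma = 2: with rho = S_xy / sqrt(S_xx * S_yy) and
    # (u, v) ~ N(0, [[S_xx, S_xy], [S_xy, S_yy]]),
    #   2 * E[relu'(u) relu'(v)] = (pi - arccos(rho)) / pi
    #   2 * E[relu(u) relu(v)]   = sqrt(S_xx * S_yy) * (rho * (pi - arccos(rho)) + sqrt(1 - rho^2)) / pi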

    def __adj_diag(self, S, adj_block, N, scale_mat):
        """
        go through one adj layer, for the diagonal block
        S: the covariance
        adj_block: the Kronecker product of the adjacency matrix with itself
        N: number of vertices
        scale_mat: scaling matrix
        """
        return adj_block.dot(S.reshape(-1)).reshape(N, N) * scale_mat

    def __next(self, S, diag1, diag2):
        """
        go through one normal layer, for all node pairs
        """
        S = S / diag1[:, None] / diag2[None, :]
        S = np.clip(S, -1, 1)
        DS = (math.pi - np.arccos(S)) / math.pi
        S = (S * (math.pi - np.arccos(S)) + np.sqrt(1 - S * S)) / np.pi
        S = S * diag1[:, None] * diag2[None, :]
        return S, DS

    def __adj(self, S, adj_block, N1, N2, scale_mat):
        """
        go through one adj layer, for all node pairs
        """
        return adj_block.dot(S.reshape(-1)).reshape(N1, N2) * scale_mat
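
    # The recursion driven by `diag` and `kernel` below: each block first
    # aggregates covariances over neighbors via the Kronecker adjacency
    # (the __adj* helpers), then pushes them through num_mlp_layers
    # dense + ReLU layers (the __next* helpers), with the NTK accumulating
    # as ntk <- ntk * dot_sigma + sigma after every MLP layer.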

    def diag(self, g, A):
        """
        precompute the diagonal-block statistics of the GNTK for graph `g`
        with adjacency matrix `A`: the per-layer diagonal normalizers and
        the same-graph NNGP blocks consumed by `kernel`
        g: graph g
        A: adjacency matrix
        """
N = A.shape[0] # num of nodes of graph g
        # calculate c_u * c_u' for every pair of nodes
        if self.scale == 'uniform':
            scale_mat = 1.
        else:
            scale_mat = 1. / np.outer(np.sum(A, axis=1), np.sum(A, axis=0))
        diag_list = []  # sqrt of the diagonal of sigma for blocks l = 1..L-1 and MLP layers r = 0..R-1 (same graph); len = (L-1)*R
diag_nngp_list = []
nngp_xx_list = []
adj_block = sp.sparse.kron(A, A)
# input covariance
        sigma = np.matmul(g.node_features, g.node_features.T)  # compute sigma^{0}
        sigma = self.__adj_diag(sigma, adj_block, N, scale_mat)  # compute sigma^{1}_{0} for the case G = G'
ntk = np.copy(sigma)
nngp = np.copy(sigma)
for layer in range(1, self.num_layers):
for mlp_layer in range(self.num_mlp_layers):
                sigma, dot_sigma, diag = self.__next_diag(sigma)  # next-layer sigma and dot_sigma, plus the diagonal normalizer
diag_list.append(diag)
ntk = ntk * dot_sigma + sigma
nngp, diag_nngp = self.__next_diag_nngp(nngp)
diag_nngp_list.append(diag_nngp)
# if not last layer
if layer != self.num_layers - 1:
                nngp_xx_list.append(nngp)  # store the same-graph NNGP block for the cross-graph pass
                nngp_old = np.copy(nngp)
                # fold the block into nngp and ntk before the next aggregation
                nngp = np.matmul(np.matmul(nngp, nngp), nngp.T)
                ntk = 2 * nngp + np.matmul(np.matmul(nngp_old, ntk * 2), nngp_old.T)
sigma = self.__adj_diag(sigma, adj_block, N, scale_mat)
ntk = self.__adj_diag(ntk, adj_block, N, scale_mat) # compute sigma^{l+1}_{0} and ntk^{l+1}_{0}
nngp = self.__adj_diag_nngp(nngp, adj_block, N, scale_mat)
return diag_list, diag_nngp_list, nngp_xx_list
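
    # Note: after `diag`, len(diag_list) == len(diag_nngp_list)
    # == (num_layers - 1) * num_mlp_layers, and len(nngp_xx_list)
    # == num_layers - 2 (empty when num_layers == 2).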
    def kernel(self, g1, g2, diag_list1, diag_list2, A1, A2, diag_list1_nngp, diag_list2_nngp, nngp_xx_list1, nngp_xx_list2):
        r"""
        compute the kernel value \Theta(g1, g2)
        g1, g2: the two graphs
        diag_list1, diag_list2: per-layer diagonal normalizers of g1 and g2, as returned by `diag`
        A1, A2: adjacency matrices of g1 and g2
        diag_list1_nngp, diag_list2_nngp: per-layer NNGP diagonal normalizers, as returned by `diag`
        nngp_xx_list1, nngp_xx_list2: stored same-graph NNGP blocks, as returned by `diag`
        """
n1 = A1.shape[0] # num of nodes of g1
n2 = A2.shape[0] # num of nodes of g2
        if self.scale == 'uniform':
            scale_mat = 1.
        else:
            scale_mat = 1. / np.outer(np.sum(A1, axis=1), np.sum(A2, axis=0))
adj_block = sp.sparse.kron(A1, A2)
jump_ntk = 0
        sigma = np.matmul(g1.node_features, g2.node_features.T)  # compute sigma^{0}
        jump_ntk += sigma
        sigma = self.__adj(sigma, adj_block, n1, n2, scale_mat)  # compute sigma^{1}_{0}
ntk = np.copy(sigma)
nngp = np.copy(sigma)
for layer in range(1, self.num_layers):
for mlp_layer in range(self.num_mlp_layers):
sigma, dot_sigma = self.__next(sigma,
diag_list1[(layer - 1) * self.num_mlp_layers + mlp_layer],
diag_list2[(layer - 1) * self.num_mlp_layers + mlp_layer]) # sigma of next mlp layer, dot_sigma of next mlp layer
ntk = ntk * dot_sigma + sigma # ntk
nngp = self.__next_nngp(nngp,
diag_list1_nngp[(layer - 1) * self.num_mlp_layers + mlp_layer],
diag_list2_nngp[(layer - 1) * self.num_mlp_layers + mlp_layer])
jump_ntk += ntk
# if not last layer
if layer != self.num_layers - 1:
                # fold in the same-graph NNGP blocks stored by `diag`
                nngp = np.matmul(np.matmul(nngp_xx_list1[layer - 1], nngp), nngp_xx_list2[layer - 1].T)
                ntk = 2 * nngp + np.matmul(np.matmul(nngp_xx_list1[layer - 1], ntk * 2), nngp_xx_list2[layer - 1].T)
sigma = self.__adj(sigma, adj_block, n1, n2, scale_mat)
ntk = self.__adj(ntk, adj_block, n1, n2, scale_mat) # compute sigma^{l+1}_{0} and ntk^{l+1}_{0}
nngp = self.__adj_nngp(nngp, adj_block, n1, n2, scale_mat)
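        # readout: the kernel value is the sum over all node pairs; with
        # jumping knowledge, the per-layer NTK accumulator is summed instead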
if self.jk:
return np.sum(jump_ntk)
else:
return np.sum(ntk)
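

if __name__ == '__main__':
    # Minimal smoke test. This is a sketch, not part of the original
    # pipeline: the graph container is assumed to be anything exposing a
    # `node_features` array (here a SimpleNamespace with random features),
    # and the adjacency matrices are random symmetric 0/1 matrices with
    # self-loops; `scale='uniform'` avoids assumptions about the adjacency
    # dtype required by degree scaling.
    from types import SimpleNamespace

    rng = np.random.default_rng(0)

    def random_graph(n, d=8):
        A = (rng.random((n, n)) < 0.3).astype(float)
        A = np.minimum(A + A.T, 1.0)  # symmetrize
        np.fill_diagonal(A, 1.0)      # add self-loops
        return SimpleNamespace(node_features=rng.standard_normal((n, d))), A

    gntk = KernelMatrix(num_layers=3, num_mlp_layers=2, jk=False, scale='uniform')
    g1, A1 = random_graph(5)
    g2, A2 = random_graph(7)
    diag1, diag1_nngp, xx1 = gntk.diag(g1, A1)
    diag2, diag2_nngp, xx2 = gntk.diag(g2, A2)
    theta = gntk.kernel(g1, g2, diag1, diag2, A1, A2,
                        diag1_nngp, diag2_nngp, xx1, xx2)
    print('Theta(g1, g2) =', theta)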