forked from XiaoxinHe/TAPE
Commit 10c2e71
cc7738@kit.edu committed Mar 17, 2024
1 parent 2e20c46
Showing 6 changed files with 234 additions and 30 deletions.
@@ -0,0 +1,140 @@
import time

import numpy as np
import scipy.sparse as sp
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.callbacks import History
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model
from tensorflow.keras.regularizers import l1_l2

from ..utils import preprocess_nxgraph

def l_2nd(beta):
    """Second-order proximity loss: weighted reconstruction error of the
    adjacency row, with nonzero entries penalised by a factor of beta."""
    def loss_2nd(y_true, y_pred):
        # Weight matrix B: 1 where the adjacency entry is 0, beta elsewhere.
        b_ = tf.where(tf.equal(y_true, 0), 1.0, beta)
        x = K.square((y_true - y_pred) * b_)
        t = K.sum(x, axis=-1)
        return K.mean(t)
    return loss_2nd

def l_1st(alpha):
    """First-order proximity loss: alpha * 2 * tr(Y^T L Y) averaged over the
    batch, where L is the (sub-)Laplacian and Y the embeddings."""
    def loss_1st(y_true, y_pred):
        L = y_true
        Y = y_pred
        batch_size = tf.cast(K.shape(L)[0], dtype=tf.float32)
        return alpha * 2 * tf.linalg.trace(
            tf.matmul(tf.matmul(Y, L, transpose_a=True), Y)) / batch_size
    return loss_1st

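# Note (explanatory, not in the original commit): together these two terms
# form the SDNE objective. loss_2nd reconstructs each node's adjacency row,
# weighting observed edges by beta so that missing an existing edge costs
# beta^2 times the squared error of the same miss on a non-edge (25x for the
# default beta = 5); loss_1st, alpha * 2 * tr(Y^T L Y), shrinks the distance
# between embeddings of adjacent nodes.
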
def create_model(node_size, hidden_size=[256, 128], l1=1e-5, l2=1e-4):
    A = Input(shape=(node_size,))
    L = Input(shape=(None,))  # batch Laplacian; consumed only by loss_1st
    # Encoder: compress an adjacency row down to the embedding Y.
    fc = A
    for i in range(len(hidden_size)):
        if i == len(hidden_size) - 1:
            fc = Dense(hidden_size[i], activation='relu',
                       kernel_regularizer=l1_l2(l1, l2), name='1st')(fc)
        else:
            fc = Dense(hidden_size[i], activation='relu',
                       kernel_regularizer=l1_l2(l1, l2))(fc)
    Y = fc
    # Decoder: mirror the encoder to reconstruct the adjacency row.
    for i in reversed(range(len(hidden_size) - 1)):
        fc = Dense(hidden_size[i], activation='relu',
                   kernel_regularizer=l1_l2(l1, l2))(fc)
    A_ = Dense(node_size, activation='relu', name='2nd')(fc)
    model = Model(inputs=[A, L], outputs=[A_, Y])
    emb = Model(inputs=A, outputs=Y)
    return model, emb

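# Shape sketch (illustrative; hidden_size=[256, 128], node_size=N):
#   A: (batch, N) -> Dense(256) -> Dense(128, name='1st') = Y
#   Y -> Dense(256) -> Dense(N, name='2nd') = A_
# The training model is fit on x = y = [A, L]: output '2nd' is matched
# against A by loss_2nd, output '1st' against L by loss_1st.
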
class SDNE(object):
    def __init__(self, graph, hidden_size=[32, 16], alpha=1e-6, beta=5., nu1=1e-5, nu2=1e-4):
        self.graph = graph
        self.idx2node, self.node2idx = preprocess_nxgraph(self.graph)
        self.node_size = self.graph.number_of_nodes()
        self.hidden_size = hidden_size
        self.alpha = alpha  # weight of the first-order loss
        self.beta = beta    # penalty on nonzero entries in the reconstruction
        self.nu1 = nu1      # L1 weight regularisation
        self.nu2 = nu2      # L2 weight regularisation
        self.A, self.L = self._create_A_L(self.graph, self.node2idx)
        self.reset_model()
        self.inputs = [self.A, self.L]
        self._embeddings = {}

    def reset_model(self, opt='adam'):
        self.model, self.emb_model = create_model(
            self.node_size, hidden_size=self.hidden_size, l1=self.nu1, l2=self.nu2)
        self.model.compile(opt, [l_2nd(self.beta), l_1st(self.alpha)])
        self.get_embeddings()

    def train(self, batch_size=1024, epochs=1, initial_epoch=0, verbose=1):
        if batch_size >= self.node_size:
            if batch_size > self.node_size:
                print('batch_size({0}) > node_size({1}), set batch_size = {1}'.format(
                    batch_size, self.node_size))
                batch_size = self.node_size
            # Full-batch training on the dense A and L.
            return self.model.fit([self.A.todense(), self.L.todense()],
                                  [self.A.todense(), self.L.todense()],
                                  batch_size=batch_size, epochs=epochs,
                                  initial_epoch=initial_epoch, verbose=verbose,
                                  shuffle=False)
        else:
            steps_per_epoch = (self.node_size - 1) // batch_size + 1
            hist = History()
            hist.on_train_begin()
            logs = {}
            for epoch in range(initial_epoch, epochs):
                start_time = time.time()
                losses = np.zeros(3)
                for i in range(steps_per_epoch):
                    index = np.arange(
                        i * batch_size, min((i + 1) * batch_size, self.node_size))
                    A_train = self.A[index, :].todense()
                    # Slice the Laplacian to the batch: rows and columns.
                    L_mat_train = self.L[index][:, index].todense()
                    inp = [A_train, L_mat_train]
                    batch_losses = self.model.train_on_batch(inp, inp)
                    losses += batch_losses
                losses = losses / steps_per_epoch

                logs['loss'] = losses[0]
                logs['2nd_loss'] = losses[1]
                logs['1st_loss'] = losses[2]
                epoch_time = int(time.time() - start_time)
                hist.on_epoch_end(epoch, logs)
                if verbose > 0:
                    print('Epoch {0}/{1}'.format(epoch + 1, epochs))
                    print('{0}s - loss: {1:.4f} - 2nd_loss: {2:.4f} - 1st_loss: {3:.4f}'.format(
                        epoch_time, losses[0], losses[1], losses[2]))
            return hist

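    # Note on the batching above (explanatory, not in the original commit):
    # loss_1st needs the Laplacian restricted to the current batch, hence the
    # square slice L[index][:, index]. The same row/column alignment between
    # Y and L is why the full-batch fit() is called with shuffle=False.
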
    def evaluate(self):
        return self.model.evaluate(x=self.inputs, y=self.inputs, batch_size=self.node_size)

    def get_embeddings(self):
        self._embeddings = {}
        embeddings = self.emb_model.predict(self.A.todense(), batch_size=self.node_size)
        look_back = self.idx2node
        for i, embedding in enumerate(embeddings):
            self._embeddings[look_back[i]] = embedding
        return self._embeddings

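    # Note (explanatory, added): predict() runs the encoder on the full dense
    # adjacency in a single batch, and idx2node maps each row position back
    # to its original node label, so the dict is keyed by graph node.
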
    def _create_A_L(self, graph, node2idx):
        node_size = graph.number_of_nodes()
        A_data = []
        A_row_index = []
        A_col_index = []

        for edge in graph.edges():
            v1, v2 = edge
            edge_weight = graph[v1][v2].get('weight', 1)

            A_data.append(edge_weight)
            A_row_index.append(node2idx[v1])
            A_col_index.append(node2idx[v2])

        # A keeps edges as stored; A_ adds the reverse direction so the
        # Laplacian below is built from a symmetric adjacency.
        A = sp.csr_matrix((A_data, (A_row_index, A_col_index)),
                          shape=(node_size, node_size))
        A_ = sp.csr_matrix((A_data + A_data,
                            (A_row_index + A_col_index, A_col_index + A_row_index)),
                           shape=(node_size, node_size))

        # Unnormalised graph Laplacian L = D - A_, D being the degree matrix.
        D = sp.diags(A_.sum(axis=1).flatten().tolist()[0])
        L = D - A_
        return A, L
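
A minimal usage sketch (added for illustration, not part of the commit; it assumes the module is importable as sdne from inside its package so the relative import of preprocess_nxgraph resolves, and that networkx and tensorflow are installed):

    import networkx as nx

    from sdne import SDNE

    # Zachary's karate club: 34 nodes, small enough for full-batch training.
    G = nx.karate_club_graph()

    model = SDNE(G, hidden_size=[32, 16], alpha=1e-6, beta=5.)
    model.train(batch_size=1024, epochs=50)  # batch_size > 34, so full batch

    embeddings = model.get_embeddings()  # dict: node label -> 16-dim vector
    print(len(embeddings), embeddings[0].shape)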