zygmuntz
diff --git a/‎.gitignore
+4 b/‎.gitignore
+4
diff --git a/‎PointerLSTM.py
+30-14 b/‎PointerLSTM.py
+30-14
diff --git a/‎test.py
+23-27 b/‎test.py
+23-27
diff --git a/‎test_sums.py
+23-27 b/‎test_sums.py
+23-27
diff --git a/‎train.py
+34-33 b/‎train.py
+34-33
@@ -99,3 +99,7 @@ ENV/
 
 # mypy
 .mypy_cache/
+
+.idea/*
+model_weights/*
+
@@ -1,9 +1,10 @@
-from keras import initializations
-from keras.layers.recurrent import time_distributed_dense
+# from keras import initializations
+import keras.backend as K
 from keras.activations import tanh, softmax
-from keras.layers import LSTM
 from keras.engine import InputSpec
-import keras.backend as K
+from keras.layers import LSTM
+from keras.layers.recurrent import Recurrent
+from keras.layers.recurrent import _time_distributed_dense
 
 
 class PointerLSTM(LSTM):
@@ -12,19 +13,31 @@ def __init__(self, hidden_shape, *args, **kwargs):
         self.input_length = []
         super(PointerLSTM, self).__init__(*args, **kwargs)
 
+    def get_initial_states(self, x_input):  # plural name is the one call() invokes
+        return Recurrent.get_initial_state(self, x_input)  # delegate to the Recurrent base implementation
+
     def build(self, input_shape):
         super(PointerLSTM, self).build(input_shape)
         self.input_spec = [InputSpec(shape=input_shape)]
-        init = initializations.get('orthogonal')
-        self.W1 = init((self.hidden_shape, 1))
-        self.W2 = init((self.hidden_shape, 1))
-        self.vt = init((input_shape[1], 1))
+        # Attention parameters used by step(): vt * tanh(W1*e + W2*d).
+        self.W1 = self.add_weight(name="W1",
+                                  shape=(self.hidden_shape, 1),
+                                  initializer="uniform",
+                                  trainable=True)
+        self.W2 = self.add_weight(name="W2",
+                                  shape=(self.hidden_shape, 1),
+                                  initializer="uniform",
+                                  trainable=True)
+        self.vt = self.add_weight(name="vt",
+                                  shape=(input_shape[1], 1),
+                                  initializer='uniform',
+                                  trainable=True)
-        self.trainable_weights += [self.W1, self.W2, self.vt]
+        # add_weight() already registered W1, W2 and vt; appending them again would duplicate them.
 
     def call(self, x, mask=None):
         input_shape = self.input_spec[0].shape
         en_seq = x
-        x_input = x[:, input_shape[1]-1, :]
+        x_input = x[:, input_shape[1] - 1, :]
         x_input = K.repeat(x_input, input_shape[1])
         initial_states = self.get_initial_states(x_input)
 
@@ -41,17 +54,17 @@ def call(self, x, mask=None):
         return outputs
 
     def step(self, x_input, states):
-    	#print "x_input:", x_input, x_input.shape
-    	# <TensorType(float32, matrix)>
-    	
+        # print "x_input:", x_input, x_input.shape
+        # <TensorType(float32, matrix)>
+
         input_shape = self.input_spec[0].shape
         en_seq = states[-1]
         _, [h, c] = super(PointerLSTM, self).step(x_input, states[:-1])
 
         # vt*tanh(W1*e+W2*d)
         dec_seq = K.repeat(h, input_shape[1])
-        Eij = time_distributed_dense(en_seq, self.W1, output_dim=1)
-        Dij = time_distributed_dense(dec_seq, self.W2, output_dim=1)
+        Eij = _time_distributed_dense(en_seq, self.W1, output_dim=1)
+        Dij = _time_distributed_dense(dec_seq, self.W2, output_dim=1)
         U = self.vt * tanh(Eij + Dij)
         U = K.squeeze(U, 2)
 
@@ -62,3 +75,6 @@ def step(self, x_input, states):
     def get_output_shape_for(self, input_shape):
         # output shape is not affected by the attention component
         return (input_shape[0], input_shape[1], input_shape[1])
+
+    def compute_output_shape(self, input_shape):  # returns the same shape as get_output_shape_for
+        return (input_shape[0], input_shape[1], input_shape[1])  # last two dims both equal input_shape[1]
@@ -2,14 +2,11 @@
 
 "testing (validating) a trained model"
 
-import pickle
 import numpy as np
-
-from keras.models import Model
 from keras.layers import LSTM, Input
-from keras.utils.np_utils import to_categorical
 from keras.metrics import categorical_accuracy
-from sklearn.metrics import accuracy_score as accuracy
+from keras.models import Model
+from keras.utils.np_utils import to_categorical
 
 from PointerLSTM import PointerLSTM
 
@@ -24,18 +21,18 @@
 batch_size = 100
 
 hidden_size = 64
-weights_file = 'model_weights/model_weights_{}_steps_{}.hdf5'.format( n_steps, hidden_size )
+weights_file = 'model_weights/model_weights_{}_steps_{}.hdf5'.format(n_steps, hidden_size)
 
 #
 
-x = np.loadtxt( x_file, delimiter = ',', dtype = int )
-y = np.loadtxt( y_file, delimiter = ',', dtype = int )
+x = np.loadtxt(x_file, delimiter=',', dtype=int)
+y = np.loadtxt(y_file, delimiter=',', dtype=int)
 
-x = np.expand_dims( x, axis = 2 )
+x = np.expand_dims(x, axis=2)
 
 YY = []
 for y_ in y:
-	YY.append(to_categorical(y_))
+    YY.append(to_categorical(y_))
 YY = np.asarray(YY)
 
 x_train = x[:split_at]
@@ -52,31 +49,30 @@
 print("building model...")
 main_input = Input(shape=(seq_len, 1), name='main_input')
 
-encoder = LSTM(output_dim = hidden_size, return_sequences = True, name="encoder")(main_input)
+encoder = LSTM(units=hidden_size, return_sequences=True, name="encoder")(main_input)
-decoder = PointerLSTM(hidden_size, output_dim=hidden_size, name="decoder")(encoder)
+decoder = PointerLSTM(hidden_size, units=hidden_size, name="decoder")(encoder)
 
-model = Model( input=main_input, output=decoder )
+model = Model(inputs=main_input, outputs=decoder)  # Keras 2 argument names, matching train.py
 
-print( "loading weights from {}...".format( weights_file ))
+print("loading weights from {}...".format(weights_file))
 try:
-	model.load_weights( weights_file )
+    model.load_weights(weights_file)
 except IOError:
-	print( "no weights file." )
-	raise SystemExit
+    print("no weights file.")
+    raise SystemExit
 
 model.compile(optimizer='rmsprop',
-			  loss='categorical_crossentropy',
-			  metrics=['accuracy'])
-			  
+              loss='categorical_crossentropy',
+              metrics=['accuracy'])
 
-print "testing..."
+print("testing...")
 
-p = model.predict( x_test[0:1] )
-p_bin = p.argmax( axis = 2 )
+p = model.predict(x_test[0:1])
+p_bin = p.argmax(axis=2)
 
-for y_, p_ in zip( y_test, p )[:5]:
-	print "y_test:", y_
-	print "p:     ", p_.argmax( axis = 1 )
-	print
+for y_, p_ in list(zip(y_test, p))[:5]:  # zip() is an iterator on Python 3; materialise before slicing
+    print("y_test:", y_)
+    print("p:     ", p_.argmax(axis=1))
+    print()
 
-print "categorical accuracy: ", categorical_accuracy( YY_test, p ).eval()
+print("categorical accuracy: ", categorical_accuracy(YY_test, p).eval())
@@ -2,14 +2,11 @@
 
 "testing (validating) a trained model"
 
-import pickle
 import numpy as np
-
-from keras.models import Model
 from keras.layers import LSTM, Input
-from keras.utils.np_utils import to_categorical
 from keras.metrics import categorical_accuracy
-from sklearn.metrics import accuracy_score as accuracy
+from keras.models import Model
+from keras.utils.np_utils import to_categorical
 
 from PointerLSTM import PointerLSTM
 
@@ -22,22 +19,22 @@
 batch_size = 100
 
 hidden_size = 100
-weights_file = 'model_weights/model_weights_sums_{}.hdf5'.format( hidden_size )
+weights_file = 'model_weights/model_weights_sums_{}.hdf5'.format(hidden_size)
 
 n_steps = 3
 n_features = 2
 
 #
 
-x = np.loadtxt( x_file, delimiter = ',', dtype = int )
-y = np.loadtxt( y_file, delimiter = ',', dtype = int )
+x = np.loadtxt(x_file, delimiter=',', dtype=int)
+y = np.loadtxt(y_file, delimiter=',', dtype=int)
 
-x = x.reshape( x.shape[0], n_steps, -1 )
-assert( x.shape[-1] == n_features )
+x = x.reshape(x.shape[0], n_steps, -1)
+assert (x.shape[-1] == n_features)
 
 YY = []
 for y_ in y:
-	YY.append(to_categorical(y_))
+    YY.append(to_categorical(y_))
 YY = np.asarray(YY)
 
 x_train = x[:split_at]
@@ -52,31 +49,30 @@
 #
 
 print("building model...")
-main_input = Input( shape=( x.shape[1], x.shape[2] ), name='main_input' )
+main_input = Input(shape=(x.shape[1], x.shape[2]), name='main_input')
 
-encoder = LSTM(output_dim = hidden_size, return_sequences = True, name="encoder")(main_input)
+encoder = LSTM(units=hidden_size, return_sequences=True, name="encoder")(main_input)
-decoder = PointerLSTM(hidden_size, output_dim=hidden_size, name="decoder")(encoder)
+decoder = PointerLSTM(hidden_size, units=hidden_size, name="decoder")(encoder)
 
-model = Model( input=main_input, output=decoder )
+model = Model(inputs=main_input, outputs=decoder)  # Keras 2 argument names, matching train.py
 
-print( "loading weights from {}...".format( weights_file ))
+print("loading weights from {}...".format(weights_file))
 try:
-	model.load_weights( weights_file )
+    model.load_weights(weights_file)
 except IOError:
-	print( "no weights file." )
+    print("no weights file.")
 
 model.compile(optimizer='rmsprop',
-			  loss='categorical_crossentropy',
-			  metrics=['accuracy'])
-			  
-print( 'testing...')
+              loss='categorical_crossentropy',
+              metrics=['accuracy'])
 
-p = model.predict( x_test )
+print('testing...')
 
-for y_, p_ in zip( y_test, p )[:5]:
-	print "y_test:", y_
-	print "p:     ", p_.argmax( axis = 1 )
-	print
+p = model.predict(x_test)
 
-print "categorical accuracy: ", categorical_accuracy( YY_test, p ).eval()
+for y_, p_ in list(zip(y_test, p))[:5]:  # zip() is an iterator on Python 3; materialise before slicing
+    print("y_test:", y_)
+    print("p:     ", p_.argmax(axis=1))
+    print()
 
+print("categorical accuracy: ", categorical_accuracy(YY_test, p).eval())
@@ -2,11 +2,9 @@
 
 "order integer sequences of length given by n_steps"
 
-import pickle
 import numpy as np
-
-from keras.models import Model
 from keras.layers import LSTM, Input
+from keras.models import Model
 from keras.utils.np_utils import to_categorical
 
 from PointerLSTM import PointerLSTM
@@ -15,26 +13,25 @@
 
 n_steps = 5
 
-x_file = 'data/x_{}.csv'.format( n_steps )
-y_file = 'data/y_{}.csv'.format( n_steps )
+x_file = 'data/x_{}.csv'.format(n_steps)
+y_file = 'data/y_{}.csv'.format(n_steps)
 
 split_at = 9000
 batch_size = 100
 
 hidden_size = 64
-weights_file = 'model_weights/model_weights_{}_steps_{}.hdf5'.format( n_steps, hidden_size )
-
+weights_file = 'model_weights/model_weights_{}_steps_{}.hdf5'.format(n_steps, hidden_size)
 
 #
 
-x = np.loadtxt( x_file, delimiter = ',', dtype = int )
-y = np.loadtxt( y_file, delimiter = ',', dtype = int )
+x = np.loadtxt(x_file, delimiter=',', dtype=int)
+y = np.loadtxt(y_file, delimiter=',', dtype=int)
 
-x = np.expand_dims( x, axis = 2 )
+x = np.expand_dims(x, axis=2)
 
 YY = []
 for y_ in y:
-	YY.append(to_categorical(y_))
+    YY.append(to_categorical(y_))
 YY = np.asarray(YY)
 
 x_train = x[:split_at]
@@ -44,42 +41,46 @@
 YY_train = YY[:split_at]
 YY_test = YY[split_at:]
 
-assert( n_steps == x.shape[1] )
+assert (n_steps == x.shape[1])
 
 #
 
-print( "building model..." )
-main_input = Input( shape=( n_steps, 1 ), name='main_input' )
+print("building model...")
+main_input = Input(shape=(n_steps, 1), name='main_input')
 
-encoder = LSTM(output_dim = hidden_size, return_sequences = True, name="encoder")(main_input)
-decoder = PointerLSTM(hidden_size, output_dim=hidden_size, name="decoder")(encoder)
+encoder = LSTM(units=hidden_size, return_sequences=True, name="encoder")(main_input)
+print(encoder)
+decoder = PointerLSTM(hidden_size, units=hidden_size, name="decoder")(encoder)
 
-model = Model( input=main_input, output=decoder )
+model = Model(inputs=main_input, outputs=decoder)
 
-print( "loading weights from {}...".format( weights_file ))
+print("loading weights from {}...".format(weights_file))
 try:
-	model.load_weights( weights_file )
+    model.load_weights(weights_file)
 except IOError:
-	print( "no weights file, starting anew." )
+    print("no weights file, starting anew.")
 
 model.compile(optimizer='rmsprop',
-			  loss='categorical_crossentropy',
-			  metrics=['accuracy'])
-			  
-print( 'training and saving model weights each epoch...' )
+              loss='categorical_crossentropy',
+              metrics=['accuracy'])
+
+print('training and saving model weights each epoch...')
+
+validation_data = (x_test, YY_test)
 
-validation_data = ( x_test, YY_test )
+epoch_counter = 0
 
 while True:
 
-	history = model.fit( x_train, YY_train, nb_epoch = 1, batch_size = batch_size, 
-		validation_data = validation_data )
+    history = model.fit(x_train, YY_train, epochs=1, batch_size=batch_size,
+                        validation_data=validation_data)
 
-	p = model.predict( x_test )
+    p = model.predict(x_test)
 
-	for y_, p_ in zip( y_test, p )[:5]:
-		print "y_test:", y_
-		print "p:     ", p_.argmax( axis = 1 )
-		print
+    epoch_counter += 1  # one more epoch finished; previously the counter stayed at 0 forever
+    print("epoch_counter: ", epoch_counter)
+    for y_, p_ in list(zip(y_test, p))[:5]:
+        print("y_test:", y_)
+        print("p:     ", p_.argmax(axis=1), end="\n\n")  # blank line between samples
 
-	model.save_weights( weights_file )
+    model.save_weights(weights_file)  # persist every epoch; test.py / test_sums.py load this file