from datasets import omniglot
import torchvision.transforms as transforms
from PIL import Image
from option import Options
import os.path

import numpy as np
np.random.seed(2191)  # for reproducibility

# LAMBDA FUNCTIONS
filenameToPILImage = lambda x: Image.open(x).convert('L')
PiLImageResize = lambda x: x.resize((28, 28))
np_reshape = lambda x: np.reshape(x, (28, 28, 1))


class OmniglotNShotDataset():
    def __init__(self, batch_size=100, classes_per_set=10, samples_per_class=1):
        """
        Constructs an N-shot Omniglot dataset.
        :param batch_size: Experiment batch_size
        :param classes_per_set: Integer indicating the number of classes per set
        :param samples_per_class: Integer indicating samples per class
        e.g. For a 20-way, 1-shot learning task, use classes_per_set=20 and samples_per_class=1
             For a 5-way, 10-shot learning task, use classes_per_set=5 and samples_per_class=10
        """
        args = Options().parse()

        if not os.path.isfile(os.path.join(args.dataroot, 'data.npy')):
            self.x = omniglot.OMNIGLOT(args.dataroot, download=True,
                                       transform=transforms.Compose([filenameToPILImage,
                                                                     PiLImageResize,
                                                                     np_reshape]))
            # transforms.ToTensor()]))

            # Convert to the format of AntreasAntoniou: [nClasses, nCharacters, 28, 28, 1]
            temp = dict()
            for (img, label) in self.x:
                if label in temp:
                    temp[label].append(img)
                else:
                    temp[label] = [img]
            self.x = []  # Free memory

            for label in temp.keys():
                self.x.append(np.array(temp[label]))
            self.x = np.array(self.x)
            temp = []  # Free memory
            np.save(os.path.join(args.dataroot, 'data.npy'), self.x)
        else:
            self.x = np.load(os.path.join(args.dataroot, 'data.npy'))

        # Shuffle the classes, then split them into train/test/val sets
        shuffle_classes = np.arange(self.x.shape[0])
        np.random.shuffle(shuffle_classes)
        self.x = self.x[shuffle_classes]
        self.x_train, self.x_test, self.x_val = self.x[:1200], self.x[1200:1500], self.x[1500:]
        self.normalization()

        self.batch_size = batch_size
        self.n_classes = self.x.shape[0]
        self.classes_per_set = classes_per_set
        self.samples_per_class = samples_per_class

        self.indexes = {"train": 0, "val": 0, "test": 0}
        self.datasets = {"train": self.x_train, "val": self.x_val, "test": self.x_test}  # original data cached
        self.datasets_cache = {"train": self.load_data_cache(self.datasets["train"]),  # current epoch data cached
                               "val": self.load_data_cache(self.datasets["val"]),
                               "test": self.load_data_cache(self.datasets["test"])}

    def normalization(self):
        """
        Normalizes our data to have a mean of 0 and a std of 1.
        """
        self.mean = np.mean(self.x_train)
        self.std = np.std(self.x_train)
        self.max = np.max(self.x_train)
        self.min = np.min(self.x_train)
        print("train_shape", self.x_train.shape, "test_shape", self.x_test.shape, "val_shape", self.x_val.shape)
        print("before_normalization", "mean", self.mean, "max", self.max, "min", self.min, "std", self.std)
        # Normalize all splits using the training-set statistics
        self.x_train = (self.x_train - self.mean) / self.std
        self.x_val = (self.x_val - self.mean) / self.std
        self.x_test = (self.x_test - self.mean) / self.std
        self.mean = np.mean(self.x_train)
        self.std = np.std(self.x_train)
        self.max = np.max(self.x_train)
        self.min = np.min(self.x_train)
        print("after_normalization", "mean", self.mean, "max", self.max, "min", self.min, "std", self.std)

    def load_data_cache(self, data_pack):
        """
        Collects 1000 batches of data for N-shot learning.
        :param data_pack: Data pack to use (any one of train, val, test)
        :return: A list with [support_set_x, support_set_y, target_x, target_y] ready to be fed to our networks
        """
        n_samples = self.samples_per_class * self.classes_per_set
        data_cache = []
        for sample in range(1000):
            support_set_x = np.zeros((self.batch_size, n_samples, 28, 28, 1))
            support_set_y = np.zeros((self.batch_size, n_samples))
            target_x = np.zeros((self.batch_size, 28, 28, 1))
            target_y = np.zeros((self.batch_size,), dtype=np.int64)
            for i in range(self.batch_size):
                ind = 0
                pinds = np.random.permutation(n_samples)
                classes = np.random.choice(data_pack.shape[0], self.classes_per_set, False)
                # One of the sampled classes is chosen as the query (target) class
                x_hat_class = np.random.randint(self.classes_per_set)
                for j, cur_class in enumerate(classes):  # each class
                    example_inds = np.random.choice(data_pack.shape[1], self.samples_per_class, False)
                    for eind in example_inds:
                        support_set_x[i, pinds[ind], :, :, :] = data_pack[cur_class][eind]
                        support_set_y[i, pinds[ind]] = j
                        ind += 1
                    if j == x_hat_class:
                        target_x[i, :, :, :] = data_pack[cur_class][np.random.choice(data_pack.shape[1])]
                        target_y[i] = j

            data_cache.append([support_set_x, support_set_y, target_x, target_y])
        return data_cache

    def get_batch(self, dataset_name):
        """
        Gets the next batch from the dataset with the given name.
        :param dataset_name: The name of the dataset (one of "train", "val", "test")
        :return: The next batch as (x_support_set, y_support_set, x_target, y_target)
        """
        # Refill the cache once all pre-generated batches have been consumed
        if self.indexes[dataset_name] >= len(self.datasets_cache[dataset_name]):
            self.indexes[dataset_name] = 0
            self.datasets_cache[dataset_name] = self.load_data_cache(self.datasets[dataset_name])
        next_batch = self.datasets_cache[dataset_name][self.indexes[dataset_name]]
        self.indexes[dataset_name] += 1
        x_support_set, y_support_set, x_target, y_target = next_batch
        return x_support_set, y_support_set, x_target, y_target

    def get_train_batch(self):
        """
        Gets the next training batch.
        :return: The next training batch
        """
        return self.get_batch("train")

    def get_test_batch(self):
        """
        Gets the next test batch.
        :return: The next test batch
        """
        return self.get_batch("test")

    def get_val_batch(self):
        """
        Gets the next validation batch.
        :return: The next validation batch
        """
        return self.get_batch("val")