diff --git a/week6/grading.py b/week6/grading.py new file mode 120000 index 0000000..a4917b1 --- /dev/null +++ b/week6/grading.py @@ -0,0 +1 @@ +../grading.py \ No newline at end of file diff --git a/week6/grading_utils.py b/week6/grading_utils.py new file mode 100644 index 0000000..0a470a3 --- /dev/null +++ b/week6/grading_utils.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import numpy as np +import random + + +def test_vocab(vocab, PAD, UNK, START, END): + return [ + len(vocab), + len(np.unique(list(vocab.values()))), + int(all([_ in vocab for _ in [PAD, UNK, START, END]])) + ] + + +def test_captions_indexing(train_captions_indexed, vocab, UNK): + starts = set() + ends = set() + between = set() + unk_count = 0 + for caps in train_captions_indexed: + for cap in caps: + starts.add(cap[0]) + between.update(cap[1:-1]) + ends.add(cap[-1]) + for w in cap: + if w == vocab[UNK]: + unk_count += 1 + return [ + len(starts), + len(ends), + len(between), + len(between | starts | ends), + int(all([isinstance(x, int) for x in (between | starts | ends)])), + unk_count + ] + + +def test_captions_batching(batch_captions_to_matrix): + return (batch_captions_to_matrix([[1, 2, 3], [4, 5]], -1, max_len=None).ravel().tolist() + + batch_captions_to_matrix([[1, 2, 3], [4, 5]], -1, max_len=2).ravel().tolist() + + batch_captions_to_matrix([[1, 2, 3], [4, 5]], -1, max_len=10).ravel().tolist()) + + +def get_feed_dict_for_testing(decoder, IMG_EMBED_SIZE, vocab): + return { + decoder.img_embeds: np.random.random((32, IMG_EMBED_SIZE)), + decoder.sentences: np.random.randint(0, len(vocab), (32, 20)) + } + + +def test_decoder_shapes(decoder, IMG_EMBED_SIZE, vocab, s): + tensors_to_test = [ + decoder.h0, + decoder.word_embeds, + decoder.flat_hidden_states, + decoder.flat_token_logits, + decoder.flat_ground_truth, + decoder.flat_loss_mask, + decoder.loss + ] + all_shapes = [] + for t in tensors_to_test: + _ = s.run(t, feed_dict=get_feed_dict_for_testing(decoder, IMG_EMBED_SIZE, vocab)) + all_shapes.extend(_.shape) + return all_shapes + + +def test_random_decoder_loss(decoder, IMG_EMBED_SIZE, vocab, s): + loss = s.run(decoder.loss, feed_dict=get_feed_dict_for_testing(decoder, IMG_EMBED_SIZE, vocab)) + return loss + + +def test_validation_loss(decoder, s, generate_batch, val_img_embeds, val_captions_indexed): + np.random.seed(300) + random.seed(300) + val_loss = 0 + for _ in range(1000): + val_loss += s.run(decoder.loss, generate_batch(val_img_embeds, + val_captions_indexed, + 32, + 20)) + val_loss /= 1000. 
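+    # note: `val_loss` here is the decoder loss averaged over 1000 random
+    # validation batches (batch size 32, captions truncated to 20 tokens);
+    # the seeds fixed above make the value reproducible for a given
+    # learner-supplied `generate_batch`.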
+ return val_loss diff --git a/week6/images/encoder_decoder.png b/week6/images/encoder_decoder.png new file mode 100644 index 0000000..6aa072a Binary files /dev/null and b/week6/images/encoder_decoder.png differ diff --git a/week6/images/encoder_decoder_explained.png b/week6/images/encoder_decoder_explained.png new file mode 100644 index 0000000..e392afa Binary files /dev/null and b/week6/images/encoder_decoder_explained.png differ diff --git a/week6/images/inceptionv3.png b/week6/images/inceptionv3.png new file mode 100644 index 0000000..64f2694 Binary files /dev/null and b/week6/images/inceptionv3.png differ diff --git a/week6/utils.py b/week6/utils.py new file mode 100644 index 0000000..423beff --- /dev/null +++ b/week6/utils.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +import os +import queue +import threading +import zipfile +import tqdm +import cv2 +import numpy as np +import pickle + + +def image_center_crop(img): + raise NotImplementedError() + + +def decode_image_from_buf(buf): + img = cv2.imdecode(np.asarray(bytearray(buf), dtype=np.uint8), 1) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img + + +def crop_and_preprocess(img, input_shape, preprocess_for_model): + img = image_center_crop(img) # take center crop + img = cv2.resize(img, input_shape) # resize for our model + img = img.astype("float32") # prepare for normalization + img = preprocess_for_model(img) # preprocess for model + return img + + +def apply_model(zip_fn, model, preprocess_for_model, extensions=(".jpg",), input_shape=(224, 224), batch_size=32): + # queue for cropped images + q = queue.Queue(maxsize=batch_size * 10) + + # when read thread put all images in queue + read_thread_completed = threading.Event() + + # time for read thread to die + kill_read_thread = threading.Event() + + def reading_thread(zip_fn): + zf = zipfile.ZipFile(zip_fn) + for fn in tqdm.tqdm_notebook(zf.namelist()): + if kill_read_thread.is_set(): + break + if os.path.splitext(fn)[-1] in extensions: + buf = zf.read(fn) # read raw bytes from zip for fn + img = decode_image_from_buf(buf) # decode raw bytes + img = crop_and_preprocess(img, input_shape, preprocess_for_model) + while True: + try: + q.put((os.path.split(fn)[-1], img), timeout=1) # put in queue + except queue.Full: + if kill_read_thread.is_set(): + break + continue + break + + read_thread_completed.set() # read all images + + # start reading thread + t = threading.Thread(target=reading_thread, args=(zip_fn,)) + t.daemon = True + t.start() + + img_fns = [] + img_embeddings = [] + + batch_imgs = [] + + def process_batch(batch_imgs): + batch_imgs = np.stack(batch_imgs, axis=0) + batch_embeddings = model.predict(batch_imgs) + img_embeddings.append(batch_embeddings) + + try: + while True: + try: + fn, img = q.get(timeout=1) + except queue.Empty: + if read_thread_completed.is_set(): + break + continue + img_fns.append(fn) + batch_imgs.append(img) + if len(batch_imgs) == batch_size: + process_batch(batch_imgs) + batch_imgs = [] + q.task_done() + # process last batch + if len(batch_imgs): + process_batch(batch_imgs) + finally: + kill_read_thread.set() + t.join() + + q.join() + + img_embeddings = np.vstack(img_embeddings) + return img_embeddings, img_fns + + +def save_pickle(obj, fn): + with open(fn, "wb") as f: + pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL) + + +def read_pickle(fn): + with open(fn, "rb") as f: + return pickle.load(f) diff --git a/week6/week6_final_project_image_captioning_clean.ipynb 
b/week6/week6_final_project_image_captioning_clean.ipynb new file mode 100644 index 0000000..b5d0038 --- /dev/null +++ b/week6/week6_final_project_image_captioning_clean.ipynb @@ -0,0 +1,2402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Image Captioning Final Project\n", + "\n", + "In this final project you will define and train an image-to-caption model, that can produce descriptions for real world images!\n", + "\n", + "\n", + "\n", + "Model architecture: CNN encoder and RNN decoder. \n", + "(https://research.googleblog.com/2014/11/a-picture-is-worth-thousand-coherent.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2017-08-27T10:16:46.508273Z", + "start_time": "2017-08-27T10:16:46.506062Z" + } + }, + "source": [ + "# Import stuff" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T12:30:35.584796Z", + "start_time": "2017-09-17T12:30:35.581343Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# use preloaded keras datasets and models\n", + "! mkdir -p ~/.keras/datasets\n", + "! mkdir -p ~/.keras/models\n", + "! ln -s $(realpath ../readonly/keras/datasets/*) ~/.keras/datasets/\n", + "! ln -s $(realpath ../readonly/keras/models/*) ~/.keras/models/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:05.229736Z", + "start_time": "2017-09-17T14:31:56.495874Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import keras\n", + "import numpy as np\n", + "%matplotlib inline\n", + "import matplotlib.pyplot as plt\n", + "import keras, keras.layers as L\n", + "import keras.backend as K\n", + "import tqdm\n", + "import utils\n", + "import time\n", + "import zipfile\n", + "import json\n", + "from collections import defaultdict\n", + "import re\n", + "import random\n", + "from random import choice\n", + "import grading\n", + "import grading_utils" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Fill in your Coursera token and email\n", + "To successfully submit your answers to our grader, please fill in your Coursera submission token and email" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "grader = grading.Grader(assignment_key=\"NEDBg6CgEee8nQ6uE8a7OA\", \n", + " all_parts=[\"19Wpv\", \"uJh73\", \"yiJkt\", \"rbpnH\", \"E2OIL\", \"YJR7z\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# token expires every 30 min\n", + "COURSERA_TOKEN = ### YOUR TOKEN HERE\n", + "COURSERA_EMAIL = ### YOUR EMAIL HERE" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Download data\n", + "\n", + "Takes 10 hours and 20 GB. We've downloaded necessary files for you.\n", + "\n", + "Relevant links (just in case):\n", + "- train images http://msvocds.blob.core.windows.net/coco2014/train2014.zip\n", + "- validation images http://msvocds.blob.core.windows.net/coco2014/val2014.zip\n", + "- captions for both train and validation http://msvocds.blob.core.windows.net/annotations-1-0-3/captions_train-val2014.zip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# link prepared files here\n", + "! 
ln -s $(realpath ../readonly/week6/*) ." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2017-08-27T10:23:45.863881Z", + "start_time": "2017-08-27T10:23:45.861693Z" + } + }, + "source": [ + "# Extract image features\n", + "\n", + "We will use pre-trained InceptionV3 model for CNN encoder (https://research.googleblog.com/2016/03/train-your-own-image-classifier-with.html) and extract its last hidden layer as an embedding:\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:09.629321Z", + "start_time": "2017-09-17T14:32:09.627108Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "IMG_SIZE = 299" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:09.836606Z", + "start_time": "2017-09-17T14:32:09.831028Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# we take the last hidden layer of IncetionV3 as an image embedding\n", + "def get_cnn_encoder():\n", + " K.set_learning_phase(0)\n", + " model = keras.applications.InceptionV3(include_top=False)\n", + " preprocess_for_model = keras.applications.inception_v3.preprocess_input\n", + "\n", + " model = keras.engine.training.Model(model.inputs, keras.layers.GlobalAveragePooling2D()(model.output))\n", + " return model, preprocess_for_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Features extraction takes too much time on CPU:\n", + "- Takes 16 minutes on GPU.\n", + "- 25x slower (InceptionV3) on CPU and takes 7 hours.\n", + "- 10x slower (MobileNet) on CPU and takes 3 hours.\n", + "\n", + "So we've done it for you with the following code:\n", + "```python\n", + "# load pre-trained model\n", + "K.clear_session()\n", + "encoder, preprocess_for_model = get_cnn_encoder()\n", + "\n", + "# extract train features\n", + "train_img_embeds, train_img_fns = utils.apply_model(\n", + " \"train2014.zip\", encoder, preprocess_for_model, input_shape=(IMG_SIZE, IMG_SIZE))\n", + "utils.save_pickle(train_img_embeds, \"train_img_embeds.pickle\")\n", + "utils.save_pickle(train_img_fns, \"train_img_fns.pickle\")\n", + "\n", + "# extract validation features\n", + "val_img_embeds, val_img_fns = utils.apply_model(\n", + " \"val2014.zip\", encoder, preprocess_for_model, input_shape=(IMG_SIZE, IMG_SIZE))\n", + "utils.save_pickle(val_img_embeds, \"val_img_embeds.pickle\")\n", + "utils.save_pickle(val_img_fns, \"val_img_fns.pickle\")\n", + "\n", + "# sample images for learners\n", + "def sample_zip(fn_in, fn_out, rate=0.01, seed=42):\n", + " np.random.seed(seed)\n", + " with zipfile.ZipFile(fn_in) as fin, zipfile.ZipFile(fn_out, \"w\") as fout:\n", + " sampled = filter(lambda _: np.random.rand() < rate, fin.filelist)\n", + " for zInfo in sampled:\n", + " fout.writestr(zInfo, fin.read(zInfo))\n", + " \n", + "sample_zip(\"train2014.zip\", \"train2014_sample.zip\")\n", + "sample_zip(\"val2014.zip\", \"val2014_sample.zip\")\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:12.621413Z", + "start_time": "2017-09-17T14:32:11.986281Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# load prepared embeddings\n", + "train_img_embeds = utils.read_pickle(\"train_img_embeds.pickle\")\n", + "train_img_fns = utils.read_pickle(\"train_img_fns.pickle\")\n", + "val_img_embeds = 
utils.read_pickle(\"val_img_embeds.pickle\")\n", + "val_img_fns = utils.read_pickle(\"val_img_fns.pickle\")\n", + "# check shapes\n", + "print(train_img_embeds.shape, len(train_img_fns))\n", + "print(val_img_embeds.shape, len(val_img_fns))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:21.515330Z", + "start_time": "2017-09-17T14:32:21.400879Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# check prepared samples of images\n", + "! ls *_sample.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract captions for images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:32:24.897276Z", + "start_time": "2017-09-17T14:32:22.942805Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# extract captions from zip\n", + "def get_captions_for_fns(fns, zip_fn, zip_json_path):\n", + " zf = zipfile.ZipFile(zip_fn)\n", + " j = json.loads(zf.read(zip_json_path).decode(\"utf8\"))\n", + " id_to_fn = {img[\"id\"]: img[\"file_name\"] for img in j[\"images\"]}\n", + " fn_to_caps = defaultdict(list)\n", + " for cap in j['annotations']:\n", + " fn_to_caps[id_to_fn[cap['image_id']]].append(cap['caption'])\n", + " fn_to_caps = dict(fn_to_caps)\n", + " return list(map(lambda x: fn_to_caps[x], fns))\n", + " \n", + "train_captions = get_captions_for_fns(train_img_fns, \"captions_train-val2014.zip\", \n", + " \"annotations/captions_train2014.json\")\n", + "\n", + "val_captions = get_captions_for_fns(val_img_fns, \"captions_train-val2014.zip\", \n", + " \"annotations/captions_val2014.json\")\n", + "\n", + "# check shape\n", + "print(len(train_img_fns), len(train_captions))\n", + "print(len(val_img_fns), len(val_captions))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:42:06.492565Z", + "start_time": "2017-09-17T14:42:06.245458Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# look at training example (each has 5 captions)\n", + "def show_trainig_example(train_img_fns, train_captions, example_idx=0):\n", + " \"\"\"\n", + " You can change example_idx and see different images\n", + " \"\"\"\n", + " zf = zipfile.ZipFile(\"train2014_sample.zip\")\n", + " captions_by_file = dict(zip(train_img_fns, train_captions))\n", + " all_files = set(train_img_fns)\n", + " found_files = list(filter(lambda x: x.filename.rsplit(\"/\")[-1] in all_files, zf.filelist))\n", + " example = found_files[example_idx]\n", + " img = utils.decode_image_from_buf(zf.read(example))\n", + " plt.imshow(utils.image_center_crop(img))\n", + " plt.title(\"\\n\".join(captions_by_file[example.filename.rsplit(\"/\")[-1]]))\n", + " plt.show()\n", + " \n", + "show_trainig_example(train_img_fns, train_captions, example_idx=142)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prepare captions for training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:40.637447Z", + "start_time": "2017-09-17T14:43:40.633717Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# preview captions data\n", + "train_captions[:2]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:40.932131Z", + "start_time": "2017-09-17T14:43:40.891187Z" + }, + "collapsed": true + 
}, + "outputs": [], + "source": [ + "# special tokens\n", + "PAD = \"#PAD#\"\n", + "UNK = \"#UNK#\"\n", + "START = \"#START#\"\n", + "END = \"#END#\"\n", + "\n", + "# split sentence into tokens (split into lowercased words)\n", + "def split_sentence(sentence):\n", + " return list(filter(lambda x: len(x) > 0, re.split('\\W+', sentence.lower())))\n", + "\n", + "def generate_vocabulary(train_captions):\n", + " \"\"\"\n", + " Return {token: index} for all train tokens (words) that occur 5 times or more, \n", + " `index` should be from 0 to N, where N is a number of unique tokens in the resulting dictionary.\n", + " Also, add PAD (for batch padding), UNK (unknown, out of vocabulary), \n", + " START (start of sentence) and END (end of sentence) tokens into the vocabulary.\n", + " \"\"\"\n", + " vocab = ### YOUR CODE HERE ###\n", + " return {token: index for index, token in enumerate(sorted(vocab))}\n", + " \n", + "def caption_tokens_to_indices(captions, vocab):\n", + " \"\"\"\n", + " `captions` argument is an array of arrays:\n", + " [\n", + " [\n", + " \"image1 caption1\",\n", + " \"image1 caption2\",\n", + " ...\n", + " ],\n", + " [\n", + " \"image2 caption1\",\n", + " \"image2 caption2\",\n", + " ...\n", + " ],\n", + " ...\n", + " ]\n", + " Use `split_sentence` function to split sentence into tokens.\n", + " Replace all tokens with vocabulary indices, use UNK for unknown words (out of vocabulary).\n", + " Add START and END tokens to start and end of each sentence respectively.\n", + " For the example above you should produce the following:\n", + " [\n", + " [\n", + " [vocab[START], vocab[\"image1\"], vocab[\"caption1\"], vocab[END]],\n", + " [vocab[START], vocab[\"image1\"], vocab[\"caption2\"], vocab[END]],\n", + " ...\n", + " ],\n", + " ...\n", + " ]\n", + " \"\"\"\n", + " res = ### YOUR CODE HERE ###\n", + " return res" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:44.824532Z", + "start_time": "2017-09-17T14:43:41.264769Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# prepare vocabulary\n", + "vocab = generate_vocabulary(train_captions)\n", + "vocab_inverse = {idx: w for w, idx in vocab.items()}\n", + "print(len(vocab))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:53.206639Z", + "start_time": "2017-09-17T14:43:44.826028Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# replace tokens with indices\n", + "train_captions_indexed = caption_tokens_to_indices(train_captions, vocab)\n", + "val_captions_indexed = caption_tokens_to_indices(val_captions, vocab)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Captions have different length, but we need to batch them, that's why we will add PAD tokens so that all sentences have an euqal length. \n", + "\n", + "We will crunch LSTM through all the tokens, but we will ignore padding tokens during loss calculation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T16:11:52.425546Z", + "start_time": "2017-09-17T16:11:52.414004Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# we will use this during training\n", + "def batch_captions_to_matrix(batch_captions, pad_idx, max_len=None):\n", + " \"\"\"\n", + " `batch_captions` is an array of arrays:\n", + " [\n", + " [vocab[START], ..., vocab[END]],\n", + " [vocab[START], ..., vocab[END]],\n", + " ...\n", + " ]\n", + " Put vocabulary indexed captions into np.array of shape (len(batch_captions), columns),\n", + " where \"columns\" is max(map(len, batch_captions)) when max_len is None\n", + " and \"columns\" = min(max_len, max(map(len, batch_captions))) otherwise.\n", + " Add padding with vocab[PAD] where necessary.\n", + " Input example: [[1, 2, 3], [4, 5]]\n", + " Output example: np.array([[1, 2, 3], [4, 5, vocab[PAD]]]) if max_len=None\n", + " Output example: np.array([[1, 2], [4, 5]]) if max_len=2\n", + " Output example: np.array([[1, 2, 3], [4, 5, vocab[PAD]]]) if max_len=100\n", + " Try to use numpy, we need this function to be fast!\n", + " \"\"\"\n", + " matrix = ###YOUR CODE HERE###\n", + " return matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T16:12:02.051692Z", + "start_time": "2017-09-17T16:12:02.045821Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "## GRADED PART, DO NOT CHANGE!\n", + "# Vocabulary creation\n", + "grader.set_answer(\"19Wpv\", grading_utils.test_vocab(vocab, PAD, UNK, START, END))\n", + "# Captions indexing\n", + "grader.set_answer(\"uJh73\", grading_utils.test_captions_indexing(train_captions_indexed, vocab, UNK))\n", + "# Captions batching\n", + "grader.set_answer(\"yiJkt\", grading_utils.test_captions_batching(batch_captions_to_matrix))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# you can make submission with answers so far to check yourself at this stage\n", + "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2017-08-27T13:34:09.664927Z", + "start_time": "2017-08-27T13:34:09.662597Z" + } + }, + "source": [ + "# Training" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define architecture" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Since our problem is to generate image captions, RNN text generator should be conditioned on image. The idea is to use image features as an initial state for RNN instead of zeros. \n", + "\n", + "Remember that you should transform image feature vector to RNN hidden state size by fully-connected layer and then pass it to RNN.\n", + "\n", + "During training we will feed ground truth tokens into the lstm to get predictions of next tokens. 
\n", + "\n", + "Notice that we don't need to feed last token (END) as input (http://cs.stanford.edu/people/karpathy/):\n", + "\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T16:33:04.453351Z", + "start_time": "2017-09-17T16:33:04.449675Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "IMG_EMBED_SIZE = train_img_embeds.shape[1]\n", + "IMG_EMBED_BOTTLENECK = 120\n", + "WORD_EMBED_SIZE = 100\n", + "LSTM_UNITS = 300\n", + "LOGIT_BOTTLENECK = 120\n", + "pad_idx = vocab[PAD]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T16:38:46.296544Z", + "start_time": "2017-09-17T16:38:46.290670Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# remember to reset your graph if you want to start building it from scratch!\n", + "tf.reset_default_graph()\n", + "tf.set_random_seed(42)\n", + "s = tf.InteractiveSession()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we define decoder graph.\n", + "\n", + "We use Keras layers where possible because we can use them in functional style with weights reuse like this:\n", + "```python\n", + "dense_layer = L.Dense(42, input_shape=(None, 100) activation='relu')\n", + "a = tf.placeholder('float32', [None, 100])\n", + "b = tf.placeholder('float32', [None, 100])\n", + "dense_layer(a) # that's how we applied dense layer!\n", + "dense_layer(b) # and again\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T16:38:48.300312Z", + "start_time": "2017-09-17T16:38:48.128590Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "class decoder:\n", + " # [batch_size, IMG_EMBED_SIZE] of CNN image features\n", + " img_embeds = tf.placeholder('float32', [None, IMG_EMBED_SIZE])\n", + " # [batch_size, time steps] of word ids\n", + " sentences = tf.placeholder('int32', [None, None])\n", + " \n", + " # we use bottleneck here to reduce the number of parameters\n", + " # image embedding -> bottleneck\n", + " img_embed_to_bottleneck = L.Dense(IMG_EMBED_BOTTLENECK, \n", + " input_shape=(None, IMG_EMBED_SIZE), \n", + " activation='elu')\n", + " # image embedding bottleneck -> lstm initial state\n", + " img_embed_bottleneck_to_h0 = L.Dense(LSTM_UNITS,\n", + " input_shape=(None, IMG_EMBED_BOTTLENECK),\n", + " activation='elu')\n", + " # word -> embedding\n", + " word_embed = L.Embedding(len(vocab), WORD_EMBED_SIZE)\n", + " # lstm cell (from tensorflow)\n", + " lstm = tf.nn.rnn_cell.LSTMCell(LSTM_UNITS)\n", + " \n", + " # we use bottleneck here to reduce model complexity\n", + " # lstm output -> logits bottleneck\n", + " token_logits_bottleneck = L.Dense(LOGIT_BOTTLENECK, activation=\"elu\")\n", + " # logits bottleneck -> logits for next token prediction\n", + " token_logits = L.Dense(len(vocab))\n", + " \n", + " # initial lstm cell state of shape (None, LSTM_UNITS),\n", + " # we need to condition it on `img_embeds` placeholder.\n", + " c0 = h0 = ### YOUR CODE HERE ###\n", + "\n", + " # embed all tokens but the last for lstm input,\n", + " # remember that L.Embedding is callable,\n", + " # use `sentences` placeholder as input.\n", + " word_embeds = ### YOUR CODE HERE ###\n", + " \n", + " # during training we use ground truth tokens `word_embeds` as context for next token prediction.\n", + " # that means that we know all the inputs for our lstm and can get \n", + " # all the 
hidden states with one tensorflow operation (tf.nn.dynamic_rnn).\n", + " # `hidden_states` has a shape of [batch_size, time steps, LSTM_UNITS].\n", + " hidden_states, _ = tf.nn.dynamic_rnn(lstm, word_embeds,\n", + " initial_state=tf.nn.rnn_cell.LSTMStateTuple(c0, h0))\n", + "\n", + " # now we need to calculate token logits for all the hidden states\n", + " \n", + " # first, we reshape `hidden_states` to [-1, LSTM_UNITS]\n", + " flat_hidden_states = ### YOUR CODE HERE ###\n", + "\n", + " # then, we calculate logits for next tokens using `token_logits` layer\n", + " flat_token_logits = ### YOUR CODE HERE ###\n", + " \n", + " # then, we flatten the ground truth token ids.\n", + " # remember, that we predict next tokens for each time step,\n", + " # use `sentences` placeholder.\n", + " flat_ground_truth = ### YOUR CODE HERE ###\n", + "\n", + " # we need to know where we have real tokens (not padding) in `flat_ground_truth`,\n", + " # we don't want to propagate the loss for padded output tokens,\n", + " # fill `flat_loss_mask` with 1.0 for real tokens (not vocab[PAD]) and 0.0 otherwise.\n", + " flat_loss_mask = ### YOUR CODE HERE ###\n", + "\n", + " # compute cross-entropy between `flat_ground_truth` and `flat_token_logits` predicted by lstm\n", + " xent = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " labels=flat_ground_truth, \n", + " logits=flat_token_logits\n", + " )\n", + "\n", + " # compute average `xent` over tokens with nonzero `flat_loss_mask`.\n", + " # we don't want to account misclassification of PAD tokens, because that doesn't make sense,\n", + " # we have PAD tokens for batching purposes only!\n", + " loss = ### YOUR CODE HERE ###" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# define optimizer operation to minimize the loss\n", + "optimizer = tf.train.AdamOptimizer(learning_rate=0.001)\n", + "train_step = optimizer.minimize(decoder.loss)\n", + "\n", + "# will be used to save/load network weights.\n", + "# you need to reset your default graph and define it in the same way to be able to load the saved weights!\n", + "saver = tf.train.Saver()\n", + "\n", + "# intialize all variables\n", + "s.run(tf.global_variables_initializer())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "## GRADED PART, DO NOT CHANGE!\n", + "# Decoder shapes test\n", + "grader.set_answer(\"rbpnH\", grading_utils.test_decoder_shapes(decoder, IMG_EMBED_SIZE, vocab, s))\n", + "# Decoder random loss test\n", + "grader.set_answer(\"E2OIL\", grading_utils.test_random_decoder_loss(decoder, IMG_EMBED_SIZE, vocab, s))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# you can make submission with answers so far to check yourself at this stage\n", + "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training loop\n", + "Evaluate train and validation metrics through training and log them. Ensure that loss decreases." 
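The decoder's `loss` above is an average of per-token cross-entropy taken only over positions where `flat_loss_mask` is nonzero, so PAD positions do not contribute. A tiny standalone NumPy sketch of that masked averaging, with toy numbers rather than the graded TensorFlow code:

```python
import numpy as np

# per-token cross-entropy for a flattened batch (toy values)
xent = np.array([2.3, 1.1, 0.7, 4.0, 0.2, 3.5])
# 1.0 for real tokens, 0.0 where the ground truth token is vocab[PAD]
mask = np.array([1.0, 1.0, 1.0, 0.0, 1.0, 0.0])

masked_mean = np.sum(xent * mask) / np.sum(mask)  # average over real tokens only
plain_mean = np.mean(xent)                        # would be biased by PAD positions

print(masked_mean, plain_mean)  # 1.075 vs ~1.967
```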
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:59.397913Z", + "start_time": "2017-09-17T14:43:58.913391Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "train_captions_indexed = np.array(train_captions_indexed)\n", + "val_captions_indexed = np.array(val_captions_indexed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:43:59.529548Z", + "start_time": "2017-09-17T14:43:59.399567Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# generate batch via random sampling of images and captions for them,\n", + "# we use `max_len` parameter to control the length of the captions (truncating long captions)\n", + "def generate_batch(images_embeddings, indexed_captions, batch_size, max_len=None):\n", + " \"\"\"\n", + " `images_embeddings` is a np.array of shape [number of images, IMG_EMBED_SIZE].\n", + " `indexed_captions` holds 5 vocabulary indexed captions for each image:\n", + " [\n", + " [\n", + " [vocab[START], vocab[\"image1\"], vocab[\"caption1\"], vocab[END]],\n", + " [vocab[START], vocab[\"image1\"], vocab[\"caption2\"], vocab[END]],\n", + " ...\n", + " ],\n", + " ...\n", + " ]\n", + " Generate a random batch of size `batch_size`.\n", + " Take random images and choose one random caption for each image.\n", + " Remember to use `batch_captions_to_matrix` for padding and respect `max_len` parameter.\n", + " Return feed dict {decoder.img_embeds: ..., decoder.sentences: ...}.\n", + " \"\"\"\n", + " batch_image_embeddings = ### YOUR CODE HERE ###\n", + " \n", + " batch_captions_matrix = ### YOUR CODE HERE ###\n", + " \n", + " return {decoder.img_embeds: batch_image_embeddings, \n", + " decoder.sentences: batch_captions_matrix}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:44:00.437338Z", + "start_time": "2017-09-17T14:44:00.434472Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "n_epochs = 12\n", + "n_batches_per_epoch = 1000\n", + "n_validation_batches = 100 # how many batches are used for validation after each epoch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:44:01.497022Z", + "start_time": "2017-09-17T14:44:00.962013Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# you can load trained weights here\n", + "# you can load \"weights_{epoch}\" and continue training\n", + "# uncomment the next line if you need to load weights\n", + "# saver.restore(s, \"weights\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Look at the training and validation loss, they should be decreasing!" 
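For intuition about what `generate_batch` above has to produce, here is a standalone sketch of random image/caption sampling. The names `demo_sample_batch`, `embeds` and `caps` are made up for this illustration, and it returns plain arrays rather than the feed dict the notebook expects; padding would still be done with `batch_captions_to_matrix` afterwards.

```python
import numpy as np

def demo_sample_batch(embeds, caps, batch_size, rng=np.random):
    # pick `batch_size` random images (with replacement), then one random
    # caption out of the captions stored for each chosen image
    idx = rng.randint(0, len(embeds), size=batch_size)
    batch_embeds = embeds[idx]
    batch_caps = [caps[i][rng.randint(len(caps[i]))] for i in idx]
    return batch_embeds, batch_caps

# toy data: 3 "images" with 2 indexed captions each
embeds = np.random.rand(3, 4).astype(np.float32)
caps = [[[1, 5, 2], [1, 6, 2]], [[1, 7, 2], [1, 8, 9, 2]], [[1, 3, 2], [1, 4, 2]]]
be, bc = demo_sample_batch(embeds, caps, batch_size=2)
print(be.shape, bc)  # (2, 4) and two randomly chosen caption index lists
```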
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T12:42:16.120494Z", + "start_time": "2017-09-17T12:31:03.779162Z" + }, + "collapsed": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "# actual training loop\n", + "MAX_LEN = 20 # truncate long captions to speed up training\n", + "\n", + "# to make training reproducible\n", + "np.random.seed(42)\n", + "random.seed(42)\n", + "\n", + "for epoch in range(n_epochs):\n", + " \n", + " train_loss = 0\n", + " pbar = tqdm.tqdm_notebook(range(n_batches_per_epoch))\n", + " counter = 0\n", + " for _ in pbar:\n", + " train_loss += s.run([decoder.loss, train_step], \n", + " generate_batch(train_img_embeds, \n", + " train_captions_indexed, \n", + " batch_size, \n", + " MAX_LEN))[0]\n", + " counter += 1\n", + " pbar.set_description(\"Training loss: %f\" % (train_loss / counter))\n", + " \n", + " train_loss /= n_batches_per_epoch\n", + " \n", + " val_loss = 0\n", + " for _ in range(n_validation_batches):\n", + " val_loss += s.run(decoder.loss, generate_batch(val_img_embeds,\n", + " val_captions_indexed, \n", + " batch_size, \n", + " MAX_LEN))\n", + " val_loss /= n_validation_batches\n", + " \n", + " print('Epoch: {}, train loss: {}, val loss: {}'.format(epoch, train_loss, val_loss))\n", + "\n", + " # save weights after finishing epoch\n", + " saver.save(s, \"weights_{}\".format(epoch))\n", + " \n", + "print(\"Finished!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "## GRADED PART, DO NOT CHANGE!\n", + "# Validation loss\n", + "grader.set_answer(\"YJR7z\", grading_utils.test_validation_loss(\n", + " decoder, s, generate_batch, val_img_embeds, val_captions_indexed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "# you can make submission with answers so far to check yourself at this stage\n", + "grader.submit(COURSERA_EMAIL, COURSERA_TOKEN)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T12:42:16.399349Z", + "start_time": "2017-09-17T12:42:16.122158Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# check that it's learnt something, outputs accuracy of next word prediction (should be around 0.5)\n", + "from sklearn.metrics import accuracy_score, log_loss\n", + "\n", + "def decode_sentence(sentence_indices):\n", + " return \" \".join(list(map(vocab_inverse.get, sentence_indices)))\n", + "\n", + "def check_after_training(n_examples):\n", + " fd = generate_batch(train_img_embeds, train_captions_indexed, batch_size)\n", + " logits = decoder.flat_token_logits.eval(fd)\n", + " truth = decoder.flat_ground_truth.eval(fd)\n", + " mask = decoder.flat_loss_mask.eval(fd).astype(bool)\n", + " print(\"Loss:\", decoder.loss.eval(fd))\n", + " print(\"Accuracy:\", accuracy_score(logits.argmax(axis=1)[mask], truth[mask]))\n", + " for example_idx in range(n_examples):\n", + " print(\"Example\", example_idx)\n", + " print(\"Predicted:\", decode_sentence(logits.argmax(axis=1).reshape((batch_size, -1))[example_idx]))\n", + " print(\"Truth:\", decode_sentence(truth.reshape((batch_size, -1))[example_idx]))\n", + " print(\"\")\n", + "\n", + "check_after_training(3)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T12:42:16.535481Z", + "start_time": 
"2017-09-17T12:42:16.400830Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# save graph weights to file!\n", + "saver.save(s, \"weights\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Applying model\n", + "\n", + "Here we construct a graph for our final model.\n", + "\n", + "It will work as follows:\n", + "- take an image as an input and embed it\n", + "- condition lstm on that embedding\n", + "- predict the next token given a START input token\n", + "- use predicted token as an input at next time step\n", + "- iterate until you predict an END token" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:44:22.546086Z", + "start_time": "2017-09-17T14:44:16.029331Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "class final_model:\n", + " # CNN encoder\n", + " encoder, preprocess_for_model = get_cnn_encoder()\n", + " saver.restore(s, \"weights\") # keras applications corrupt our graph, so we restore trained weights\n", + " \n", + " # containers for current lstm state\n", + " lstm_c = tf.Variable(tf.zeros([1, LSTM_UNITS]), name=\"cell\")\n", + " lstm_h = tf.Variable(tf.zeros([1, LSTM_UNITS]), name=\"hidden\")\n", + "\n", + " # input images\n", + " input_images = tf.placeholder('float32', [None, IMG_SIZE, IMG_SIZE, 3], name='images')\n", + "\n", + " # get image embeddings\n", + " img_embeds = encoder(input_images)\n", + "\n", + " # initialize lstm state conditioned on image\n", + " init_c = init_h = decoder.img_embed_bottleneck_to_h0(decoder.img_embed_to_bottleneck(img_embeds))\n", + " init_lstm = tf.assign(lstm_c, init_c), tf.assign(lstm_h, init_h)\n", + " \n", + " # current word index\n", + " current_word = tf.placeholder('int32', [None], name='current_input')\n", + "\n", + " # embedding for current word\n", + " word_embed = decoder.word_embed(current_word)\n", + "\n", + " # apply lstm cell, get new lstm states\n", + " new_c, new_h = decoder.lstm(word_embed, tf.nn.rnn_cell.LSTMStateTuple(lstm_c, lstm_h))[1]\n", + "\n", + " # compute logits for next token\n", + " new_logits = decoder.token_logits(decoder.token_logits_bottleneck(new_h))\n", + " # compute probabilities for next token\n", + " new_probs = tf.nn.softmax(new_logits)\n", + "\n", + " # `one_step` outputs probabilities of next token and updates lstm hidden state\n", + " one_step = new_probs, tf.assign(lstm_c, new_c), tf.assign(lstm_h, new_h)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T17:27:17.828681Z", + "start_time": "2017-09-17T17:27:17.820029Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# look at how temperature works for probability distributions\n", + "# for high temperature we have more uniform distribution\n", + "_ = np.array([0.5, 0.4, 0.1])\n", + "for t in [0.01, 0.1, 1, 10, 100]:\n", + " print(\" \".join(map(str, _**(1/t) / np.sum(_**(1/t)))), \"with temperature\", t)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T14:44:22.575410Z", + "start_time": "2017-09-17T14:44:22.547785Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# this is an actual prediction loop\n", + "def generate_caption(image, t=1, sample=False, max_len=20):\n", + " \"\"\"\n", + " Generate caption for given image.\n", + " if `sample` is True, we will sample next token from predicted probability distribution.\n", + " `t` is a 
temperature during that sampling,\n", + " higher `t` causes more uniform-like distribution = more chaos.\n", + " \"\"\"\n", + " # condition lstm on the image\n", + " s.run(final_model.init_lstm, \n", + " {final_model.input_images: [image]})\n", + " \n", + " # current caption\n", + " # start with only START token\n", + " caption = [vocab[START]]\n", + " \n", + " for _ in range(max_len):\n", + " next_word_probs = s.run(final_model.one_step, \n", + " {final_model.current_word: [caption[-1]]})[0]\n", + " next_word_probs = next_word_probs.ravel()\n", + " \n", + " # apply temperature\n", + " next_word_probs = next_word_probs**(1/t) / np.sum(next_word_probs**(1/t))\n", + "\n", + " if sample:\n", + " next_word = np.random.choice(range(len(vocab)), p=next_word_probs)\n", + " else:\n", + " next_word = np.argmax(next_word_probs)\n", + "\n", + " caption.append(next_word)\n", + " if next_word == vocab[END]:\n", + " break\n", + " \n", + " return list(map(vocab_inverse.get, caption))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T17:44:15.525786Z", + "start_time": "2017-09-17T17:44:15.238979Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "# look at validation prediction example\n", + "def apply_model_to_image_raw_bytes(raw):\n", + " img = utils.decode_image_from_buf(raw)\n", + " fig = plt.figure(figsize=(7, 7))\n", + " plt.grid('off')\n", + " plt.axis('off')\n", + " plt.imshow(img)\n", + " img = utils.crop_and_preprocess(img, (IMG_SIZE, IMG_SIZE), final_model.preprocess_for_model)\n", + " print(' '.join(generate_caption(img)[1:-1]))\n", + " plt.show()\n", + "\n", + "def show_valid_example(val_img_fns, example_idx=0):\n", + " zf = zipfile.ZipFile(\"val2014_sample.zip\")\n", + " all_files = set(val_img_fns)\n", + " found_files = list(filter(lambda x: x.filename.rsplit(\"/\")[-1] in all_files, zf.filelist))\n", + " example = found_files[example_idx]\n", + " apply_model_to_image_raw_bytes(zf.read(example))\n", + " \n", + "show_valid_example(val_img_fns, example_idx=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T15:07:47.191185Z", + "start_time": "2017-09-17T15:06:44.121069Z" + }, + "collapsed": true, + "scrolled": true + }, + "outputs": [], + "source": [ + "# sample more images from validation\n", + "for idx in np.random.choice(range(len(zipfile.ZipFile(\"val2014_sample.zip\").filelist) - 1), 10):\n", + " show_valid_example(val_img_fns, example_idx=idx)\n", + " time.sleep(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can download any image from the Internet and appply your model to it!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-17T17:42:56.055265Z", + "start_time": "2017-09-17T17:42:54.242164Z" + }, + "collapsed": true + }, + "outputs": [], + "source": [ + "! 
wget http://www.bijouxandbits.com/wp-content/uploads/2016/06/portal-cake-10.jpg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "apply_model_to_image_raw_bytes(open(\"portal-cake-10.jpg\", \"rb\").read())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-24T12:34:10.689158Z", + "start_time": "2017-09-24T12:34:10.675938Z" + } + }, + "source": [ + "Now it's time to find 10 examples where your model works good and 10 examples where it fails! \n", + "\n", + "You can use images from validation set as follows:\n", + "```python\n", + "show_valid_example(val_img_fns, example_idx=...)\n", + "```\n", + "\n", + "You can use images from the Internet as follows:\n", + "```python\n", + "! wget ...\n", + "apply_model_to_image_raw_bytes(open(\"...\", \"rb\").read())\n", + "```\n", + "\n", + "If you use these functions, the output will be embedded into your notebook and will be visible during peer review!\n", + "\n", + "When you're done, download your noteboook using \"File\" -> \"Download as\" -> \"Notebook\" and prepare that file for peer review!" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "### YOUR EXAMPLES HERE ###" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "ExecuteTime": { + "end_time": "2017-09-24T12:34:25.055581Z", + "start_time": "2017-09-24T12:34:25.052373Z" + } + }, + "source": [ + "That's it! \n", + "\n", + "Congratulations, you've trained your image captioning model and now can produce captions for any picture from the Internet!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.4.3" + }, + "toc": { + "colors": { + "hover_highlight": "#DAA520", + "navigate_num": "#000000", + "navigate_text": "#333333", + "running_highlight": "#FF0000", + "selected_highlight": "#FFD700", + "sidebar_border": "#EEEEEE", + "wrapper_background": "#FFFFFF" + }, + "moveMenuLeft": true, + "nav_menu": { + "height": "157px", + "width": "252px" + }, + "navigate_menu": true, + "number_sections": true, + "sideBar": true, + "threshold": 4, + "toc_cell": false, + "toc_section_display": "block", + "toc_window_display": false, + "widenNotebook": false + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": { + "05ecf0e1a16b4929902104affe9dc8a5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_acf09a7845774d8f80bfe71285de01c2", + "style": "IPY_MODEL_0e14999bda2f4969a286ae53549f4b46", + "value": "100% 82784/82784 [10:24<00:00, 132.46it/s]" + } + }, + "06128d93067e4801adf53a5ef7f258ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_d6c6b9b0c4d64d0f942c4614d1c86562", + "max": 82784, + "style": "IPY_MODEL_67541ca1dd6d4e318f8699a1c97e52c4", + "value": 3551 + } + }, + "07edd46f77d44e59b435d25dec7fd053": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": 
"ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "082d987bb4d949959237b3f7d77fc068": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_abdd1f8972ea4d07bb3021aef195d718", + "max": 82784, + "style": "IPY_MODEL_e9fd243128f54f168458219cb8a58904", + "value": 2476 + } + }, + "0c18f8065dfc4285b5aa2dbebe56f31d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "0e14999bda2f4969a286ae53549f4b46": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "0f3829d632f1484c926f42793d87a4aa": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_78c9e28c7bfb4181adc862cb1369eddd", + "max": 82784, + "style": "IPY_MODEL_50905a891aa24f769a0971a2ce9a0364", + "value": 4236 + } + }, + "10a9f38803b4487480a0db699084fd6e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "1126e79e712f496b86c67a4acfb2598b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_be7b6389899041d3b6583c3a7a834c81", + "max": 82784, + "style": "IPY_MODEL_ed04668bc4d54f5fa4edcae249f0ecb5", + "value": 954 + } + }, + "11e4a5c477d54f95a71f128459f0d69f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "12e8dbb2338c4632aa61b99e90968946": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "1570eac5f58c4d5f8f6986cffc6823e4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "17ba1fe4116347fab533ece5a413cb96": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_2e786a7c0bf5485383e7649f2337d62e", + "style": "IPY_MODEL_dfd9dc8c00254cb9b670a63099463c1e", + "value": " 3% 2476/82784 [00:19<10:37, 125.97it/s]" + } + }, + "1924c59303504865ac212a0ab2628c03": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "1cb5bdbc09bd495d82c96eea7da32c38": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "1d43f3472e6c4789a0607af9483cca08": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "1f1cf79af7c14662b63de6e552567ce2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "2070606955284d8fb3a95d2bc5658cb9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_79bf1ccf268949eda595e3d27a23ea67", + "style": 
"IPY_MODEL_ae2c38164c8e46eb8a795714caf938c0", + "value": " 6% 5081/82784 [00:39<09:06, 142.22it/s]" + } + }, + "250e91ec387c41a4af61fb318ce83471": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_2f69b1f966fc4e3b80dcf9b4ba757447", + "IPY_MODEL_2070606955284d8fb3a95d2bc5658cb9" + ], + "layout": "IPY_MODEL_9d8827c874264df485f7c01b446114d9" + } + }, + "25606d5fe062488493309b5c584d76e8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "layout": "IPY_MODEL_6b6ca8fc225646eab4fb2495254341f4", + "max": 82784, + "style": "IPY_MODEL_07edd46f77d44e59b435d25dec7fd053", + "value": 641 + } + }, + "25aac3c02e434ddb8ba48a488d6bba4c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_1cb5bdbc09bd495d82c96eea7da32c38", + "style": "IPY_MODEL_8a079e84369e452089def45cb8611579", + "value": " 4% 3551/82784 [00:29<09:04, 145.53it/s]" + } + }, + "2cb83227ad124b1ea6bb1054aa761f1b": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_0f3829d632f1484c926f42793d87a4aa", + "IPY_MODEL_d80b196d2ed04ac898473d15b4713ff0" + ], + "layout": "IPY_MODEL_1f1cf79af7c14662b63de6e552567ce2" + } + }, + "2e786a7c0bf5485383e7649f2337d62e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "2f69b1f966fc4e3b80dcf9b4ba757447": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_a000e9ff594a41b8b32c12759e98aa62", + "max": 82784, + "style": "IPY_MODEL_82b3b61e8da845fbbb7a52cb97f9ac40", + "value": 5081 + } + }, + "373ecd265e6243beb96875060f06b5f6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "39e2f0f5848e4cf9b4e9ff1c7fb429b4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_e4d703c1616c4f1daab45dee792c04ef", + "style": "IPY_MODEL_bbe135687ff843408d998146496a92df", + "value": " 5% 3812/82784 [00:31<09:37, 136.77it/s]" + } + }, + "3b7568e8d3224b0e93ae040cc3aa0999": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "3bd2728803ef4c2a841857bbbc58996a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_0c18f8065dfc4285b5aa2dbebe56f31d", + "max": 82784, + "style": "IPY_MODEL_e11077a328ef4ce881429f146e9bd32a", + "value": 4125 + } + }, + "3c0dd29fdc564939b7fd5bbde13b710c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "3db58b23e97b4e46a89ebb555a969b9c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "3df38b182a154779b3147866b50154e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": 
"ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "3eb934943e7947b996d2d9d2b5835f8d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_ab1de819696640e7b0852cb5e1f1099d", + "IPY_MODEL_c5f3fc161c094562a9d653c352ede3e6" + ], + "layout": "IPY_MODEL_845add417df541f797835fe0af7bdfc6" + } + }, + "4245d3a98b964dd2b13aad8a878e6c99": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "layout": "IPY_MODEL_66ae82fd1c0d475fac833d4a30f02944", + "max": 82784, + "style": "IPY_MODEL_681d19294bbb4938a72f7b7b85430b02", + "value": 703 + } + }, + "44a670b1861242a793907e043728daa1": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "451e308820b74437828c45da73bd54c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "485a5264618a4666ba7c781a8a2b0872": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "49013664517841a68091a25828104e3f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_8f5f98bee7d14034a51d92237ed80fa4", + "style": "IPY_MODEL_485a5264618a4666ba7c781a8a2b0872", + "value": " 3% 2607/82784 [00:22<09:32, 140.07it/s]" + } + }, + "4a77c0bf7e7d42c184646a9d42c1709f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_f70462b059cf4a1c856de2148365423a", + "max": 82784, + "style": "IPY_MODEL_10a9f38803b4487480a0db699084fd6e", + "value": 1055 + } + }, + "4ac579df79d4407c8def2a502c57893f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "4df189bc7b2e4d92af9c3ff0de376b8f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "4e2fbbd4a122409f8bd844e5299d1b59": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "50905a891aa24f769a0971a2ce9a0364": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "539266076d8c4dfc9069d93bcd375bdb": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "55b44b2a76ae44f6a728b4ec0e7563ee": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_e08b24212f3f4badbf933798a5419112", + "IPY_MODEL_39e2f0f5848e4cf9b4e9ff1c7fb429b4" + ], + "layout": "IPY_MODEL_60c5a79e87734276b890d6b57545e11a" + } + }, + "5b9a3e413bc74c8596107180dda26b36": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_c89df6b6b1e04437a83b3a3569f4e79e", + "style": "IPY_MODEL_9b70f95365e44d1a9fd7158ab79f27e2", + "value": " 9% 7840/82784 [01:01<08:42, 143.38it/s]" + } + }, + 
"5e0dde0f79154116985e0dc84e266406": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "60c5a79e87734276b890d6b57545e11a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "65fead8ffcbe4d16a33d687124cfcc59": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_9c638cccfbe14fed82c3f88462493288", + "IPY_MODEL_e4ab877ed304441fabd215999af82934" + ], + "layout": "IPY_MODEL_1924c59303504865ac212a0ab2628c03" + } + }, + "66ae82fd1c0d475fac833d4a30f02944": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "67541ca1dd6d4e318f8699a1c97e52c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "681d19294bbb4938a72f7b7b85430b02": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "699f5ea179e5464bbcb550e1e40f81b6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "success", + "layout": "IPY_MODEL_95a2d7b92bba4def89556e33a0f996f5", + "max": 82784, + "style": "IPY_MODEL_1d43f3472e6c4789a0607af9483cca08", + "value": 82784 + } + }, + "6ab2e82383a24fae9372bdde004f2410": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "6b6ca8fc225646eab4fb2495254341f4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "6ca2e3f141eb471f892892d7dc38c0b9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "725e48e005354fe1b058d14f9e678c79": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_ee7d582338814010a521367102497e35", + "style": "IPY_MODEL_f0583a6fed9a4ef59eefe40fd1d3afc2", + "value": " 1% 954/82784 [00:10<09:42, 140.51it/s]" + } + }, + "7306ef30490a4bec8e6b1f5b545489c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_88e44cd0c45846d28dd79ad1b010bd4e", + "style": "IPY_MODEL_3b7568e8d3224b0e93ae040cc3aa0999", + "value": " 1% 1055/82784 [00:08<11:23, 119.59it/s]" + } + }, + "7452003ba95d441ba3f5085337bf35d1": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_f06a12c21e5f4ba5bb33bb5d30a6799a", + "style": "IPY_MODEL_e623f195c7d9481b9f1a60ba869ed585", + "value": " 12% 10198/82784 [01:24<09:02, 133.82it/s]" + } + }, + "749893c223f64081b80f4fa21982d8d9": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "7897c08b417d47068b64cc06eefd4e36": { + "model_module": "@jupyter-widgets/base", + "model_module_version": 
"1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "78c9e28c7bfb4181adc862cb1369eddd": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "79bf1ccf268949eda595e3d27a23ea67": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "7a2b1d2e50ce41298e25d430358c1ba0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_4a77c0bf7e7d42c184646a9d42c1709f", + "IPY_MODEL_7306ef30490a4bec8e6b1f5b545489c2" + ], + "layout": "IPY_MODEL_7897c08b417d47068b64cc06eefd4e36" + } + }, + "7f23822aa19841e287855fdf6c3a8bb6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_4245d3a98b964dd2b13aad8a878e6c99", + "IPY_MODEL_e85cea75b7764e5f832f42af76539428" + ], + "layout": "IPY_MODEL_fb7a6df512af4f0e96c51b74dc808f7d" + } + }, + "7f92ed9e2e0845c99ee77733d90327cb": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_451e308820b74437828c45da73bd54c2", + "max": 82784, + "style": "IPY_MODEL_3df38b182a154779b3147866b50154e6", + "value": 10198 + } + }, + "82b3b61e8da845fbbb7a52cb97f9ac40": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "845add417df541f797835fe0af7bdfc6": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "87deb091467e481eb21b38866c322ac7": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_06128d93067e4801adf53a5ef7f258ff", + "IPY_MODEL_25aac3c02e434ddb8ba48a488d6bba4c" + ], + "layout": "IPY_MODEL_4e2fbbd4a122409f8bd844e5299d1b59" + } + }, + "88e44cd0c45846d28dd79ad1b010bd4e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "8a079e84369e452089def45cb8611579": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "8a43479ba459450f98adb71fc28dae3d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_3c0dd29fdc564939b7fd5bbde13b710c", + "max": 82784, + "style": "IPY_MODEL_cd9e6a05d05540149a1d5a355770a8ca", + "value": 2607 + } + }, + "8f5f98bee7d14034a51d92237ed80fa4": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "9030f81b9f2e4ae08403784678140aa6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_25606d5fe062488493309b5c584d76e8", + "IPY_MODEL_d97ba3bab8124c04bcb1912d4c4dd6ff" + ], + "layout": "IPY_MODEL_12e8dbb2338c4632aa61b99e90968946" + } + }, + "907e05fcf9a34f1cb061867f7036f5f8": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": 
"LayoutModel", + "state": {} + }, + "949b920dd46249e78c8010ff506b643d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "95a2d7b92bba4def89556e33a0f996f5": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "9b70f95365e44d1a9fd7158ab79f27e2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "9c638cccfbe14fed82c3f88462493288": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "success", + "layout": "IPY_MODEL_373ecd265e6243beb96875060f06b5f6", + "max": 40505, + "style": "IPY_MODEL_dbf2668607e44d8e9c5e883cebf426b8", + "value": 40505 + } + }, + "9d7a10d8f8a44e8cb410c54d4bb15e14": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "layout": "IPY_MODEL_d70da475e2f242c0b0396519ec358663", + "max": 82784, + "style": "IPY_MODEL_1570eac5f58c4d5f8f6986cffc6823e4", + "value": 7840 + } + }, + "9d8827c874264df485f7c01b446114d9": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "9eac0deac16b46b8817312d5cbdf5a54": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_082d987bb4d949959237b3f7d77fc068", + "IPY_MODEL_17ba1fe4116347fab533ece5a413cb96" + ], + "layout": "IPY_MODEL_de3aa9d4f1034e2b903129375d3e88bf" + } + }, + "a000e9ff594a41b8b32c12759e98aa62": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "ab1de819696640e7b0852cb5e1f1099d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "success", + "layout": "IPY_MODEL_907e05fcf9a34f1cb061867f7036f5f8", + "max": 82784, + "style": "IPY_MODEL_5e0dde0f79154116985e0dc84e266406", + "value": 82784 + } + }, + "abdd1f8972ea4d07bb3021aef195d718": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "ac15325494744bfd80981d4b1513716d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_8a43479ba459450f98adb71fc28dae3d", + "IPY_MODEL_49013664517841a68091a25828104e3f" + ], + "layout": "IPY_MODEL_ee895b18841044b6a071177b402bf762" + } + }, + "acf09a7845774d8f80bfe71285de01c2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "ae2c38164c8e46eb8a795714caf938c0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "b74b68a6faa9453489cfb9fedef0ef58": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "bbe135687ff843408d998146496a92df": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + 
"description_width": "" + } + }, + "be7b6389899041d3b6583c3a7a834c81": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "bf465d7a3f034195b08f3ce5ffc763b4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_3bd2728803ef4c2a841857bbbc58996a", + "IPY_MODEL_c6df1af3bc704c6bb91aa3e1e63495c3" + ], + "layout": "IPY_MODEL_4df189bc7b2e4d92af9c3ff0de376b8f" + } + }, + "c5f3fc161c094562a9d653c352ede3e6": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_4ac579df79d4407c8def2a502c57893f", + "style": "IPY_MODEL_d40c1902086a42f7bd2086b01cd512de", + "value": "100% 82784/82784 [10:32<00:00, 129.27it/s]" + } + }, + "c6df1af3bc704c6bb91aa3e1e63495c3": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_f49b4dfaefa3444fb85bc574fa8ccba2", + "style": "IPY_MODEL_749893c223f64081b80f4fa21982d8d9", + "value": " 5% 4125/82784 [00:31<09:39, 135.64it/s]" + } + }, + "c89df6b6b1e04437a83b3a3569f4e79e": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "cbe4b6dc3b824c22a1905a6a19d3527c": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_7f92ed9e2e0845c99ee77733d90327cb", + "IPY_MODEL_7452003ba95d441ba3f5085337bf35d1" + ], + "layout": "IPY_MODEL_e9ff1ea3e90f488882f33abccf7a9890" + } + }, + "cd9e6a05d05540149a1d5a355770a8ca": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "d2b005e186b84ea2a3265794a176c03e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d40c1902086a42f7bd2086b01cd512de": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d6c6b9b0c4d64d0f942c4614d1c86562": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "d70da475e2f242c0b0396519ec358663": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "d80b196d2ed04ac898473d15b4713ff0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_6ab2e82383a24fae9372bdde004f2410", + "style": "IPY_MODEL_d2b005e186b84ea2a3265794a176c03e", + "value": " 5% 4236/82784 [00:33<09:36, 136.22it/s]" + } + }, + "d93edfafafc74a8b99d1aecdc0fbf305": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "d97ba3bab8124c04bcb1912d4c4dd6ff": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_11e4a5c477d54f95a71f128459f0d69f", + "style": "IPY_MODEL_d93edfafafc74a8b99d1aecdc0fbf305", + 
"value": " 1% 641/82784 [00:46<1:28:29, 15.47it/s]" + } + }, + "db864b5dc3e94803ba6c94477fd66613": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "dbf2668607e44d8e9c5e883cebf426b8": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "de3aa9d4f1034e2b903129375d3e88bf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "dfd9dc8c00254cb9b670a63099463c1e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e08b24212f3f4badbf933798a5419112": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "IntProgressModel", + "state": { + "bar_style": "danger", + "layout": "IPY_MODEL_e4a333bfa7044b69a37febb00d075501", + "max": 82784, + "style": "IPY_MODEL_f72e3eda095c405d80fff2a8e0847b07", + "value": 3812 + } + }, + "e11077a328ef4ce881429f146e9bd32a": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "e2141fa44d6243cbbb416171c1231706": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_699f5ea179e5464bbcb550e1e40f81b6", + "IPY_MODEL_05ecf0e1a16b4929902104affe9dc8a5" + ], + "layout": "IPY_MODEL_539266076d8c4dfc9069d93bcd375bdb" + } + }, + "e4a333bfa7044b69a37febb00d075501": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "e4ab877ed304441fabd215999af82934": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_44a670b1861242a793907e043728daa1", + "style": "IPY_MODEL_db864b5dc3e94803ba6c94477fd66613", + "value": "100% 40505/40505 [05:07<00:00, 131.73it/s]" + } + }, + "e4d703c1616c4f1daab45dee792c04ef": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "e623f195c7d9481b9f1a60ba869ed585": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "e85cea75b7764e5f832f42af76539428": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HTMLModel", + "state": { + "layout": "IPY_MODEL_b74b68a6faa9453489cfb9fedef0ef58", + "style": "IPY_MODEL_6ca2e3f141eb471f892892d7dc38c0b9", + "value": " 1% 703/82784 [01:40<3:29:20, 6.53it/s]" + } + }, + "e9fd243128f54f168458219cb8a58904": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "e9ff1ea3e90f488882f33abccf7a9890": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "ed04668bc4d54f5fa4edcae249f0ecb5": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + 
"description_width": "" + } + }, + "ee7d582338814010a521367102497e35": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "ee895b18841044b6a071177b402bf762": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "f0583a6fed9a4ef59eefe40fd1d3afc2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "DescriptionStyleModel", + "state": { + "description_width": "" + } + }, + "f06a12c21e5f4ba5bb33bb5d30a6799a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "f49b4dfaefa3444fb85bc574fa8ccba2": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "f4a4a4e6135d414c838caa11e97ac8a0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_9d7a10d8f8a44e8cb410c54d4bb15e14", + "IPY_MODEL_5b9a3e413bc74c8596107180dda26b36" + ], + "layout": "IPY_MODEL_3db58b23e97b4e46a89ebb555a969b9c" + } + }, + "f70462b059cf4a1c856de2148365423a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + }, + "f72e3eda095c405d80fff2a8e0847b07": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "ProgressStyleModel", + "state": { + "description_width": "" + } + }, + "f8a0a398931b4f4f9c003d432e6cd5d2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.0.0", + "model_name": "HBoxModel", + "state": { + "children": [ + "IPY_MODEL_1126e79e712f496b86c67a4acfb2598b", + "IPY_MODEL_725e48e005354fe1b058d14f9e678c79" + ], + "layout": "IPY_MODEL_949b920dd46249e78c8010ff506b643d" + } + }, + "fb7a6df512af4f0e96c51b74dc808f7d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.0.0", + "model_name": "LayoutModel", + "state": {} + } + }, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}