{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Copyright 2019 Google Inc. All Rights Reserved.\n",
    "#\n",
    "# Licensed under the Apache License, Version 2.0 (the \"License\");\n",
    "# you may not use this file except in compliance with the License.\n",
    "# You may obtain a copy of the License at\n",
    "#\n",
    "#     http://www.apache.org/licenses/LICENSE-2.0\n",
    "#\n",
    "# Unless required by applicable law or agreed to in writing, software\n",
    "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
    "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
    "# See the License for the specific language governing permissions and\n",
    "# limitations under the License."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install Pipeline SDK - This only needs to be ran once in the enviroment. \n",
    "# you can find the latest package @ https://github.com/kubeflow/pipelines/releases\n",
    "#KFP_PACKAGE = 'https://storage.googleapis.com/ml-pipeline/release/0.1.20/kfp.tar.gz'\n",
    "#!pip3 install $KFP_PACKAGE --upgrade"
   ]
  },
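  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check (a minimal sketch, not part of the original demo):\n",
    "# confirm the SDK imports cleanly and report its version.\n",
    "import kfp\n",
    "print(kfp.__version__)"
   ]
  },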
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import kfp\n",
    "import kfp.dsl as dsl\n",
    "from kfp.gcp import use_gcp_secret\n",
    "from kubernetes import client as k8s_client\n",
    "from kfp import compiler\n",
    "from kfp import notebook\n",
    "from kfp import components as comp"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROJECT_NAME='chavoshi-dev-2'\n",
    "DLMODEL_IMAGE = 'tensorflow/tensorflow:1.12.0-py3'\n",
    "#DLMODEL_IMAGE = 'tensorflow/tensorflow:1.12.0-py3-gpu'\n",
    "GCLOUD_SDK = 'google/cloud-sdk:latest'\n",
    "NOOP_IMAGE = 'ubuntu:16.04'\n",
    "EXPERIMENT_NAME = 'Image_classification'\n",
    "LOCAL_PATH = '/mnt/vol'\n",
    "IMAGE_FOLDER = 'small_bolt_images'\n",
    "IMAGE_SOURCE = 'gs://cisco-live-2019-demo/' + IMAGE_FOLDER\n",
    "OUTPUT_DIR = 'gs://chavoshi-dev-mlpipeline/%s' % EXPERIMENT_NAME # Such as gs://bucket/objact/path\n",
    "BASE_IMAGE='gcr.io/%s/pusherbase:latest' % PROJECT_NAME\n",
    "TARGET_IMAGE='gcr.io/%s/pusher:latest' % PROJECT_NAME"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kfp.onprem import mount_pvc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#def copy_source_data_op(step_name='load-data'):\n",
    "#    return dsl.ContainerOp(\n",
    "#        name = step_name,\n",
    "#        image = GCLOUD_SDK,\n",
    "#        command=['sh', '-c'],\n",
    "#        arguments = [  'rm -rf ' +LOCAL_PATH+ '/* ' +   '&& gsutil -m cp -r -n '+IMAGE_SOURCE+' '+LOCAL_PATH +' && rm -rf /mnt/vol/saved_model && rm -rf /mnt/vol/retrain_logs && gcloud auth activate-service-account --key-file /secret/gcp-credentials/user-gcp-sa.json && gsutil -m rm -r gs://test-gtc-demo-2019/retrain_logs/* 2> /dev/null || true']\n",
    "#    ).add_volume(k8s_client.V1Volume(name='workdir', persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "#                ).add_volume_mount(k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')).apply(use_gcp_secret('user-gcp-sa'))\n",
    "def copy_source_data_op(step_name='load-data'):\n",
    "    return dsl.ContainerOp(\n",
    "        name = step_name,\n",
    "        image = GCLOUD_SDK,\n",
    "        command=['sh', '-c'],\n",
    "        arguments = [  'rm -rf ' +LOCAL_PATH+ '/* ' +   '&& gsutil -m cp -r -n '+IMAGE_SOURCE+' '+LOCAL_PATH +' && rm -rf /mnt/vol/saved_model && rm -rf /mnt/vol/retrain_logs && gcloud auth activate-service-account --key-file /secret/gcp-credentials/user-gcp-sa.json && gsutil -m rm -r gs://test-gtc-demo-2019/retrain_logs/* 2> /dev/null || true']\n",
    "    ).apply(mount_pvc(pvc_name='pvccc', volume_mount_path=LOCAL_PATH)).apply(use_gcp_secret('user-gcp-sa'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "gs://chavoshi-dev-mlpipeline/Image_classification gcr.io/chavoshi-dev-2/pusherbase:latest\n"
     ]
    }
   ],
   "source": [
    "print(OUTPUT_DIR, BASE_IMAGE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2019-05-14 12:24:47:INFO:Checking path: gs://chavoshi-dev-mlpipeline/Image_classification...\n",
      "2019-05-14 12:24:47:INFO:Generate build files.\n",
      "2019-05-14 12:24:47:INFO:Start a kaniko job for build.\n",
      "2019-05-14 12:24:47:INFO:Cannot Find local kubernetes config. Trying in-cluster config.\n",
      "2019-05-14 12:24:47:INFO:Initialized with in-cluster config.\n",
      "2019-05-14 12:24:52:INFO:5 seconds: waiting for job to complete\n",
      "2019-05-14 12:24:57:INFO:10 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:02:INFO:15 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:07:INFO:20 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:12:INFO:25 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:17:INFO:30 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:22:INFO:35 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:27:INFO:40 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:32:INFO:45 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:37:INFO:50 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:42:INFO:55 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:47:INFO:60 seconds: waiting for job to complete\n",
      "2019-05-14 12:25:47:INFO:Kaniko job complete.\n",
      "2019-05-14 12:25:48:INFO:Build image complete.\n"
     ]
    }
   ],
   "source": [
    "%%docker {BASE_IMAGE} {OUTPUT_DIR}\n",
    "FROM tensorflow/tensorflow:1.12.0-py3\n",
    "RUN pip3 install tensorflow_hub &&\\\n",
    "    curl -O https://raw.githubusercontent.com/tensorflow/hub/master/examples/image_retraining/retrain.py\n",
    "ENTRYPOINT [\"python\", \"retrain.py\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/mnt/vol small_bolt_images\n"
     ]
    }
   ],
   "source": [
    "print(LOCAL_PATH, IMAGE_FOLDER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this first step is to create buttlenecks, by setting training steps to zero \n",
    "# this is done separately as in the following steps we may choose to run multiple \n",
    "# steps with various hyper parameters. \n",
    "\n",
    "def pre_process_op(step_name='preprocess-data'):\n",
    "    return dsl.ContainerOp(\n",
    "        name = step_name,\n",
    "        image = BASE_IMAGE,\n",
    "        arguments = [\n",
    "            '--image_dir', LOCAL_PATH+'/'+IMAGE_FOLDER,\n",
    "            '--output_labels', LOCAL_PATH+'/output_labels.txt',\n",
    "            '--summaries_dir', LOCAL_PATH+'/retrain_logs',\n",
    "            '--how_many_training_steps', 0,\n",
    "            '--learning_rate', 0.01,\n",
    "            '--bottleneck_dir', LOCAL_PATH+'/bottleneck',\n",
    "            '--tfhub_module', 'https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/2',\n",
    "            #'--saved_model_dir', LOCAL_PATH+'/saved_model',\n",
    "        ]\n",
    "    ).add_volume(k8s_client.V1Volume(name='workdir', persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "                ).add_volume_mount(k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')\n",
    "                                  ).apply(use_gcp_secret('user-gcp-sa')\n",
    "                                         #).set_gpu_limit('1')\n",
    "                                         ).set_cpu_request('2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# multiple instace of training can run in paralel with various hyper parameters ex learning rate \n",
    "# however the same tfhub module should be used as in buttle neck creation step\n",
    "def train_op(step_name='train'):\n",
    "    return dsl.ContainerOp(\n",
    "        name = step_name,\n",
    "        image = BASE_IMAGE,\n",
    "        arguments = [\n",
    "            '--image_dir', LOCAL_PATH+'/'+IMAGE_FOLDER,\n",
    "            '--output_labels', LOCAL_PATH+'/output_labels.txt',\n",
    "            '--summaries_dir', LOCAL_PATH+'/retrain_logs',\n",
    "            '--how_many_training_steps', 10,\n",
    "            '--learning_rate', 0.01,\n",
    "            '--bottleneck_dir', LOCAL_PATH+'/bottleneck',\n",
    "            '--tfhub_module', 'https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/2',\n",
    "            '--saved_model_dir', LOCAL_PATH+'/saved_model',\n",
    "                        #'--saved_model_dir', OUTPUT_DIR+'/BOLT/saved_model',\n",
    "\n",
    "        ]\n",
    "    ).add_volume(k8s_client.V1Volume(name='workdir', persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "                ).add_volume_mount(k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')\n",
    "                                  ).apply(use_gcp_secret('user-gcp-sa')\n",
    "                                         ).set_gpu_limit('1')\n",
    "                                         #).set_cpu_request('2')"
   ]
  },
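  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal sketch of the fan-out described above (not wired into the pipeline\n",
    "# below): ops with no dependency between them are scheduled in parallel by KFP.\n",
    "# The step names are illustrative; a real fan-out would also parameterize\n",
    "# train_op by learning rate.\n",
    "def fan_out_training(num_runs=3):\n",
    "    return [train_op(step_name='train-%d' % i) for i in range(num_runs)]"
   ]
  },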
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tensorboard_op2(step_name='tensorboard'):\n",
    "    return dsl.ContainerOp(\n",
    "        name = step_name,\n",
    "        image = GCLOUD_SDK,\n",
    "        command=['sh', '-c'],\n",
    "        arguments = ['''echo '{\"outputs\": [{\"source\": \"gs://test-gtc-demo-2019/retrain_logs\", \n",
    "        \"type\": \"tensorboard\"}]}'>/mlpipeline-ui-metadata.json && gcloud auth activate-service-account --key-file '/secret/gcp-credentials/user-gcp-sa.json' && gsutil -m cp -R mnt/vol/retrain_logs gs://test-gtc-demo-2019 ''']\n",
    "    ).add_volume(\n",
    "        k8s_client.V1Volume(\n",
    "            name='workdir', \n",
    "            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "                ).add_volume_mount(\n",
    "        k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')\n",
    "    ).apply(use_gcp_secret('user-gcp-sa'))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def tensorboard_func():\n",
    "    from tensorflow.python.lib.io import file_io\n",
    "    import json\n",
    "    \n",
    "    # Exports a sample tensorboard:\n",
    "    metadata = {\n",
    "        'outputs' : [{\n",
    "            'type': 'tensorboard',\n",
    "            'source': 'gs://test-gtc-demo-2019/retrain_logs',\n",
    "        }]\n",
    "    }\n",
    "    \n",
    "    with file_io.FileIO('/mlpipeline-ui-metadata.json', 'w') as f:\n",
    "      json.dump(metadata, f)  \n",
    "\n",
    "    import os\n",
    "    \n",
    "    #TODO: copy training files\n",
    "            \n",
    "tensorboard_op = comp.func_to_container_op(tensorboard_func, base_image='tensorflow/tensorflow:1.12.0-py3')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this step simply changes the permissions on the drive to make accessible jupyter hub and other locations\n",
    "def tflite_transform_op():\n",
    "    import tensorflow as tf\n",
    "\n",
    "    converter = tf.lite.TFLiteConverter.from_saved_model('mnt/vol/retrain')\n",
    "    tflite_model = converter.convert()\n",
    "    open(\"converted_model.tflite\", \"wb\").write(tflite_model)\n",
    "    return                    \n",
    "                    \n",
    "tensorboard_op = comp.func_to_container_op(tensorboard_func, base_image='tensorflow/tensorflow:1.12.0-py3')"
   ]
  },
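  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A minimal sketch of how the converted model could be exercised, assuming a\n",
    "# TF version where tf.lite.Interpreter is available and the shared volume is\n",
    "# mounted at /mnt/vol. Not part of the pipeline itself.\n",
    "def check_tflite_model(path='/mnt/vol/converted_model.tflite'):\n",
    "    import tensorflow as tf\n",
    "    interpreter = tf.lite.Interpreter(model_path=path)\n",
    "    interpreter.allocate_tensors()\n",
    "    # Print the expected input tensor details to confirm the conversion worked.\n",
    "    print(interpreter.get_input_details())"
   ]
  },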
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# this step simply changes the permissions on the drive to make accessible jupyter hub and other locations\n",
    "def publish_op(step_name='publish content'):\n",
    "    return dsl.ContainerOp(\n",
    "        name = step_name,\n",
    "        image = GCLOUD_SDK,\n",
    "        command=['sh', '-c'],\n",
    "        arguments = ['chmod -R 0777 /mnt/vol/ ']\n",
    "    ).add_volume(k8s_client.V1Volume(name='workdir', persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "                ).add_volume_mount(k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')).apply(use_gcp_secret('user-gcp-sa'))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "@dsl.pipeline(\n",
    "  name='TFHub Image Classifier',\n",
    "  description='Users TFHub based models such as Mobilenetv2 and NasNet to train an image classifer.'\n",
    ")\n",
    "def tfhub_image_classifier_dag(\n",
    "    model_version: dsl.PipelineParam = dsl.PipelineParam(name='model-version', value='1'),\n",
    "):\n",
    "    #copy source data\n",
    "    copy_source_data = copy_source_data_op()\n",
    "    \n",
    " \n",
    "    pre_process_data = pre_process_op()\n",
    " \n",
    "    pre_process_data.after(copy_source_data)\n",
    "    \n",
    "    \n",
    "    train = train_op()\n",
    "    \n",
    "    train.after(pre_process_data)  \n",
    "    tensorboard = tensorboard_op2()\n",
    "    tensorboard.after(train)\n",
    "    \n",
    "    tflite = tflite_transform_op().add_volume(k8s_client.V1Volume(name='workdir', persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(claim_name='nfs'))\n",
    "                ).add_volume_mount(k8s_client.V1VolumeMount(mount_path=LOCAL_PATH, name='workdir')\n",
    "\n",
    "    tflite.after(train)\n",
    "    \n",
    "    \n",
    "    publish = publish_op()\n",
    "    publish.after(train)\n",
    "    \n",
    "    \n",
    "    #deploy = deploy_op()\n",
    "    #deploy.after(publish)\n",
    "    \n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "client = kfp.Client()\n",
    "exp = client.list_experiments().experiments[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "from kfp import compiler\n",
    "compiler.Compiler().compile(tfhub_image_classifier_dag,  'tfhub_image_classifier_dag.tar.gz')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "Run link <a href=\"/pipeline/#/runs/details/b9b5cd37-4c18-11e9-8554-42010a8a01f3\" target=\"_blank\" >here</a>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "run = client.run_pipeline(exp.id, 'TF Hub Image Classifier', 'tfhub_image_classifier_dag.tar.gz',\n",
    "    params={})"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}