{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "gpuType": "T4"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU",
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "0c30b2e5a51c4033b1c0379fff41e204": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ada3eee6be3f44a0b610e1f0f97955e5",
              "IPY_MODEL_48f78eba409940ba84256f50690fbc84",
              "IPY_MODEL_b0cb5c4d5f684db38ac10b79b0c39c87"
            ],
            "layout": "IPY_MODEL_cd606d4eff7443f6bf62f664cf3b6285"
          }
        },
        "ada3eee6be3f44a0b610e1f0f97955e5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_6844ccfa43614970b11ac72ac8dd7fec",
            "placeholder": "​",
            "style": "IPY_MODEL_68bc51539005432b8cf15d27cc104cdd",
            "value": "llama-2-13b-chat.ggmlv3.q5_1.bin: 100%"
          }
        },
        "48f78eba409940ba84256f50690fbc84": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1fbd88a2690847cd925557bab62ef84a",
            "max": 9763701888,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_28c529d0c99c436a9b91ae7c7457d783",
            "value": 9763701888
          }
        },
        "b0cb5c4d5f684db38ac10b79b0c39c87": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b488ca54843e4f6c9101b26427801b4f",
            "placeholder": "​",
            "style": "IPY_MODEL_9be5081d007d4efc8f83a4621f91c353",
            "value": " 9.76G/9.76G [01:33<00:00, 101MB/s]"
          }
        },
        "cd606d4eff7443f6bf62f664cf3b6285": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6844ccfa43614970b11ac72ac8dd7fec": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "68bc51539005432b8cf15d27cc104cdd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "1fbd88a2690847cd925557bab62ef84a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "28c529d0c99c436a9b91ae7c7457d783": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b488ca54843e4f6c9101b26427801b4f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "9be5081d007d4efc8f83a4621f91c353": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# **Llama 2**\n",
        "\n",
        "The Llama 2 is a collection of pretrained and fine-tuned generative text models, ranging from 7 billion to 70 billion parameters, designed for dialogue use cases.\n",
        "\n",
        "It outperforms open-source chat models on most benchmarks and is on par with popular closed-source models in human evaluations for helpfulness and safety.\n",
        "\n",
        "[Llama 2 13B-chat](https://huggingface.co/meta-llama/Llama-2-13b-chat)\n",
        "\n",
        "llama.cpp's objective is to run the LLaMA model with 4-bit integer quantization on MacBook. It is a plain C/C++ implementation optimized for Apple silicon and x86 architectures, supporting various integer quantization and BLAS libraries. Originally a web chat example, it now serves as a development playground for ggml library features.\n",
        "\n",
        "GGML, a C library for machine learning, facilitates the distribution of large language models (LLMs). It utilizes quantization to enable efficient LLM execution on consumer hardware. GGML files contain binary-encoded data, including version number, hyperparameters, vocabulary, and weights. The vocabulary comprises tokens for language generation, while the weights determine the LLM's size. Quantization reduces precision to optimize resource usage.\n",
        "\n",
        "Quantized Models from the Hugging Face Community\n",
        "The Hugging Face community provides quantized models, which allow us to efficiently and effectively utilize the model on the T4 GPU. It is important to consult reliable sources before using any model.\n",
        "\n",
        "There are several variations available, but the ones that interest us are based on the GGLM library.\n",
        "\n",
        "We can see the different variations that Llama-2-13B-GGML has [here](https://huggingface.co/models?search=llama%202%20ggml).\n",
        "\n",
        "In this case, we will use the model called [Llama-2-13B-chat-GGML](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML).\n",
        "\n",
        "## **Step 1: Install All the Required Packages**"
      ],
      "metadata": {
        "id": "HtAhyvBbnHbv"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# GPU llama-cpp-python\n",
        "!CMAKE_ARGS=\"-DLLAMA_CUBLAS=on\" FORCE_CMAKE=1 pip install llama-cpp-python==0.1.78 numpy==1.23.4 --force-reinstall --upgrade --no-cache-dir --verbose\n",
        "!pip install huggingface_hub -q\n",
        "!pip install llama-cpp-python==0.1.78\n",
        "!pip install numpy==1.23.4"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "NcrcOo_Ic8cr",
        "outputId": "2952db1b-e04b-42ab-84f0-2bdc32b7588b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)\n",
            "Collecting llama-cpp-python==0.1.78\n",
            "  Downloading llama_cpp_python-0.1.78.tar.gz (1.7 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.7/1.7 MB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Running command pip subprocess to install build dependencies\n",
            "  Using pip 23.1.2 from /usr/local/lib/python3.10/dist-packages/pip (python 3.10)\n",
            "  Collecting setuptools>=42\n",
            "    Downloading setuptools-69.2.0-py3-none-any.whl (821 kB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 821.5/821.5 kB 5.2 MB/s eta 0:00:00\n",
            "  Collecting scikit-build>=0.13\n",
            "    Downloading scikit_build-0.17.6-py3-none-any.whl (84 kB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.3/84.3 kB 6.2 MB/s eta 0:00:00\n",
            "  Collecting cmake>=3.18\n",
            "    Downloading cmake-3.28.3-py2.py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (26.3 MB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 26.3/26.3 MB 34.5 MB/s eta 0:00:00\n",
            "  Collecting ninja\n",
            "    Downloading ninja-1.11.1.1-py2.py3-none-manylinux1_x86_64.manylinux_2_5_x86_64.whl (307 kB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 307.2/307.2 kB 18.0 MB/s eta 0:00:00\n",
            "  Collecting distro (from scikit-build>=0.13)\n",
            "    Downloading distro-1.9.0-py3-none-any.whl (20 kB)\n",
            "  Collecting packaging (from scikit-build>=0.13)\n",
            "    Downloading packaging-24.0-py3-none-any.whl (53 kB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 53.5/53.5 kB 4.7 MB/s eta 0:00:00\n",
            "  Collecting tomli (from scikit-build>=0.13)\n",
            "    Downloading tomli-2.0.1-py3-none-any.whl (12 kB)\n",
            "  Collecting wheel>=0.32.0 (from scikit-build>=0.13)\n",
            "    Downloading wheel-0.43.0-py3-none-any.whl (65 kB)\n",
            "       ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 65.8/65.8 kB 6.2 MB/s eta 0:00:00\n",
            "  Installing collected packages: ninja, cmake, wheel, tomli, setuptools, packaging, distro, scikit-build\n",
            "    Creating /tmp/pip-build-env-s1td0zka/overlay/local/bin\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/ninja to 755\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/cmake to 755\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/cpack to 755\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/ctest to 755\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/wheel to 755\n",
            "    changing mode of /tmp/pip-build-env-s1td0zka/overlay/local/bin/distro to 755\n",
            "  ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "  ipython 7.34.0 requires jedi>=0.16, which is not installed.\n",
            "  Successfully installed cmake-3.28.3 distro-1.9.0 ninja-1.11.1.1 packaging-24.0 scikit-build-0.17.6 setuptools-69.2.0 tomli-2.0.1 wheel-0.43.0\n",
            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Running command Getting requirements to build wheel\n",
            "  running egg_info\n",
            "  writing llama_cpp_python.egg-info/PKG-INFO\n",
            "  writing dependency_links to llama_cpp_python.egg-info/dependency_links.txt\n",
            "  writing requirements to llama_cpp_python.egg-info/requires.txt\n",
            "  writing top-level names to llama_cpp_python.egg-info/top_level.txt\n",
            "  reading manifest file 'llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  adding license file 'LICENSE.md'\n",
            "  writing manifest file 'llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Running command Preparing metadata (pyproject.toml)\n",
            "  running dist_info\n",
            "  creating /tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info\n",
            "  writing /tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/PKG-INFO\n",
            "  writing dependency_links to /tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/dependency_links.txt\n",
            "  writing requirements to /tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/requires.txt\n",
            "  writing top-level names to /tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/top_level.txt\n",
            "  writing manifest file '/tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  reading manifest file '/tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  adding license file 'LICENSE.md'\n",
            "  writing manifest file '/tmp/pip-modern-metadata-s31zeewq/llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  creating '/tmp/pip-modern-metadata-s31zeewq/llama_cpp_python-0.1.78.dist-info'\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Collecting numpy==1.23.4\n",
            "  Downloading numpy-1.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m64.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hCollecting typing-extensions>=4.5.0 (from llama-cpp-python==0.1.78)\n",
            "  Downloading typing_extensions-4.10.0-py3-none-any.whl (33 kB)\n",
            "Collecting diskcache>=5.6.1 (from llama-cpp-python==0.1.78)\n",
            "  Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m152.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: llama-cpp-python\n",
            "  Running command Building wheel for llama-cpp-python (pyproject.toml)\n",
            "\n",
            "\n",
            "  --------------------------------------------------------------------------------\n",
            "  -- Trying 'Ninja' generator\n",
            "  --------------------------------\n",
            "  ---------------------------\n",
            "  ----------------------\n",
            "  -----------------\n",
            "  ------------\n",
            "  -------\n",
            "  --\n",
            "  CMake Deprecation Warning at CMakeLists.txt:1 (cmake_minimum_required):\n",
            "    Compatibility with CMake < 3.5 will be removed from a future version of\n",
            "    CMake.\n",
            "\n",
            "    Update the VERSION argument <min> value or use a ...<max> suffix to tell\n",
            "    CMake that the project does not need compatibility with older versions.\n",
            "\n",
            "  Not searching for unused variables given on the command line.\n",
            "\n",
            "  -- The C compiler identification is GNU 11.4.0\n",
            "  -- Detecting C compiler ABI info\n",
            "  -- Detecting C compiler ABI info - done\n",
            "  -- Check for working C compiler: /usr/bin/cc - skipped\n",
            "  -- Detecting C compile features\n",
            "  -- Detecting C compile features - done\n",
            "  -- The CXX compiler identification is GNU 11.4.0\n",
            "  -- Detecting CXX compiler ABI info\n",
            "  -- Detecting CXX compiler ABI info - done\n",
            "  -- Check for working CXX compiler: /usr/bin/c++ - skipped\n",
            "  -- Detecting CXX compile features\n",
            "  -- Detecting CXX compile features - done\n",
            "  -- Configuring done (1.1s)\n",
            "  -- Generating done (0.0s)\n",
            "  -- Build files have been written to: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_cmake_test_compile/build\n",
            "  --\n",
            "  -------\n",
            "  ------------\n",
            "  -----------------\n",
            "  ----------------------\n",
            "  ---------------------------\n",
            "  --------------------------------\n",
            "  -- Trying 'Ninja' generator - success\n",
            "  --------------------------------------------------------------------------------\n",
            "\n",
            "  Configuring Project\n",
            "    Working directory:\n",
            "      /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-build\n",
            "    Command:\n",
            "      /tmp/pip-build-env-s1td0zka/overlay/local/lib/python3.10/dist-packages/cmake/data/bin/cmake /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649 -G Ninja -DCMAKE_MAKE_PROGRAM:FILEPATH=/tmp/pip-build-env-s1td0zka/overlay/local/lib/python3.10/dist-packages/ninja/data/bin/ninja --no-warn-unused-cli -DCMAKE_INSTALL_PREFIX:PATH=/tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install -DPYTHON_VERSION_STRING:STRING=3.10.12 -DSKBUILD:INTERNAL=TRUE -DCMAKE_MODULE_PATH:PATH=/tmp/pip-build-env-s1td0zka/overlay/local/lib/python3.10/dist-packages/skbuild/resources/cmake -DPYTHON_EXECUTABLE:PATH=/usr/bin/python3 -DPYTHON_INCLUDE_DIR:PATH=/usr/include/python3.10 -DPYTHON_LIBRARY:PATH=/usr/lib/x86_64-linux-gnu/libpython3.10.so -DPython_EXECUTABLE:PATH=/usr/bin/python3 -DPython_ROOT_DIR:PATH=/usr -DPython_FIND_REGISTRY:STRING=NEVER -DPython_INCLUDE_DIR:PATH=/usr/include/python3.10 -DPython3_EXECUTABLE:PATH=/usr/bin/python3 -DPython3_ROOT_DIR:PATH=/usr -DPython3_FIND_REGISTRY:STRING=NEVER -DPython3_INCLUDE_DIR:PATH=/usr/include/python3.10 -DCMAKE_MAKE_PROGRAM:FILEPATH=/tmp/pip-build-env-s1td0zka/overlay/local/lib/python3.10/dist-packages/ninja/data/bin/ninja -DLLAMA_CUBLAS=on -DCMAKE_BUILD_TYPE:STRING=Release -DLLAMA_CUBLAS=on\n",
            "\n",
            "  Not searching for unused variables given on the command line.\n",
            "  -- The C compiler identification is GNU 11.4.0\n",
            "  -- The CXX compiler identification is GNU 11.4.0\n",
            "  -- Detecting C compiler ABI info\n",
            "  -- Detecting C compiler ABI info - done\n",
            "  -- Check for working C compiler: /usr/bin/cc - skipped\n",
            "  -- Detecting C compile features\n",
            "  -- Detecting C compile features - done\n",
            "  -- Detecting CXX compiler ABI info\n",
            "  -- Detecting CXX compiler ABI info - done\n",
            "  -- Check for working CXX compiler: /usr/bin/c++ - skipped\n",
            "  -- Detecting CXX compile features\n",
            "  -- Detecting CXX compile features - done\n",
            "  -- Found Git: /usr/bin/git (found version \"2.34.1\")\n",
            "  fatal: not a git repository (or any of the parent directories): .git\n",
            "  fatal: not a git repository (or any of the parent directories): .git\n",
            "  CMake Warning at vendor/llama.cpp/CMakeLists.txt:117 (message):\n",
            "    Git repository not found; to enable automatic generation of build info,\n",
            "    make sure Git is installed and the project is a Git repository.\n",
            "\n",
            "\n",
            "  -- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n",
            "  -- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Success\n",
            "  -- Found Threads: TRUE\n",
            "  -- Found CUDAToolkit: /usr/local/cuda/targets/x86_64-linux/include (found version \"12.2.140\")\n",
            "  -- cuBLAS found\n",
            "  -- The CUDA compiler identification is NVIDIA 12.2.140\n",
            "  -- Detecting CUDA compiler ABI info\n",
            "  -- Detecting CUDA compiler ABI info - done\n",
            "  -- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped\n",
            "  -- Detecting CUDA compile features\n",
            "  -- Detecting CUDA compile features - done\n",
            "  -- Using CUDA architectures: 52;61;70\n",
            "  -- CMAKE_SYSTEM_PROCESSOR: x86_64\n",
            "  -- x86 detected\n",
            "  -- Configuring done (4.2s)\n",
            "  -- Generating done (0.0s)\n",
            "  -- Build files have been written to: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-build\n",
            "  [1/9] Building C object vendor/llama.cpp/CMakeFiles/ggml.dir/ggml-alloc.c.o\n",
            "  [2/9] Building C object vendor/llama.cpp/CMakeFiles/ggml.dir/k_quants.c.o\n",
            "  [3/9] Building C object vendor/llama.cpp/CMakeFiles/ggml.dir/ggml.c.o\n",
            "  [4/9] Building CXX object vendor/llama.cpp/CMakeFiles/llama.dir/llama.cpp.o\n",
            "  [5/9] Building CUDA object vendor/llama.cpp/CMakeFiles/ggml.dir/ggml-cuda.cu.o\n",
            "  [6/9] Linking CUDA shared library vendor/llama.cpp/libggml_shared.so\n",
            "  [7/9] Linking CXX shared library vendor/llama.cpp/libllama.so\n",
            "  [8/9] Linking CUDA static library vendor/llama.cpp/libggml_static.a\n",
            "  [8/9] Install the project...\n",
            "  -- Install configuration: \"Release\"\n",
            "  -- Installing: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/lib/libggml_shared.so\n",
            "  -- Installing: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/lib/libllama.so\n",
            "  -- Set non-toolchain portion of runtime path of \"/tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/lib/libllama.so\" to \"\"\n",
            "  -- Installing: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/bin/convert.py\n",
            "  -- Installing: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/bin/convert-lora-to-ggml.py\n",
            "  -- Installing: /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/libllama.so\n",
            "  -- Set non-toolchain portion of runtime path of \"/tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/_skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/libllama.so\" to \"\"\n",
            "\n",
            "  copying llama_cpp/llama_cpp.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_cpp.py\n",
            "  copying llama_cpp/__init__.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/__init__.py\n",
            "  copying llama_cpp/utils.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/utils.py\n",
            "  copying llama_cpp/llama_types.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_types.py\n",
            "  copying llama_cpp/llama.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama.py\n",
            "  copying llama_cpp/llama_grammar.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_grammar.py\n",
            "  creating directory _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server\n",
            "  copying llama_cpp/server/__main__.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__main__.py\n",
            "  copying llama_cpp/server/__init__.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__init__.py\n",
            "  copying llama_cpp/server/app.py -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/app.py\n",
            "  copying /tmp/pip-install-9p3bawjo/llama-cpp-python_6dd0a7945b94409293294c53902ae649/llama_cpp/py.typed -> _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/py.typed\n",
            "\n",
            "  running bdist_wheel\n",
            "  running build\n",
            "  running build_py\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_cpp.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/utils.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_types.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_grammar.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__main__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/app.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/py.typed -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/libllama.so -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_cpp.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/utils.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_types.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/llama_grammar.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__main__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/llama_cpp/server/app.py -> _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server\n",
            "  copied 9 files\n",
            "  running build_ext\n",
            "  installing to _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel\n",
            "  running install\n",
            "  running install_lib\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/llama_cpp.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server/__main__.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/server/app.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp/server\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/__init__.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/utils.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/llama_types.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/llama.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/py.typed -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/libllama.so -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copying _skbuild/linux-x86_64-3.10/setuptools/lib.linux-x86_64-cpython-310/llama_cpp/llama_grammar.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp\n",
            "  copied 11 files\n",
            "  running install_data\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/lib\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/lib/libllama.so -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/lib\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/lib/libggml_shared.so -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/lib\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/bin\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/bin/convert.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/bin\n",
            "  copying _skbuild/linux-x86_64-3.10/cmake-install/bin/convert-lora-to-ggml.py -> _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.data/data/bin\n",
            "  running install_egg_info\n",
            "  running egg_info\n",
            "  writing llama_cpp_python.egg-info/PKG-INFO\n",
            "  writing dependency_links to llama_cpp_python.egg-info/dependency_links.txt\n",
            "  writing requirements to llama_cpp_python.egg-info/requires.txt\n",
            "  writing top-level names to llama_cpp_python.egg-info/top_level.txt\n",
            "  reading manifest file 'llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  adding license file 'LICENSE.md'\n",
            "  writing manifest file 'llama_cpp_python.egg-info/SOURCES.txt'\n",
            "  Copying llama_cpp_python.egg-info to _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78-py3.10.egg-info\n",
            "  running install_scripts\n",
            "  copied 0 files\n",
            "  creating _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel/llama_cpp_python-0.1.78.dist-info/WHEEL\n",
            "  creating '/tmp/pip-wheel-2x_lgu13/.tmp-e81j8hj_/llama_cpp_python-0.1.78-cp310-cp310-linux_x86_64.whl' and adding '_skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel' to it\n",
            "  adding 'llama_cpp/__init__.py'\n",
            "  adding 'llama_cpp/libllama.so'\n",
            "  adding 'llama_cpp/llama.py'\n",
            "  adding 'llama_cpp/llama_cpp.py'\n",
            "  adding 'llama_cpp/llama_grammar.py'\n",
            "  adding 'llama_cpp/llama_types.py'\n",
            "  adding 'llama_cpp/py.typed'\n",
            "  adding 'llama_cpp/utils.py'\n",
            "  adding 'llama_cpp/server/__init__.py'\n",
            "  adding 'llama_cpp/server/__main__.py'\n",
            "  adding 'llama_cpp/server/app.py'\n",
            "  adding 'llama_cpp_python-0.1.78.data/data/bin/convert-lora-to-ggml.py'\n",
            "  adding 'llama_cpp_python-0.1.78.data/data/bin/convert.py'\n",
            "  adding 'llama_cpp_python-0.1.78.data/data/lib/libggml_shared.so'\n",
            "  adding 'llama_cpp_python-0.1.78.data/data/lib/libllama.so'\n",
            "  adding 'llama_cpp_python-0.1.78.dist-info/LICENSE.md'\n",
            "  adding 'llama_cpp_python-0.1.78.dist-info/METADATA'\n",
            "  adding 'llama_cpp_python-0.1.78.dist-info/WHEEL'\n",
            "  adding 'llama_cpp_python-0.1.78.dist-info/top_level.txt'\n",
            "  adding 'llama_cpp_python-0.1.78.dist-info/RECORD'\n",
            "  removing _skbuild/linux-x86_64-3.10/setuptools/bdist.linux-x86_64/wheel\n",
            "  Building wheel for llama-cpp-python (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for llama-cpp-python: filename=llama_cpp_python-0.1.78-cp310-cp310-linux_x86_64.whl size=5811184 sha256=b84a9365fa1a7ad324d5bc255f11e2856b500000b99b568cf3c400be5fb8d502\n",
            "  Stored in directory: /tmp/pip-ephem-wheel-cache-e4bvk5ua/wheels/61/f9/20/9ca660a9d3f2a47e44217059409478865948b5c8a1cba70030\n",
            "Successfully built llama-cpp-python\n",
            "Installing collected packages: typing-extensions, numpy, diskcache, llama-cpp-python\n",
            "  Attempting uninstall: typing-extensions\n",
            "    Found existing installation: typing_extensions 4.10.0\n",
            "    Uninstalling typing_extensions-4.10.0:\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/__pycache__/typing_extensions.cpython-310.pyc\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/typing_extensions-4.10.0.dist-info/\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/typing_extensions.py\n",
            "      Successfully uninstalled typing_extensions-4.10.0\n",
            "  Attempting uninstall: numpy\n",
            "    Found existing installation: numpy 1.25.2\n",
            "    Uninstalling numpy-1.25.2:\n",
            "      Removing file or directory /usr/local/bin/f2py\n",
            "      Removing file or directory /usr/local/bin/f2py3\n",
            "      Removing file or directory /usr/local/bin/f2py3.10\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/numpy-1.25.2.dist-info/\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/numpy.libs/\n",
            "      Removing file or directory /usr/local/lib/python3.10/dist-packages/numpy/\n",
            "      Successfully uninstalled numpy-1.25.2\n",
            "  changing mode of /usr/local/bin/f2py to 755\n",
            "  changing mode of /usr/local/bin/f2py3 to 755\n",
            "  changing mode of /usr/local/bin/f2py3.10 to 755\n",
            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
            "torch 2.2.1+cu121 requires nvidia-cublas-cu12==12.1.3.1; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-cupti-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cuda-runtime-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cudnn-cu12==8.9.2.26; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cufft-cu12==11.0.2.54; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-curand-cu12==10.3.2.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cusolver-cu12==11.4.5.107; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-cusparse-cu12==12.1.0.106; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-nccl-cu12==2.19.3; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "torch 2.2.1+cu121 requires nvidia-nvtx-cu12==12.1.105; platform_system == \"Linux\" and platform_machine == \"x86_64\", which is not installed.\n",
            "chex 0.1.85 requires numpy>=1.24.1, but you have numpy 1.23.4 which is incompatible.\n",
            "tensorflow 2.15.0 requires numpy<2.0.0,>=1.23.5, but you have numpy 1.23.4 which is incompatible.\u001b[0m\u001b[31m\n",
            "\u001b[0mSuccessfully installed diskcache-5.6.3 llama-cpp-python-0.1.78 numpy-1.23.4 typing-extensions-4.10.0\n",
            "Requirement already satisfied: llama-cpp-python==0.1.78 in /usr/local/lib/python3.10/dist-packages (0.1.78)\n",
            "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.1.78) (4.10.0)\n",
            "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.1.78) (1.23.4)\n",
            "Requirement already satisfied: diskcache>=5.6.1 in /usr/local/lib/python3.10/dist-packages (from llama-cpp-python==0.1.78) (5.6.3)\n",
            "Requirement already satisfied: numpy==1.23.4 in /usr/local/lib/python3.10/dist-packages (1.23.4)\n"
          ]
        }
      ]
    },
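    {
      "cell_type": "markdown",
      "source": [
        "Before moving on, it is worth confirming that the pinned builds are the ones actually installed. A quick, generic sanity check:\n",
        "\n",
        "```python\n",
        "from importlib.metadata import version\n",
        "\n",
        "print(version(\"llama-cpp-python\"))  # expect 0.1.78\n",
        "print(version(\"numpy\"))            # expect 1.23.4\n",
        "```"
      ],
      "metadata": {}
    },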
    {
      "cell_type": "markdown",
      "source": [
        "## **Step 2: Import All the Required Libraries**"
      ],
      "metadata": {
        "id": "lfcma1oWoqRU"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import hf_hub_download"
      ],
      "metadata": {
        "id": "91E9er2WezWz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from llama_cpp import Llama"
      ],
      "metadata": {
        "id": "cbWVDAQXe247"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "## **Step 3: Download the Model**"
      ],
      "metadata": {
        "id": "ei74qLUloxOr"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "model_name_or_path = \"TheBloke/Llama-2-13B-chat-GGML\"\n",
        "model_basename = \"llama-2-13b-chat.ggmlv3.q5_1.bin\" # the model is in bin format"
      ],
      "metadata": {
        "id": "fluWUdcKf3B3"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)"
      ],
      "metadata": {
        "id": "X-_AjGnTftke",
        "outputId": "742f8608-5841-48fe-df47-139bdb26a702",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 153,
          "referenced_widgets": [
            "0c30b2e5a51c4033b1c0379fff41e204",
            "ada3eee6be3f44a0b610e1f0f97955e5",
            "48f78eba409940ba84256f50690fbc84",
            "b0cb5c4d5f684db38ac10b79b0c39c87",
            "cd606d4eff7443f6bf62f664cf3b6285",
            "6844ccfa43614970b11ac72ac8dd7fec",
            "68bc51539005432b8cf15d27cc104cdd",
            "1fbd88a2690847cd925557bab62ef84a",
            "28c529d0c99c436a9b91ae7c7457d783",
            "b488ca54843e4f6c9101b26427801b4f",
            "9be5081d007d4efc8f83a4621f91c353"
          ]
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.10/dist-packages/huggingface_hub/utils/_token.py:88: UserWarning: \n",
            "The secret `HF_TOKEN` does not exist in your Colab secrets.\n",
            "To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.\n",
            "You will be able to reuse this secret in all of your notebooks.\n",
            "Please note that authentication is recommended but still optional to access public models or datasets.\n",
            "  warnings.warn(\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "llama-2-13b-chat.ggmlv3.q5_1.bin:   0%|          | 0.00/9.76G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "0c30b2e5a51c4033b1c0379fff41e204"
            }
          },
          "metadata": {}
        }
      ]
    },
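    {
      "cell_type": "markdown",
      "source": [
        "`hf_hub_download` saves the file into the Hugging Face cache (by default `~/.cache/huggingface/hub`) and returns the local path, so re-running the cell reuses the cached copy. A small sketch to confirm the roughly 9.76 GB file arrived intact:\n",
        "\n",
        "```python\n",
        "import os\n",
        "\n",
        "print(model_path)\n",
        "print(f\"{os.path.getsize(model_path) / 1e9:.2f} GB\")  # should be about 9.76 GB\n",
        "```"
      ],
      "metadata": {}
    },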
    {
      "cell_type": "markdown",
      "source": [
        "## **Step 4: Loading the Model**"
      ],
      "metadata": {
        "id": "XtBlRX3Lo24W"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# GPU\n",
        "lcpp_llm = None\n",
        "lcpp_llm = Llama(\n",
        "    model_path=model_path,\n",
        "    n_threads=2, # CPU cores\n",
        "    n_batch=512, # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.\n",
        "    n_gpu_layers=32 # Change this value based on your model and your GPU VRAM pool.\n",
        "    )"
      ],
      "metadata": {
        "id": "gJU61CE4gWaj",
        "outputId": "9f1f68f4-1e3e-4853-9ca7-5263749c72a9",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 1 | VSX = 0 | \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# See the number of layers in GPU\n",
        "lcpp_llm.params.n_gpu_layers"
      ],
      "metadata": {
        "id": "X6JagX8XgZ3e",
        "outputId": "165e8ca6-7262-4ed5-df2b-86abd0422fa9",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "32"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    },
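    {
      "cell_type": "markdown",
      "source": [
        "The check above returns 32. A 13B Llama 2 model has 40 transformer layers, so `n_gpu_layers=32` offloads most but not all of them. If your GPU has headroom (the q5_1 weights are about 9.76 GB, and a T4 offers roughly 15 GB of usable VRAM), a sketch of reloading with full offload; the values are suggestions, not requirements:\n",
        "\n",
        "```python\n",
        "lcpp_llm = None  # release the previous instance first\n",
        "lcpp_llm = Llama(\n",
        "    model_path=model_path,\n",
        "    n_threads=2,\n",
        "    n_batch=512,\n",
        "    n_gpu_layers=40,  # offload all 40 transformer layers\n",
        ")\n",
        "```"
      ],
      "metadata": {}
    },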
    {
      "cell_type": "markdown",
      "source": [
        "\n",
        "## **Step 5: Create a Prompt Template**"
      ],
      "metadata": {
        "id": "dcGXiPyGo9E9"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "prompt = \"Write a linear regression in python\"\n",
        "prompt_template=f'''SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n",
        "\n",
        "USER: {prompt}\n",
        "\n",
        "ASSISTANT:\n",
        "'''"
      ],
      "metadata": {
        "id": "5piaO8Jcgf4a"
      },
      "execution_count": null,
      "outputs": []
    },
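    {
      "cell_type": "markdown",
      "source": [
        "The SYSTEM/USER/ASSISTANT layout above is an informal convention that works well enough here. Llama-2-Chat was actually trained on a specific template built from `[INST]` and `<<SYS>>` markers; a sketch of that single-turn format, in case you want to match the training distribution more closely (the helper name is ours):\n",
        "\n",
        "```python\n",
        "def build_llama2_prompt(system_msg, user_msg):\n",
        "    # Official Llama-2-Chat single-turn template\n",
        "    return f\"[INST] <<SYS>>\\n{system_msg}\\n<</SYS>>\\n\\n{user_msg} [/INST]\"\n",
        "\n",
        "official_prompt = build_llama2_prompt(\n",
        "    \"You are a helpful, respectful and honest assistant.\",\n",
        "    prompt,\n",
        ")\n",
        "```"
      ],
      "metadata": {}
    },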
    {
      "cell_type": "markdown",
      "source": [
        "## **Step 6: Generating the Response**"
      ],
      "metadata": {
        "id": "M6ZpbSQVpE3N"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "response=lcpp_llm(prompt=prompt_template, max_tokens=256, temperature=0.5, top_p=0.95,\n",
        "                  repeat_penalty=1.2, top_k=150,\n",
        "                  echo=True)"
      ],
      "metadata": {
        "id": "AMTWq2DXgnUk"
      },
      "execution_count": null,
      "outputs": []
    },
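    {
      "cell_type": "markdown",
      "source": [
        "The call above blocks until all 256 tokens are generated. llama-cpp-python can also stream tokens as they are produced by passing `stream=True`, which turns the call into an iterator of partial completions; a minimal sketch with the same sampling settings:\n",
        "\n",
        "```python\n",
        "for chunk in lcpp_llm(prompt=prompt_template, max_tokens=256,\n",
        "                      temperature=0.5, top_p=0.95,\n",
        "                      repeat_penalty=1.2, top_k=150, stream=True):\n",
        "    print(chunk[\"choices\"][0][\"text\"], end=\"\", flush=True)\n",
        "```"
      ],
      "metadata": {}
    },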
    {
      "cell_type": "code",
      "source": [
        "print(response)"
      ],
      "metadata": {
        "id": "nuxW6hCegpZ7",
        "outputId": "f151a416-f25e-4fb5-9c09-661885252d0a",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'id': 'cmpl-88dc9343-75b1-4810-ab55-dfa67c520e5f', 'object': 'text_completion', 'created': 1710898708, 'model': '/root/.cache/huggingface/hub/models--TheBloke--Llama-2-13B-chat-GGML/snapshots/3140827b4dfcb6b562cd87ee3d7f07109b014dd0/llama-2-13b-chat.ggmlv3.q5_1.bin', 'choices': [{'text': \"SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\\n\\nUSER: Write a linear regression in python\\n\\nASSISTANT:\\n\\nTo write a linear regression in Python, you can use the scikit-learn library. Here is an example of how to do this:\\n```\\nfrom sklearn.linear_model import LinearRegression\\nimport pandas as pd\\n\\n# Load your dataset into a Pandas DataFrame\\ndf = pd.read_csv('your_data.csv')\\n\\n# Create a linear regression object and fit the data\\nreg = LinearRegression().fit(df[['x1', 'x2']], df['y'])\\n\\n# Print the coefficients\\nprint(reg.coef_)\\n```\\nThis will output the coefficients of the linear regression, which you can use to make predictions on new data. For example:\\n```\\n# Predict the value of y for a given set of x values\\nx_values = [1, 2, 3]\\ny_pred = reg.predict(pd.Series([x_values]))\\nprint(y_pred)\\n```\\nThis will output the predicted values of y for the given set of x values.\\n\\nPlease note that this is just an example and you may need to modify it to fit your specific needs, such as changing the input features or\", 'index': 0, 'logprobs': None, 'finish_reason': 'length'}], 'usage': {'prompt_tokens': 39, 'completion_tokens': 256, 'total_tokens': 295}}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(response[\"choices\"][0][\"text\"])"
      ],
      "metadata": {
        "id": "QyVi-O_5g5J2",
        "outputId": "6eacbe3a-d2c3-4911-cc4c-b25277775b93",
        "colab": {
          "base_uri": "https://localhost:8080/"
        }
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "SYSTEM: You are a helpful, respectful and honest assistant. Always answer as helpfully.\n",
            "\n",
            "USER: Write a linear regression in python\n",
            "\n",
            "ASSISTANT:\n",
            "\n",
            "To write a linear regression in Python, you can use the scikit-learn library. Here is an example of how to do this:\n",
            "```\n",
            "from sklearn.linear_model import LinearRegression\n",
            "import pandas as pd\n",
            "\n",
            "# Load your dataset into a Pandas DataFrame\n",
            "df = pd.read_csv('your_data.csv')\n",
            "\n",
            "# Create a linear regression object and fit the data\n",
            "reg = LinearRegression().fit(df[['x1', 'x2']], df['y'])\n",
            "\n",
            "# Print the coefficients\n",
            "print(reg.coef_)\n",
            "```\n",
            "This will output the coefficients of the linear regression, which you can use to make predictions on new data. For example:\n",
            "```\n",
            "# Predict the value of y for a given set of x values\n",
            "x_values = [1, 2, 3]\n",
            "y_pred = reg.predict(pd.Series([x_values]))\n",
            "print(y_pred)\n",
            "```\n",
            "This will output the predicted values of y for the given set of x values.\n",
            "\n",
            "Please note that this is just an example and you may need to modify it to fit your specific needs, such as changing the input features or\n"
          ]
        }
      ]
    }
  ]
}