
Commit 2af23d3

🚀 Dockerize llamacpp (antimatter15#132)
* feat: dockerize llamacpp
* feat: split build & runtime stages
* split dockerfile into main & tools
* add quantize into tool docker image
* Update .devops/tools.sh (Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>)
* add docker action pipeline
* change CI to publish at github docker registry
* fix name runs-on macOS-latest is macos-latest (lowercase)
* include docker versioned images
* fix github action docker
* fix docker.yml
* feat: include all-in-one command tool & update readme.md

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent 904d2a8 · commit 2af23d3

File tree: 9 files changed, +270 −2 lines


Diff for: .devops/full.Dockerfile (+17)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip

RUN pip install --upgrade pip setuptools wheel \
    && pip install torch torchvision torchaudio sentencepiece numpy

WORKDIR /app

COPY . .

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]

Diff for: .devops/main.Dockerfile (+18)

ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential

WORKDIR /app

COPY . .

RUN make

FROM ubuntu:$UBUNTU_VERSION as runtime

COPY --from=build /app/main /main

ENTRYPOINT [ "/main" ]

Diff for: .devops/tools.sh (+46)

#!/bin/bash
set -e

# Read the first argument into a variable
arg1="$1"

# Shift the arguments to remove the first one
shift

# Join the remaining arguments into a single string
arg2="$@"

if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then
    python3 ./convert-pth-to-ggml.py $arg2
elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then
    ./quantize $arg2
elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then
    ./main $arg2
elif [[ $arg1 == '--download' || $arg1 == '-d' ]]; then
    python3 ./download-pth.py $arg2
elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then
    echo "Downloading model..."
    python3 ./download-pth.py "$1" "$2"
    echo "Converting PTH to GGML..."
    # Convert the downloaded weights to f16 ggml (ftype 1 = f16) so the
    # quantization loop below has files to operate on
    python3 ./convert-pth-to-ggml.py "$1/$2" 1
    echo "Quantizing..."
    for i in "$1/$2"/ggml-model-f16.bin*; do
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Quantizing $i into ${i/f16/q4_0}..."
            ./quantize "$i" "${i/f16/q4_0}" 2
        fi
    done
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo "  --run (-r): Run a model previously converted into ggml"
    echo "      ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -t 8 -n 512"
    echo "  --convert (-c): Convert a llama model into ggml"
    echo "      ex: \"/models/7B/\" 1"
    echo "  --quantize (-q): Quantize a ggml model to 4 bits"
    echo "      ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo "  --download (-d): Download original llama model from CDN: https://agi.gpt4.org/llama/"
    echo "      ex: \"/models/\" 7B"
    echo "  --all-in-one (-a): Execute --download, --convert & --quantize"
    echo "      ex: \"/models/\" 7B"
fi
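Each branch of the script maps to a `docker run` invocation of the full image; a hedged example using the arguments from the help text (the /llama/models host path is illustrative):

```bash
# Download the 7B weights, convert them to ggml, then quantize to q4_0
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --download "/models/" 7B
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --convert "/models/7B/" 1
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --quantize \
    "/models/7B/ggml-model-f16.bin" "/models/7B/ggml-model-q4_0.bin" 2
```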

Diff for: .dockerignore (+24)

*.o
*.a
.cache/
.vs/
.vscode/
.DS_Store

build/
build-em/
build-debug/
build-release/
build-static/
build-no-accel/
build-sanitize-addr/
build-sanitize-thread/

models/*

/main
/quantize

arm_neon.h
compile_commands.json
Dockerfile

Diff for: .github/workflows/build.yml (+1 −1)

@@ -19,7 +19,7 @@ jobs:
       make

   macOS-latest:
-    runs-on: macOS-latest
+    runs-on: macos-latest

     steps:
       - name: Clone

Diff for: .github/workflows/docker.yml (+61)

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.

name: Publish Docker image

on:
  pull_request:
  push:
    branches:
      - master

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub
    runs-on: ubuntu-latest
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      matrix:
        config:
          - { tag: "light", dockerfile: ".devops/main.Dockerfile" }
          - { tag: "full", dockerfile: ".devops/full.Dockerfile" }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v3

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push Docker image (versioned)
        if: github.event_name == 'push'
        uses: docker/build-push-action@v4
        with:
          context: .
          push: true
          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
          file: ${{ matrix.config.dockerfile }}

      - name: Build and push Docker image (tagged)
        uses: docker/build-push-action@v4
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}"
          file: ${{ matrix.config.dockerfile }}
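Once this workflow has run on master, the resulting images can be pulled by tag (a sketch; the tag patterns follow the two build-push steps above):

```bash
# Floating per-variant tags, refreshed on every push to master
docker pull ghcr.io/ggerganov/llama.cpp:light
docker pull ghcr.io/ggerganov/llama.cpp:full

# Versioned tags append the full commit SHA from github.sha, i.e.
# ghcr.io/ggerganov/llama.cpp:full-<40-character-sha>
```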

Diff for: README.md (+32)

@@ -32,6 +32,7 @@ Supported platforms:
- [X] Mac OS
- [X] Linux
- [X] Windows (via CMake)
- [X] Docker

---

@@ -194,6 +195,37 @@

https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4

### Docker

#### Prerequisites
* Docker must be installed and running on your system.
* Create a folder to store big models & intermediate files (e.g. /llama/models).

#### Images
We have two Docker images available for this project:

1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable and the tools to convert LLaMA models into ggml and quantize them to 4 bits.
2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable.

#### Usage

The easiest way to download the models, convert them to ggml, and optimize them is with the --all-in-one command, which is included in the full Docker image.

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B
```

Once complete, you are ready to play:

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
```

or with the light image:

```bash
docker run -v /llama/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -t 8 -n 512
```

## Limitations

Diff for: convert-pth-to-ggml.py (+5 −1)

@@ -16,7 +16,7 @@
 # At the start of the ggml file we write the model parameters
 # and vocabulary.
 #
-
+import os
 import sys
 import json
 import struct

@@ -64,6 +64,10 @@ def get_n_parts(dim):
     sys.exit(1)
 fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".bin"

+if os.path.exists(fname_out):
+    print(f"Skip conversion, it already exists: {fname_out}")
+    sys.exit(0)
+
 with open(fname_hparams, "r") as f:
     hparams = json.load(f)
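For reference, a sketch of invoking the converter directly, with arguments as shown in the tools.sh help text (the trailing 1 selects the f16 output type; the model path is illustrative):

```bash
python3 convert-pth-to-ggml.py /models/7B/ 1
```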

Diff for: download-pth.py (+66)

import os
import sys
from tqdm import tqdm
import requests

if len(sys.argv) < 3:
    print("Usage: download-pth.py dir-model model-type\n")
    print("  model-type: Available models 7B, 13B, 30B or 65B")
    sys.exit(1)

modelsDir = sys.argv[1]
model = sys.argv[2]

num = {
    "7B": 1,
    "13B": 2,
    "30B": 4,
    "65B": 8,
}

if model not in num:
    print(f"Error: model {model} is not valid, provide 7B, 13B, 30B or 65B")
    sys.exit(1)

print(f"Downloading model {model}")

files = ["checklist.chk", "params.json"]

for i in range(num[model]):
    files.append(f"consolidated.0{i}.pth")

resolved_path = os.path.abspath(os.path.join(modelsDir, model))
os.makedirs(resolved_path, exist_ok=True)

for file in files:
    dest_path = os.path.join(resolved_path, file)

    if os.path.exists(dest_path):
        print(f"Skip file download, it already exists: {file}")
        continue

    url = f"https://agi.gpt4.org/llama/LLaMA/{model}/{file}"
    response = requests.get(url, stream=True)
    with open(dest_path, 'wb') as f:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    t.update(len(chunk))

files2 = ["tokenizer_checklist.chk", "tokenizer.model"]
for file in files2:
    dest_path = os.path.join(modelsDir, file)

    if os.path.exists(dest_path):
        print(f"Skip file download, it already exists: {file}")
        continue

    url = f"https://agi.gpt4.org/llama/LLaMA/{file}"
    response = requests.get(url, stream=True)
    with open(dest_path, 'wb') as f:
        with tqdm(unit='B', unit_scale=True, miniters=1, desc=file) as t:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    t.update(len(chunk))
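A sketch of running the downloader directly, matching its usage line (dir-model, then model-type):

```bash
# Downloads checklist.chk, params.json, and the consolidated.0*.pth shards
# into /models/7B, plus the shared tokenizer files into /models
python3 download-pth.py /models/ 7B
```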
