Dockerfile
# Base image
FROM ghcr.io/huggingface/text-generation-inference:latest

ENV DEBIAN_FRONTEND=noninteractive

# Set the working directory
WORKDIR /

# Update and upgrade the system packages (Worker Template)
COPY builder/setup.sh /setup.sh
RUN /bin/bash /setup.sh && \
    rm /setup.sh

# Install Python dependencies (Worker Template)
COPY builder/requirements.txt /requirements.txt
RUN python3 -m pip install --upgrade pip && \
    python3 -m pip install --upgrade -r /requirements.txt --no-cache-dir && \
    rm /requirements.txt

# Add src files (Worker Template)
ADD src .

# Whether to download the model weights into /runpod-volume at build time (set DOWNLOAD_MODEL=1 to enable).
ARG DOWNLOAD_MODEL=
ENV DOWNLOAD_MODEL=$DOWNLOAD_MODEL

# Set environment variables
ARG HF_MODEL_ID=
ENV HF_MODEL_ID=$HF_MODEL_ID
ARG HF_MODEL_REVISION=
ENV HF_MODEL_REVISION=$HF_MODEL_REVISION
ARG SM_NUM_GPUS=
ENV SM_NUM_GPUS=$SM_NUM_GPUS
ARG HF_MODEL_QUANTIZE=
ENV HF_MODEL_QUANTIZE=$HF_MODEL_QUANTIZE
ARG HF_MODEL_TRUST_REMOTE_CODE=
ENV HF_MODEL_TRUST_REMOTE_CODE=$HF_MODEL_TRUST_REMOTE_CODE
ARG MODEL_BASE_PATH="/runpod-volume/"
ENV MODEL_BASE_PATH=$MODEL_BASE_PATH
ARG HUGGING_FACE_HUB_TOKEN=
ENV HUGGING_FACE_HUB_TOKEN=$HUGGING_FACE_HUB_TOKEN
ARG HF_MAX_TOTAL_TOKENS=
ENV HF_MAX_TOTAL_TOKENS=$HF_MAX_TOTAL_TOKENS
ARG HF_MAX_INPUT_LENGTH=
ENV HF_MAX_INPUT_LENGTH=$HF_MAX_INPUT_LENGTH
ARG HF_MAX_BATCH_TOTAL_TOKENS=
ENV HF_MAX_BATCH_TOTAL_TOKENS=$HF_MAX_BATCH_TOTAL_TOKENS
ARG HF_MAX_BATCH_PREFILL_TOKENS=
ENV HF_MAX_BATCH_PREFILL_TOKENS=$HF_MAX_BATCH_PREFILL_TOKENS
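
# Example only (not part of the upstream template): the build args above could be
# supplied roughly as sketched below. The image tag, model id, and token are
# placeholders, not values this repository prescribes.
#
#   docker build -t tgi-worker \
#     --build-arg DOWNLOAD_MODEL=1 \
#     --build-arg HF_MODEL_ID=bigscience/bloom-560m \
#     --build-arg SM_NUM_GPUS=1 \
#     --build-arg HUGGING_FACE_HUB_TOKEN=<hf_token> \
#     .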

# Prepare the Hugging Face directories for caching datasets, models, and more.
ENV HF_DATASETS_CACHE="/runpod-volume/huggingface-cache/datasets"
ENV HUGGINGFACE_HUB_CACHE="/runpod-volume/huggingface-cache/hub"
ENV TRANSFORMERS_CACHE="/runpod-volume/huggingface-cache/hub"

# Conditionally download the model weights based on DOWNLOAD_MODEL
# (when enabled, the weights are fetched at build time into the hub cache configured above).
RUN if [ "$DOWNLOAD_MODEL" = "1" ]; then \
        text-generation-server download-weights $HF_MODEL_ID; \
    fi

# Quick temporary updates: bootstrap pip for Python 3.10, install a pinned
# runpod-python ref, and install the text_generation client library.
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10
RUN python3.10 -m pip install git+https://github.com/runpod/runpod-python@a1#egg=runpod --compile
RUN python3.10 -m pip install text_generation

ENTRYPOINT ["./entrypoint.sh"]
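
# Example only (an assumption, not defined by this repository): on a GPU host the built
# image could be smoke-tested locally by passing the runtime configuration as environment
# variables, e.g.
#
#   docker run --gpus all \
#     -e HF_MODEL_ID=bigscience/bloom-560m \
#     -e SM_NUM_GPUS=1 \
#     tgi-worker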