Dockerfile.gpu
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04
# Install Python
RUN apt-get update && apt-get install -y python3 python3-pip && python3 --version
# Install PyTorch with GPU support (the cu118 wheels bundle their own CUDA
# runtime, so they run on the CUDA 12.1 base image)
RUN pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Install git for bleeding-edge dependencies
RUN apt-get install -y git
COPY ./requirements.gpu.txt requirements.gpu.txt
RUN pip3 install -r requirements.gpu.txt
# Expose the CUDA libraries to the dynamic linker
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-12/targets/x86_64-linux/lib
# Tell bitsandbytes to use CUDA 12.1: it looks up an unversioned libcudart.so,
# so symlink that name to the versioned runtime library
RUN ln -s /usr/local/cuda-12/targets/x86_64-linux/lib/libcudart.so.12 /usr/local/cuda-12/targets/x86_64-linux/lib/libcudart.so
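# Optional sanity check for the symlink above (a hypothetical debugging step,
# not part of the original build; assumes bitsandbytes is pulled in via
# requirements.gpu.txt):
# RUN python3 -m bitsandbytes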
# Bleeding-edge dependency: a pinned commit of GPTQ-for-LLaMa
RUN pip3 install git+https://github.com/paolorechia/GPTQ-for-LLaMa@3797a5ced97e9075bd1786c5744091c146c7bc9f
# User-land application code
COPY ./guidance_server guidance_server
WORKDIR guidance_server
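# Build llama-cpp-python from source with cuBLAS enabled so inference runs on the GPU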
RUN LLAMA_CUBLAS=1 CMAKE_ARGS=-DLLAMA_CUBLAS=on FORCE_CMAKE=1 pip install llama-cpp-python --no-cache-dir --force-reinstall --verbose
# Default command: serve the app with uvicorn
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "9000"]
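# A minimal usage sketch (the image tag below is an assumption, not defined by
# this repo); --gpus all requires the NVIDIA Container Toolkit on the host:
#   docker build -f Dockerfile.gpu -t guidance-server-gpu .
#   docker run --gpus all -p 9000:9000 guidance-server-gpu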