From 222c8d7468536d4e480bad0f96bc87227aee8c1c Mon Sep 17 00:00:00 2001 From: Qing Lan Date: Sat, 1 Jun 2024 11:24:34 -0700 Subject: [PATCH 1/3] add peft version --- serving/docker/Dockerfile | 66 ++++++++++++++++++++++++++++++- serving/docker/docker-compose.yml | 6 +++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile index e96c885e7..4bee14a94 100644 --- a/serving/docker/Dockerfile +++ b/serving/docker/Dockerfile @@ -72,8 +72,70 @@ RUN scripts/install_python.sh && \ rm -rf /opt/djl/logs && \ chown -R djl:djl /opt/djl && \ rm -rf scripts && pip3 cache purge && \ - apt-get clean -y && rm -rf /var/lib/apt/lists/* \ + apt-get clean -y && rm -rf /var/lib/apt/lists/* LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true" -LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-27-0.cpu-full="true" +LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true" LABEL torch-version=$torch_version + + +FROM ubuntu:22.04 as vllm-build +# Borrowed from https://github.com/vllm-project/vllm/blob/v0.4.3/Dockerfile.cpu +ARG vllm_version=v0.4.3 + +WORKDIR /usr/src + +RUN apt-get update -y \ + && apt-get install -y git wget vim numactl gcc-12 g++-12 python3 python3-pip \ + && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 + +RUN pip install --upgrade pip \ + && pip install wheel packaging ninja setuptools>=49.4.0 numpy + +# FIXME: use official vLLM branch once the PR: https://github.com/vllm-project/vllm/pull/5200 merged +RUN git clone https://github.com/lanking520/vllm -b ${vllm_version} && cd vllm && \ + pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \ + VLLM_TARGET_DEVICE=cpu pip3 wheel . --no-deps + + +FROM base AS lmi-cpu + +ARG torch_version=2.3.0 +ARG protobuf_version=3.20.3 +ARG transformers_version=4.41.1 +ARG accelerate_version=0.30.1 +ARG datasets_version=2.19.1 +ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl" +ARG peft_version=0.11.1 + +COPY scripts scripts/ +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \ + scripts/install_python.sh && \ + scripts/install_djl_serving.sh $djl_version $torch_version && \ + scripts/install_s5cmd.sh x64 && \ + echo "${djl_version} cpufull" > /opt/djl/bin/telemetry && \ + djl-serving -i ai.djl.pytorch:pytorch-native-cpu:$torch_version:linux-x86_64 && \ + djl-serving -i ai.djl.tensorflow:tensorflow-native-cpu:2.10.1:linux-x86_64 && \ + pip3 cache purge && \ + apt-get clean -y && rm -rf /var/lib/apt/lists/* + +COPY --from=vllm-build /usr/src/vllm/*.whl scripts/ + +# FIXME remove Trtion import in the next release +RUN pip3 install torch==${torch_version}+cpu --extra-index-url https://download.pytorch.org/whl/cpu \ + ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \ + transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \ + mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} \ + triton >= 2.2.0 scripts/vllm*.whl \ + jinja2 safetensors ninja scipy sentence_transformers && \ + pip3 cache purge + +# final cleanup \ +RUN scripts/patch_oss_dlc.sh python && \ + rm -rf /opt/djl/logs && \ + chown -R djl:djl /opt/djl && \ + rm -rf scripts && pip3 cache purge && \ + apt-get clean -y && rm -rf /var/lib/apt/lists/* + +LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true" +LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true" diff --git a/serving/docker/docker-compose.yml b/serving/docker/docker-compose.yml index 201c2e550..caeecc7b0 100644 --- a/serving/docker/docker-compose.yml +++ b/serving/docker/docker-compose.yml @@ -12,6 +12,12 @@ services: target: cpu-full dockerfile: Dockerfile image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}cpu-full${NIGHTLY}" + lmi-cpu: + build: + context: . + target: lmi-cpu + dockerfile: Dockerfile + image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}lmi-cpu${NIGHTLY}" aarch64: build: context: . From f537f63bfcb6f3afad7211b4c1f57d2dc6a601da Mon Sep 17 00:00:00 2001 From: Qing Lan Date: Sun, 2 Jun 2024 16:09:40 -0700 Subject: [PATCH 2/3] shift order --- serving/docker/Dockerfile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile index 4bee14a94..2353e4709 100644 --- a/serving/docker/Dockerfile +++ b/serving/docker/Dockerfile @@ -122,11 +122,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio- COPY --from=vllm-build /usr/src/vllm/*.whl scripts/ # FIXME remove Trtion import in the next release -RUN pip3 install torch==${torch_version}+cpu --extra-index-url https://download.pytorch.org/whl/cpu \ +RUN pip3 install torch==${torch_version}+cpu triton >= 2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \ ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \ transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \ - mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} \ - triton >= 2.2.0 scripts/vllm*.whl \ + mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} scripts/vllm*.whl \ jinja2 safetensors ninja scipy sentence_transformers && \ pip3 cache purge From f7d96cf849218ee0204bc0084f0b8e48034ae617 Mon Sep 17 00:00:00 2001 From: Qing Lan Date: Sun, 2 Jun 2024 17:03:30 -0700 Subject: [PATCH 3/3] revert back --- serving/docker/Dockerfile | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile index 2353e4709..64cf26686 100644 --- a/serving/docker/Dockerfile +++ b/serving/docker/Dockerfile @@ -92,10 +92,9 @@ RUN apt-get update -y \ RUN pip install --upgrade pip \ && pip install wheel packaging ninja setuptools>=49.4.0 numpy -# FIXME: use official vLLM branch once the PR: https://github.com/vllm-project/vllm/pull/5200 merged -RUN git clone https://github.com/lanking520/vllm -b ${vllm_version} && cd vllm && \ +RUN git clone https://github.com/vllm-project/vllm -b ${vllm_version} && cd vllm && \ pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \ - VLLM_TARGET_DEVICE=cpu pip3 wheel . --no-deps + VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel FROM base AS lmi-cpu @@ -109,7 +108,8 @@ ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1. ARG peft_version=0.11.1 COPY scripts scripts/ -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \ +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev gcc-12 g++-12 && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \ scripts/install_python.sh && \ scripts/install_djl_serving.sh $djl_version $torch_version && \ scripts/install_s5cmd.sh x64 && \ @@ -119,10 +119,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio- pip3 cache purge && \ apt-get clean -y && rm -rf /var/lib/apt/lists/* -COPY --from=vllm-build /usr/src/vllm/*.whl scripts/ +COPY --from=vllm-build /usr/src/vllm/dist/*.whl scripts/ # FIXME remove Trtion import in the next release -RUN pip3 install torch==${torch_version}+cpu triton >= 2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \ +RUN pip3 install torch==${torch_version}+cpu triton>=2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \ ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \ transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \ mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} scripts/vllm*.whl \ @@ -136,5 +136,5 @@ RUN scripts/patch_oss_dlc.sh python && \ rm -rf scripts && pip3 cache purge && \ apt-get clean -y && rm -rf /var/lib/apt/lists/* -LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true" -LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true" +LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.lmi-cpu="true" +LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.lmi-cpu="true"