From 222c8d7468536d4e480bad0f96bc87227aee8c1c Mon Sep 17 00:00:00 2001
From: Qing Lan <qingla@amazon.com>
Date: Sat, 1 Jun 2024 11:24:34 -0700
Subject: [PATCH 1/3] add lmi-cpu image target with a CPU vLLM build stage

---
 serving/docker/Dockerfile         | 66 ++++++++++++++++++++++++++++++-
 serving/docker/docker-compose.yml |  6 +++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index e96c885e7..4bee14a94 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -72,8 +72,70 @@ RUN scripts/install_python.sh && \
     rm -rf /opt/djl/logs && \
     chown -R djl:djl /opt/djl && \
     rm -rf scripts && pip3 cache purge && \
-    apt-get clean -y && rm -rf /var/lib/apt/lists/* \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
 LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true"
-LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-27-0.cpu-full="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true"
 LABEL torch-version=$torch_version
+
+
+FROM ubuntu:22.04 AS vllm-build
+# Borrowed from https://github.com/vllm-project/vllm/blob/v0.4.3/Dockerfile.cpu
+ARG vllm_version=v0.4.3
+
+WORKDIR /usr/src
+
+RUN apt-get update  -y \
+    && apt-get install -y git wget vim numactl gcc-12 g++-12 python3 python3-pip \
+    && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+
+RUN pip install --upgrade pip \
+    && pip install wheel packaging ninja setuptools>=49.4.0 numpy
+
+# FIXME: use official vLLM branch once the PR: https://github.com/vllm-project/vllm/pull/5200 merged
+RUN git clone https://github.com/lanking520/vllm -b ${vllm_version} && cd vllm && \
+    pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \
+    VLLM_TARGET_DEVICE=cpu pip3 wheel . --no-deps
+
+
+FROM base AS lmi-cpu
+
+ARG torch_version=2.3.0
+ARG protobuf_version=3.20.3
+ARG transformers_version=4.41.1
+ARG accelerate_version=0.30.1
+ARG datasets_version=2.19.1
+ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.0-py3-none-any.whl"
+ARG peft_version=0.11.1
+
+COPY scripts scripts/
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \
+    scripts/install_python.sh && \
+    scripts/install_djl_serving.sh $djl_version $torch_version && \
+    scripts/install_s5cmd.sh x64 && \
+    echo "${djl_version} cpufull" > /opt/djl/bin/telemetry && \
+    djl-serving -i ai.djl.pytorch:pytorch-native-cpu:$torch_version:linux-x86_64 && \
+    djl-serving -i ai.djl.tensorflow:tensorflow-native-cpu:2.10.1:linux-x86_64 && \
+    pip3 cache purge && \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+COPY --from=vllm-build /usr/src/vllm/*.whl scripts/
+
+# FIXME remove Trtion import in the next release
+RUN pip3 install torch==${torch_version}+cpu --extra-index-url https://download.pytorch.org/whl/cpu \
+    ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \
+    transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \
+    mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} \
+    triton >= 2.2.0 scripts/vllm*.whl \
+    jinja2 safetensors ninja scipy sentence_transformers && \
+    pip3 cache purge
+
+# final cleanup
+RUN scripts/patch_oss_dlc.sh python && \
+    rm -rf /opt/djl/logs && \
+    chown -R djl:djl /opt/djl && \
+    rm -rf scripts && pip3 cache purge && \
+    apt-get clean -y && rm -rf /var/lib/apt/lists/*
+
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true"
diff --git a/serving/docker/docker-compose.yml b/serving/docker/docker-compose.yml
index 201c2e550..caeecc7b0 100644
--- a/serving/docker/docker-compose.yml
+++ b/serving/docker/docker-compose.yml
@@ -12,6 +12,12 @@ services:
       target: cpu-full
       dockerfile: Dockerfile
     image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}cpu-full${NIGHTLY}"
+  lmi-cpu:
+    build:
+      context: .
+      target: lmi-cpu
+      dockerfile: Dockerfile
+    image: "deepjavalibrary/djl-serving:${RELEASE_VERSION}lmi-cpu${NIGHTLY}"
   aarch64:
     build:
       context: .

From f537f63bfcb6f3afad7211b4c1f57d2dc6a601da Mon Sep 17 00:00:00 2001
From: Qing Lan <qingla@amazon.com>
Date: Sun, 2 Jun 2024 16:09:40 -0700
Subject: [PATCH 2/3] move triton requirement ahead of --extra-index-url

---
 serving/docker/Dockerfile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index 4bee14a94..2353e4709 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -122,11 +122,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-
 COPY --from=vllm-build /usr/src/vllm/*.whl scripts/
 
 # FIXME remove Trtion import in the next release
-RUN pip3 install torch==${torch_version}+cpu --extra-index-url https://download.pytorch.org/whl/cpu \
+RUN pip3 install torch==${torch_version}+cpu triton >= 2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \
     ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \
     transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \
-    mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} \
-    triton >= 2.2.0 scripts/vllm*.whl \
+    mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} scripts/vllm*.whl \
     jinja2 safetensors ninja scipy sentence_transformers && \
     pip3 cache purge
 

From f7d96cf849218ee0204bc0084f0b8e48034ae617 Mon Sep 17 00:00:00 2001
From: Qing Lan <qingla@amazon.com>
Date: Sun, 2 Jun 2024 17:03:30 -0700
Subject: [PATCH 3/3] build vLLM from upstream repo and fix lmi-cpu labels

---
 serving/docker/Dockerfile | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/serving/docker/Dockerfile b/serving/docker/Dockerfile
index 2353e4709..64cf26686 100644
--- a/serving/docker/Dockerfile
+++ b/serving/docker/Dockerfile
@@ -92,10 +92,9 @@ RUN apt-get update  -y \
 RUN pip install --upgrade pip \
-    && pip install wheel packaging ninja setuptools>=49.4.0 numpy
+    && pip install wheel packaging ninja "setuptools>=49.4.0" numpy
 
-# FIXME: use official vLLM branch once the PR: https://github.com/vllm-project/vllm/pull/5200 merged
-RUN git clone https://github.com/lanking520/vllm -b ${vllm_version} && cd vllm && \
+RUN git clone https://github.com/vllm-project/vllm -b ${vllm_version} && cd vllm && \
     pip install -v -r requirements-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu && \
-    VLLM_TARGET_DEVICE=cpu pip3 wheel . --no-deps
+    VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel
 
 
 FROM base AS lmi-cpu
@@ -109,7 +108,8 @@ ARG seq_scheduler_wheel="https://publish.djl.ai/seq_scheduler/seq_scheduler-0.1.
 ARG peft_version=0.11.1
 
 COPY scripts scripts/
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev g++ && \
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-dev libopenmpi-dev gcc-12 g++-12 && \
+    update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 && \
     scripts/install_python.sh && \
     scripts/install_djl_serving.sh $djl_version $torch_version && \
     scripts/install_s5cmd.sh x64 && \
@@ -119,10 +119,10 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -yq libaio-
     pip3 cache purge && \
     apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
-COPY --from=vllm-build /usr/src/vllm/*.whl scripts/
+COPY --from=vllm-build /usr/src/vllm/dist/*.whl scripts/
 
 # FIXME remove Trtion import in the next release
-RUN pip3 install torch==${torch_version}+cpu triton >= 2.2.0 --extra-index-url https://download.pytorch.org/whl/cpu \
+RUN pip3 install torch==${torch_version}+cpu "triton>=2.2.0" --extra-index-url https://download.pytorch.org/whl/cpu \
     ${seq_scheduler_wheel} peft==${peft_version} protobuf==${protobuf_version} \
     transformers==${transformers_version} hf-transfer zstandard datasets==${datasets_version} \
     mpi4py sentencepiece tiktoken blobfile einops accelerate==${accelerate_version} scripts/vllm*.whl \
@@ -136,5 +136,5 @@ RUN scripts/patch_oss_dlc.sh python && \
     rm -rf scripts && pip3 cache purge && \
     apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
-LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.cpu-full="true"
-LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.cpu-full="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.lmi-cpu="true"
+LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.djl.v0-28-0.lmi-cpu="true"