diff --git a/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm b/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm index 8b005722..76085416 100644 --- a/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm +++ b/model-engine/model_engine_server/inference/vllm/Dockerfile.vllm @@ -1,6 +1,7 @@ # syntax=docker/dockerfile:1 -ARG VLLM_VERSION=0.6.2 -ARG VLLM_BASE_IMAGE=vllm/vllm-openai:v${VLLM_VERSION} +ARG VLLM_VERSION=0.6.3 +ARG VLLM_BASE_REPO=vllm/vllm-openai +ARG VLLM_BASE_IMAGE=${VLLM_BASE_REPO}:v${VLLM_VERSION} FROM ${VLLM_BASE_IMAGE} AS base RUN apt-get update \ diff --git a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh index 65c49b32..10765cc0 100755 --- a/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh +++ b/model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh @@ -4,7 +4,7 @@ set -eo pipefail # Build and push vLLM docker image to AWS ECR. # -# Usage: VLLM_VERSION=0.5.3.post1 ./build_and_upload_image.sh vllm|vllm_batch|vllm_batch_v2 +# Usage: VLLM_VERSION=0.6.3 ./build_and_upload_image.sh <account> <image_tag> vllm|vllm_batch|vllm_batch_v2 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) PROJECT_DIR=$SCRIPT_DIR/../../../.. 
@@ -21,14 +21,16 @@ if [ -z "$2" ]; then fi if [ -z "$3" ]; then - echo "Must supply the build target (either vllm or vllm_batch)" + echo "Must supply the build target (either vllm or vllm_batch_v2)" exit 1; fi + ACCOUNT=$1 IMAGE_TAG=$2 BUILD_TARGET=$3 -VLLM_VERSION=${VLLM_VERSION:-"0.6.2"} +VLLM_VERSION=${VLLM_VERSION:-"0.6.3"} +VLLM_BASE_REPO=${VLLM_BASE_REPO:-"vllm/vllm-openai"} # if build target = vllm use vllm otherwise use vllm_batch if [ "$BUILD_TARGET" == "vllm" ]; then @@ -40,6 +42,7 @@ fi aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin $ACCOUNT.dkr.ecr.us-west-2.amazonaws.com DOCKER_BUILDKIT=1 docker build \ --build-arg VLLM_VERSION=${VLLM_VERSION} \ + --build-arg VLLM_BASE_REPO=${VLLM_BASE_REPO} \ -f Dockerfile.vllm \ --target ${BUILD_TARGET} \ -t $IMAGE ${PROJECT_DIR} diff --git a/model-engine/model_engine_server/inference/vllm/requirements-dev.txt b/model-engine/model_engine_server/inference/vllm/requirements-dev.txt index d330101a..b75668a1 100644 --- a/model-engine/model_engine_server/inference/vllm/requirements-dev.txt +++ b/model-engine/model_engine_server/inference/vllm/requirements-dev.txt @@ -1 +1 @@ -vllm==0.6.2 \ No newline at end of file +vllm==0.6.3