From b962ee1470a019a72a1c17eddcf3a0471658a123 Mon Sep 17 00:00:00 2001
From: sumitd2 <91451282+sumitd2@users.noreply.github.com>
Date: Sat, 7 Sep 2024 23:48:40 +0530
Subject: [PATCH] ppc64le: Dockerfile fixed, and a script for buildkite (#8026)

---
 .buildkite/run-cpu-test-ppc64le.sh | 32 ++++++++++++++++++++++++++++++
 Dockerfile.ppc64le                 | 16 ++++++++++-----
 2 files changed, 43 insertions(+), 5 deletions(-)
 create mode 100755 .buildkite/run-cpu-test-ppc64le.sh

diff --git a/.buildkite/run-cpu-test-ppc64le.sh b/.buildkite/run-cpu-test-ppc64le.sh
new file mode 100755
index 0000000000000..a01cf3fe67489
--- /dev/null
+++ b/.buildkite/run-cpu-test-ppc64le.sh
@@ -0,0 +1,32 @@
+# This script builds the CPU docker image and runs offline inference inside the container.
+# It serves as a sanity check for compilation and basic model usage.
+set -ex
+
+# Try building the docker image
+docker build -t cpu-test -f Dockerfile.ppc64le .
+
+# Set up cleanup
+remove_docker_container() { docker rm -f cpu-test || true; }
+trap remove_docker_container EXIT
+remove_docker_container
+
+# Run the image, setting --shm-size=4g for tensor parallel.
+#docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test cpu-test
+docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --privileged=true --network host -e HF_TOKEN --name cpu-test cpu-test
+
+# Run basic model test
+docker exec cpu-test bash -c "
+  pip install pytest matplotlib einops transformers_stream_generator
+  pytest -v -s tests/models -m \"not vlm\" --ignore=tests/models/test_embedding.py --ignore=tests/models/test_oot_registration.py --ignore=tests/models/test_registry.py --ignore=tests/models/test_jamba.py --ignore=tests/models/test_danube3_4b.py" # Mamba and Danube3-4B are not supported on CPU
+
+# Run online inference test
+docker exec cpu-test bash -c "
+  python3 -m vllm.entrypoints.openai.api_server --model facebook/opt-125m &
+  timeout 600 bash -c 'until curl localhost:8000/v1/models; do sleep 1; done' || exit 1
+  python3 benchmarks/benchmark_serving.py \
+    --backend vllm \
+    --dataset-name random \
+    --model facebook/opt-125m \
+    --num-prompts 20 \
+    --endpoint /v1/completions \
+    --tokenizer facebook/opt-125m"

diff --git a/Dockerfile.ppc64le b/Dockerfile.ppc64le
index d4e4c483cada8..16780f8ab950c 100644
--- a/Dockerfile.ppc64le
+++ b/Dockerfile.ppc64le
@@ -2,21 +2,27 @@ FROM mambaorg/micromamba
 ARG MAMBA_DOCKERFILE_ACTIVATE=1
 USER root
 
-RUN apt-get update -y && apt-get install -y git wget vim numactl gcc-12 g++-12 protobuf-compiler libprotobuf-dev && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12
+ENV PATH="/usr/local/cargo/bin:$PATH:/opt/conda/bin/"
+
+RUN apt-get update -y && apt-get install -y git wget vim libnuma-dev libsndfile-dev libprotobuf-dev build-essential
 
 # Some packages in requirements-cpu are installed here
 # IBM provides optimized packages for ppc64le processors in the open-ce project for mamba
 # Currently these may not be available for venv or pip directly
-RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 pytorch-cpu=2.1.2 torchvision-cpu=0.16.2 && micromamba clean --all --yes
+RUN micromamba install -y -n base -c https://ftp.osuosl.org/pub/open-ce/1.11.0-p10/ -c defaults python=3.10 torchvision-cpu=0.16.2 rust && micromamba clean --all --yes
 
 COPY ./ /workspace/vllm
 
 WORKDIR /workspace/vllm
 
 # These packages will be in rocketce eventually
-RUN pip install -v -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
+RUN pip install -v cmake torch==2.3.1 uvloop==0.20.0 -r requirements-cpu.txt --prefer-binary --extra-index-url https://repo.fury.io/mgiessing
 
 RUN VLLM_TARGET_DEVICE=cpu python3 setup.py install
 
-WORKDIR /vllm-workspace
-ENTRYPOINT ["/opt/conda/bin/python3", "-m", "vllm.entrypoints.openai.api_server"]
+WORKDIR /workspace/
+
+RUN ln -s /workspace/vllm/tests && ln -s /workspace/vllm/examples && ln -s /workspace/vllm/benchmarks
+
+ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
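For reviewers who want to exercise the new CI script outside Buildkite, it can be run directly on a ppc64le host. A minimal sketch, assuming Docker is installed and a Hugging Face token is exported (the token value below is a placeholder):

    # From the vLLM repository root on a ppc64le machine
    export HF_TOKEN=<your-huggingface-token>   # placeholder; some test models require it
    bash .buildkite/run-cpu-test-ppc64le.sh

The EXIT trap in the script removes the cpu-test container on both success and failure, so an aborted run should not leave a stale container behind.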
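Because the Dockerfile keeps python3 -m vllm.entrypoints.openai.api_server as the ENTRYPOINT, any arguments passed to docker run are forwarded to the server. A hedged sketch of serving and querying a small model (the image tag vllm-ppc64le is illustrative, not part of this patch; vLLM's OpenAI-compatible server listens on port 8000 by default):

    docker build -t vllm-ppc64le -f Dockerfile.ppc64le .
    docker run -d --name vllm-ppc64le -p 8000:8000 vllm-ppc64le --model facebook/opt-125m
    # Once the server is up, query the OpenAI-compatible completions endpoint
    curl http://localhost:8000/v1/completions \
        -H "Content-Type: application/json" \
        -d '{"model": "facebook/opt-125m", "prompt": "Hello, world", "max_tokens": 16}'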