@@ -213,17 +213,7 @@ RUN --mount=type=bind,from=build_amdsmi,src=/install,target=/install/amdsmi/ \
213
213
/install/flashattention/*.whl\
214
214
/install/vllm/*.whl
215
215
216
- # Set up a non-root user for OpenShift
217
- RUN umask 002 && \
218
- useradd --uid 2000 --gid 0 vllm && \
219
- mkdir -p /licenses && \
220
- chmod g+rwx $HOME /usr/src /workspace
221
-
222
- COPY LICENSE /licenses/vllm.md
223
- COPY examples/*.jinja /app/data/template/
224
-
225
216
ENV HF_HUB_OFFLINE=1 \
226
- PORT=8000 \
227
217
HOME=/home/vllm \
228
218
# Allow requested max length to exceed what is extracted from the
229
219
# config.json
@@ -236,14 +226,23 @@ ENV HF_HUB_OFFLINE=1 \
236
226
TOKENIZERS_PARALLELISM=false \
237
227
RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 \
238
228
VLLM_USE_TRITON_FLASH_ATTN=0 \
229
+ HIP_FORCE_DEV_KERNARG=1 \
239
230
OUTLINES_CACHE_DIR=/tmp/outlines \
240
231
NUMBA_CACHE_DIR=/tmp/numba \
241
232
TRITON_CACHE_DIR=/tmp/triton
242
233
243
- # Switch to the non-root user
234
+ # setup non-root user for OpenShift
235
+ RUN umask 002 && \
236
+ useradd --uid 2000 --gid 0 vllm && \
237
+ mkdir -p /licenses /home/vllm && \
238
+ chmod g+rwx /home/vllm
239
+
240
+ COPY LICENSE /licenses/vllm.md
241
+ COPY examples/*.jinja /app/data/template/
242
+
244
243
USER 2000
244
+ WORKDIR /home/vllm
245
245
246
- # Set the entrypoint
247
246
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
248
247
249
248
@@ -254,7 +253,7 @@ USER root
254
253
RUN --mount=type=cache,target=/root/.cache/pip \
255
254
--mount=type=cache,target=/root/.cache/uv \
256
255
--mount=type=bind,from=build_vllm,src=/workspace/dist,target=/install/vllm/ \
257
- HOME=/root/ uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.5.3
256
+ HOME=/root uv pip install /install/vllm/*.whl vllm-tgis-adapter==0.5.3
258
257
259
258
ENV GRPC_PORT=8033 \
260
259
PORT=8000 \
0 commit comments