From 0ff543055b9e686628aad240a2bedc1808008343 Mon Sep 17 00:00:00 2001
From: Liren Tu
Date: Sat, 2 Nov 2024 13:43:19 -0700
Subject: [PATCH] Set correct vllm params for 3b model

---
 docker-compose.yaml | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 650d8b6..c76ebdf 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,7 +8,29 @@ services:
   liquid-labs-vllm:
     image: liquidai/liquid-labs-vllm:${STACK_VERSION}
     container_name: ${MODEL_NAME}
-    command: --model /model --port 9000
+    command:
+      - --model
+      - /model
+      - --port
+      - "9000"
+      - --max-logprobs
+      - "0"
+      - --dtype
+      - bfloat16
+      - --device
+      - cuda
+      - --enable-chunked-prefill
+      - "False"
+      - --tensor_parallel_size
+      - "1"
+      - --gpu-memory-utilization
+      - "0.75"
+      - --max-model-len
+      - "65536"
+      - --max-num-seqs
+      - "1000"
+      - --max-seq-len-to-capture
+      - "65536"
     depends_on:
       - liquid-labs-model-volume
     # This is equivalent to "runtime: nvidia", but does not require