Set correct vllm params for 3b model
tuliren committed Nov 2, 2024
1 parent 0f122be commit 0ff5430
Showing 1 changed file with 23 additions and 1 deletion.
docker-compose.yaml: 23 additions & 1 deletion
```diff
@@ -8,7 +8,29 @@ services:
   liquid-labs-vllm:
     image: liquidai/liquid-labs-vllm:${STACK_VERSION}
     container_name: ${MODEL_NAME}
-    command: --model /model --port 9000
+    command:
+      - --model
+      - /model
+      - --port
+      - "9000"
+      - --max-logprobs
+      - "0"
+      - --dtype
+      - bfloat16
+      - --device
+      - cuda
+      - --enable-chunked-prefill
+      - "False"
+      - --tensor_parallel_size
+      - "1"
+      - --gpu-memory-utilization
+      - "0.75"
+      - --max-model-len
+      - "65536"
+      - --max-num-seqs
+      - "1000"
+      - --max-seq-len-to-capture
+      - "65536"
     depends_on:
       - liquid-labs-model-volume
     # This is equivalent to "runtime: nvidia", but does not require
```
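Because Compose's list-form `command` passes each element through verbatim, every flag must be followed by exactly one value or the entrypoint receives a shifted argument list. The pairing in the new list can be sanity-checked offline — a minimal sketch (the list is copied from the diff; this only mirrors how the arguments line up, it does not invoke vLLM):

```python
# Argument list as it appears in the updated docker-compose.yaml.
command = [
    "--model", "/model",
    "--port", "9000",
    "--max-logprobs", "0",
    "--dtype", "bfloat16",
    "--device", "cuda",
    "--enable-chunked-prefill", "False",
    "--tensor_parallel_size", "1",
    "--gpu-memory-utilization", "0.75",
    "--max-model-len", "65536",
    "--max-num-seqs", "1000",
    "--max-seq-len-to-capture", "65536",
]

# Pair each even-indexed flag with the odd-indexed value that follows it.
args = dict(zip(command[0::2], command[1::2]))

# The list must split cleanly into flag/value pairs, and every key
# in the even positions must actually be a flag.
assert len(command) % 2 == 0
assert all(flag.startswith("--") for flag in args)

# The two length-related flags are set to the same context window.
assert args["--max-model-len"] == args["--max-seq-len-to-capture"]
```

Quoting the numeric values (`"9000"`, `"0.75"`, …) in the YAML keeps them strings, which is what a CLI expects; unquoted, some YAML loaders would hand Compose integers or floats instead.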
