From 0ff543055b9e686628aad240a2bedc1808008343 Mon Sep 17 00:00:00 2001
From: Liren Tu
Date: Sat, 2 Nov 2024 13:43:19 -0700
Subject: [PATCH] Set correct vllm params for 3b model

---
 docker-compose.yaml | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 650d8b6..c76ebdf 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -8,7 +8,29 @@ services:
   liquid-labs-vllm:
     image: liquidai/liquid-labs-vllm:${STACK_VERSION}
     container_name: ${MODEL_NAME}
-    command: --model /model --port 9000
+    command:
+      - --model
+      - /model
+      - --port
+      - "9000"
+      - --max-logprobs
+      - "0"
+      - --dtype
+      - bfloat16
+      - --device
+      - cuda
+      - --enable-chunked-prefill
+      - "False"
+      - --tensor_parallel_size
+      - "1"
+      - --gpu-memory-utilization
+      - "0.75"
+      - --max-model-len
+      - "65536"
+      - --max-num-seqs
+      - "1000"
+      - --max-seq-len-to-capture
+      - "65536"
     depends_on:
       - liquid-labs-model-volume
     # This is equivalent to "runtime: nvidia", but does not require