fix(layout): uses requirements.txt

Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
bentoml · Feb 11, 2025 · 1eeba4c · 1eeba4c
1 parent 8ccf4b4
commit 1eeba4c
Show file tree

Hide file tree

Showing 174 changed files with 537 additions and 7,993 deletions.
diff --git a/config.yaml b/config.yaml
@@ -3,7 +3,7 @@
     description: DeepSeek V3 671B
     provider: DeepSeek
   service_config:
-    name: deepseek-v3
+    name: bentovllm-deepseek-v3-671b-service
     traffic:
       timeout: 300
     resources:
@@ -20,7 +20,7 @@
     description: DeepSeek R1 671B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1
+    name: bentovllm-deepseek-r1-671b-service
     traffic:
       timeout: 300
     resources:
@@ -31,16 +31,16 @@
   engine_config:
     model: deepseek-ai/DeepSeek-R1
     tensor_parallel_size: 16
-    trust_remote_code: True
+    trust_remote_code: true
   server_config:
-    enable_reasoning: True
+    enable_reasoning: true
     reasoning_parser: deepseek_r1
 "deepseek-r1-distill-llama3.3-70b":
   metadata:
     description: DeepSeek R1 Distill Llama 3.3 70B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-llama3.3-70b-service
     traffic:
       timeout: 300
     resources:
@@ -57,7 +57,7 @@
     description: DeepSeek R1 Distill Qwen 2.5 32B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-qwen2.5-32b-service
     resources:
       gpu: 1
       gpu_type: nvidia-a100-80gb
@@ -73,7 +73,7 @@
     description: DeepSeek R1 Distill Qwen 2.5 14B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-qwen2.5-14b-service
     traffic:
       timeout: 300
     resources:
@@ -89,7 +89,7 @@
     description: DeepSeek R1 Distill Qwen 2.5 Math 7B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-qwen2.5-7b-math-service
     traffic:
       timeout: 300
     resources:
@@ -105,7 +105,7 @@
     description: DeepSeek R1 Distill Llama 3.1 8B
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-llama3.1-8b-service
     traffic:
       timeout: 300
     resources:
@@ -121,7 +121,7 @@
     description: DeepSeek R1 Distill Llama 3.1 8B Tool Calling
     provider: DeepSeek
   service_config:
-    name: deepseek-r1-distill
+    name: bentovllm-r1-llama3.1-8b-tool-calling-service
     traffic:
       timeout: 300
     resources:
@@ -133,15 +133,15 @@
     model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
     max_model_len: 4096
   server_config:
-    enable_auto_tool_choice: True
-    enable_tool_call_parser: True
+    enable_auto_tool_choice: true
+    enable_tool_call_parser: true
     tool_call_parser: "llama3_json"
 "gemma2-2b-instruct":
   metadata:
     description: Gemma 2 2B Instruct
     provider: Google
   service_config:
-    name: gemma2
+    name: bentovllm-gemma2-2b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -158,7 +158,7 @@
     description: Gemma 2 9B Instruct
     provider: Google
   service_config:
-    name: gemma2
+    name: bentovllm-gemma2-9b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -177,7 +177,7 @@
     description: Gemma 2 27B Instruct
     provider: Google
   service_config:
-    name: gemma2
+    name: bentovllm-gemma2-27b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -196,7 +196,7 @@
     description: Jamba 1.5 Mini
     provider: AI21 Labs
   service_config:
-    name: jamba1.5
+    name: bentovllm-jamba1.5-mini-service
     traffic:
       timeout: 300
     resources:
@@ -214,7 +214,7 @@
     description: Llama 3.1 8B Instruct
     provider: Meta
   service_config:
-    name: llama3.1
+    name: bentovllm-llama3.1-8b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -231,7 +231,7 @@
     description: Llama 3.2 1B Instruct
     provider: Meta
   service_config:
-    name: llama3.2
+    name: bentovllm-llama3.2-1b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -249,7 +249,7 @@
     description: Llama 3.2 3B Instruct
     provider: Meta
   service_config:
-    name: llama3.2
+    name: bentovllm-llama3.2-3b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -268,7 +268,7 @@
     provider: Meta
   vision: true
   service_config:
-    name: llama3.2
+    name: bentovllm-llama3.2-11b-v-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -289,7 +289,7 @@
     provider: Meta
   vision: true
   service_config:
-    name: llama3.2
+    name: bentovllm-llama3.2-90b-v-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -309,7 +309,7 @@
     description: Llama 3.3 70B Instruct
     provider: Meta
   service_config:
-    name: llama3.3
+    name: bentovllm-llama3.3-70b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -327,7 +327,7 @@
     provider: Mistral AI
   vision: true
   service_config:
-    name: pixtral
+    name: bentovllm-pixtral-12b-2409-service
     traffic:
       timeout: 300
     resources:
@@ -348,7 +348,7 @@
     description: Mixtral 8x7B v0.1 Mixture of Experts
     provider: Mistral AI
   service_config:
-    name: mixtral
+    name: bentovllm-mixtral-8x7b-v0.1-service
     traffic:
       timeout: 300
     resources:
@@ -366,7 +366,7 @@
     description: Ministral 8B Instruct 2410
     provider: Mistral AI
   service_config:
-    name: mistral-mini
+    name: bentovllm-ministral-8b-instruct-2410-service
     traffic:
       timeout: 300
     resources:
@@ -384,7 +384,7 @@
     description: Mistral Small 24B Instruct 2501
     provider: Mistral AI
   service_config:
-    name: mistral-small
+    name: bentovllm-mistral-small-24b-instruct-2501-service
     traffic:
       timeout: 300
     resources:
@@ -401,7 +401,7 @@
     description: Mistral Large 123B Instruct 2407
     provider: Mistral AI
   service_config:
-    name: mistral-large
+    name: bentovllm-mistral-large-123b-instruct-2407-service
     traffic:
       timeout: 300
     resources:
@@ -419,7 +419,7 @@
     description: Phi 4 14B
     provider: Microsoft
   service_config:
-    name: phi4
+    name: bentovllm-phi4-14b-service
     traffic:
       timeout: 300
     resources:
@@ -433,7 +433,7 @@
     description: Qwen 2.5 7B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5
+    name: bentovllm-qwen2.5-7b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -447,7 +447,7 @@
     description: Qwen 2.5 14B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5
+    name: bentovllm-qwen2.5-14b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -461,7 +461,7 @@
     description: Qwen 2.5 32B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5
+    name: bentovllm-qwen2.5-32b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -475,7 +475,7 @@
     description: Qwen 2.5 72B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5
+    name: bentovllm-qwen2.5-72b-instruct-service
     traffic:
       timeout: 300
     resources:
@@ -489,7 +489,7 @@
     description: Qwen 2.5 Coder 7B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5-coder
+    name: bentovllm-qwen2.5-coder-7b-instruct-service
     resources:
       gpu: 1
       gpu_type: nvidia-l4
@@ -499,15 +499,15 @@
     model: Qwen/Qwen2.5-Coder-7B-Instruct
     max_model_len: 8192
   server_config:
-    enable_auto_tool_choice: True
-    enable_tool_call_parser: True
+    enable_auto_tool_choice: true
+    enable_tool_call_parser: true
     tool_call_parser: "llama3_json"
 "qwen2.5-coder-32b-instruct":
   metadata:
     description: Qwen 2.5 Coder 32B Instruct
     provider: Alibaba
   service_config:
-    name: qwen2.5-coder
+    name: bentovllm-qwen2.5-coder-32b-instruct-service
     resources:
       gpu: 1
       gpu_type: nvidia-a100-80gb
@@ -517,8 +517,8 @@
     model: Qwen/Qwen2.5-Coder-32B-Instruct
     max_model_len: 8192
   server_config:
-    enable_auto_tool_choice: True
-    enable_tool_call_parser: True
+    enable_auto_tool_choice: true
+    enable_tool_call_parser: true
     tool_call_parser: "llama3_json"
 "qwen2.5vl-3b-instruct":
   metadata:
@@ -529,7 +529,7 @@
     max_model_len: 2048
     model: Qwen/Qwen2.5-VL-3B-Instruct
   service_config:
-    name: qwen2.5vl
+    name: bentovllm-qwen2.5vl-3b-instruct-service
     resources:
       gpu: 1
       gpu_type: nvidia-l4
@@ -546,7 +546,7 @@
     max_model_len: 2048
     model: Qwen/Qwen2.5-VL-7B-Instruct
   service_config:
-    name: qwen2.5vl
+    name: bentovllm-qwen2.5vl-7b-instruct-service
     resources:
       gpu: 1
       gpu_type: nvidia-l4

diff --git a/deepseek-r1-671b/.bentoignore b/deepseek-r1-671b/.bentoignore
@@ -0,0 +1,5 @@
+__pycache__/
+*.py[cod]
+*$py.class
+.ipynb_checkpoints
+venv/