Skip to content

Commit

Permalink
fix(layout): uses requirements.txt
Browse files Browse the repository at this point in the history
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
  • Loading branch information
aarnphm committed Feb 11, 2025
1 parent 8ccf4b4 commit 1eeba4c
Show file tree
Hide file tree
Showing 174 changed files with 537 additions and 7,993 deletions.
80 changes: 40 additions & 40 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
description: DeepSeek V3 671B
provider: DeepSeek
service_config:
name: deepseek-v3
name: bentovllm-deepseek-v3-671b-service
traffic:
timeout: 300
resources:
Expand All @@ -20,7 +20,7 @@
description: DeepSeek R1 671B
provider: DeepSeek
service_config:
name: deepseek-r1
name: bentovllm-deepseek-r1-671b-service
traffic:
timeout: 300
resources:
Expand All @@ -31,16 +31,16 @@
engine_config:
model: deepseek-ai/DeepSeek-R1
tensor_parallel_size: 16
trust_remote_code: True
trust_remote_code: true
server_config:
enable_reasoning: True
enable_reasoning: true
reasoning_parser: deepseek_r1
"deepseek-r1-distill-llama3.3-70b":
metadata:
description: DeepSeek R1 Distill Llama 3.3 70B
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-llama3.3-70b-service
traffic:
timeout: 300
resources:
Expand All @@ -57,7 +57,7 @@
description: DeepSeek R1 Distill Qwen 2.5 32B
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-qwen2.5-32b-service
resources:
gpu: 1
gpu_type: nvidia-a100-80gb
Expand All @@ -73,7 +73,7 @@
description: DeepSeek R1 Distill Qwen 2.5 14B
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-qwen2.5-14b-service
traffic:
timeout: 300
resources:
Expand All @@ -89,7 +89,7 @@
description: DeepSeek R1 Distill Qwen 2.5 Math 7B
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-qwen2.5-7b-math-service
traffic:
timeout: 300
resources:
Expand All @@ -105,7 +105,7 @@
description: DeepSeek R1 Distill Llama 3.1 8B
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-llama3.1-8b-service
traffic:
timeout: 300
resources:
Expand All @@ -121,7 +121,7 @@
description: DeepSeek R1 Distill Llama 3.1 8B Tool Calling
provider: DeepSeek
service_config:
name: deepseek-r1-distill
name: bentovllm-r1-llama3.1-8b-tool-calling-service
traffic:
timeout: 300
resources:
Expand All @@ -133,15 +133,15 @@
model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
max_model_len: 4096
server_config:
enable_auto_tool_choice: True
enable_tool_call_parser: True
enable_auto_tool_choice: true
enable_tool_call_parser: true
tool_call_parser: "llama3_json"
"gemma2-2b-instruct":
metadata:
description: Gemma 2 2B Instruct
provider: Google
service_config:
name: gemma2
name: bentovllm-gemma2-2b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -158,7 +158,7 @@
description: Gemma 2 9B Instruct
provider: Google
service_config:
name: gemma2
name: bentovllm-gemma2-9b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -177,7 +177,7 @@
description: Gemma 2 27B Instruct
provider: Google
service_config:
name: gemma2
name: bentovllm-gemma2-27b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -196,7 +196,7 @@
description: Jamba 1.5 Mini
provider: AI21 Labs
service_config:
name: jamba1.5
name: bentovllm-jamba1.5-mini-service
traffic:
timeout: 300
resources:
Expand All @@ -214,7 +214,7 @@
description: Llama 3.1 8B Instruct
provider: Meta
service_config:
name: llama3.1
name: bentovllm-llama3.1-8b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -231,7 +231,7 @@
description: Llama 3.2 1B Instruct
provider: Meta
service_config:
name: llama3.2
name: bentovllm-llama3.2-1b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -249,7 +249,7 @@
description: Llama 3.2 3B Instruct
provider: Meta
service_config:
name: llama3.2
name: bentovllm-llama3.2-3b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -268,7 +268,7 @@
provider: Meta
vision: true
service_config:
name: llama3.2
name: bentovllm-llama3.2-11b-v-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -289,7 +289,7 @@
provider: Meta
vision: true
service_config:
name: llama3.2
name: bentovllm-llama3.2-90b-v-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -309,7 +309,7 @@
description: Llama 3.3 70B Instruct
provider: Meta
service_config:
name: llama3.3
name: bentovllm-llama3.3-70b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -327,7 +327,7 @@
provider: Mistral AI
vision: true
service_config:
name: pixtral
name: bentovllm-pixtral-12b-2409-service
traffic:
timeout: 300
resources:
Expand All @@ -348,7 +348,7 @@
description: Mixtral 8x7B v0.1 Mixture of Experts
provider: Mistral AI
service_config:
name: mixtral
name: bentovllm-mixtral-8x7b-v0.1-service
traffic:
timeout: 300
resources:
Expand All @@ -366,7 +366,7 @@
description: Ministral 8B Instruct 2410
provider: Mistral AI
service_config:
name: mistral-mini
name: bentovllm-ministral-8b-instruct-2410-service
traffic:
timeout: 300
resources:
Expand All @@ -384,7 +384,7 @@
description: Mistral Small 24B Instruct 2501
provider: Mistral AI
service_config:
name: mistral-small
name: bentovllm-mistral-small-24b-instruct-2501-service
traffic:
timeout: 300
resources:
Expand All @@ -401,7 +401,7 @@
description: Mistral Large 123B Instruct 2407
provider: Mistral AI
service_config:
name: mistral-large
name: bentovllm-mistral-large-123b-instruct-2407-service
traffic:
timeout: 300
resources:
Expand All @@ -419,7 +419,7 @@
description: Phi 4 14B
provider: Microsoft
service_config:
name: phi4
name: bentovllm-phi4-14b-service
traffic:
timeout: 300
resources:
Expand All @@ -433,7 +433,7 @@
description: Qwen 2.5 7B Instruct
provider: Alibaba
service_config:
name: qwen2.5
name: bentovllm-qwen2.5-7b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -447,7 +447,7 @@
description: Qwen 2.5 14B Instruct
provider: Alibaba
service_config:
name: qwen2.5
name: bentovllm-qwen2.5-14b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -461,7 +461,7 @@
description: Qwen 2.5 32B Instruct
provider: Alibaba
service_config:
name: qwen2.5
name: bentovllm-qwen2.5-32b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -475,7 +475,7 @@
description: Qwen 2.5 72B Instruct
provider: Alibaba
service_config:
name: qwen2.5
name: bentovllm-qwen2.5-72b-instruct-service
traffic:
timeout: 300
resources:
Expand All @@ -489,7 +489,7 @@
description: Qwen 2.5 Coder 7B Instruct
provider: Alibaba
service_config:
name: qwen2.5-coder
name: bentovllm-qwen2.5-coder-7b-instruct-service
resources:
gpu: 1
gpu_type: nvidia-l4
Expand All @@ -499,15 +499,15 @@
model: Qwen/Qwen2.5-Coder-7B-Instruct
max_model_len: 8192
server_config:
enable_auto_tool_choice: True
enable_tool_call_parser: True
enable_auto_tool_choice: true
enable_tool_call_parser: true
tool_call_parser: "llama3_json"
"qwen2.5-coder-32b-instruct":
metadata:
description: Qwen 2.5 Coder 32B Instruct
provider: Alibaba
service_config:
name: qwen2.5-coder
name: bentovllm-qwen2.5-coder-32b-instruct-service
resources:
gpu: 1
gpu_type: nvidia-a100-80gb
Expand All @@ -517,8 +517,8 @@
model: Qwen/Qwen2.5-Coder-32B-Instruct
max_model_len: 8192
server_config:
enable_auto_tool_choice: True
enable_tool_call_parser: True
enable_auto_tool_choice: true
enable_tool_call_parser: true
tool_call_parser: "llama3_json"
"qwen2.5vl-3b-instruct":
metadata:
Expand All @@ -529,7 +529,7 @@
max_model_len: 2048
model: Qwen/Qwen2.5-VL-3B-Instruct
service_config:
name: qwen2.5vl
name: bentovllm-qwen2.5vl-3b-instruct-service
resources:
gpu: 1
gpu_type: nvidia-l4
Expand All @@ -546,7 +546,7 @@
max_model_len: 2048
model: Qwen/Qwen2.5-VL-7B-Instruct
service_config:
name: qwen2.5vl
name: bentovllm-qwen2.5vl-7b-instruct-service
resources:
gpu: 1
gpu_type: nvidia-l4
Expand Down
5 changes: 5 additions & 0 deletions deepseek-r1-671b/.bentoignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
__pycache__/
*.py[cod]
*$py.class
.ipynb_checkpoints
venv/
Loading

0 comments on commit 1eeba4c

Please sign in to comment.