diff --git a/config.yaml b/config.yaml index e17a1a4..efada1f 100644 --- a/config.yaml +++ b/config.yaml @@ -3,7 +3,7 @@ description: DeepSeek V3 671B provider: DeepSeek service_config: - name: deepseek-v3 + name: bentovllm-deepseek-v3-671b-service traffic: timeout: 300 resources: @@ -20,7 +20,7 @@ description: DeepSeek R1 671B provider: DeepSeek service_config: - name: deepseek-r1 + name: bentovllm-deepseek-r1-671b-service traffic: timeout: 300 resources: @@ -31,16 +31,16 @@ engine_config: model: deepseek-ai/DeepSeek-R1 tensor_parallel_size: 16 - trust_remote_code: True + trust_remote_code: true server_config: - enable_reasoning: True + enable_reasoning: true reasoning_parser: deepseek_r1 "deepseek-r1-distill-llama3.3-70b": metadata: description: DeepSeek R1 Distill Llama 3.3 70B provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-llama3.3-70b-service traffic: timeout: 300 resources: @@ -57,7 +57,7 @@ description: DeepSeek R1 Distill Qwen 2.5 32B provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-qwen2.5-32b-service resources: gpu: 1 gpu_type: nvidia-a100-80gb @@ -73,7 +73,7 @@ description: DeepSeek R1 Distill Qwen 2.5 14B provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-qwen2.5-14b-service traffic: timeout: 300 resources: @@ -89,7 +89,7 @@ description: DeepSeek R1 Distill Qwen 2.5 Math 7B provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-qwen2.5-7b-math-service traffic: timeout: 300 resources: @@ -105,7 +105,7 @@ description: DeepSeek R1 Distill Llama 3.1 8B provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-llama3.1-8b-service traffic: timeout: 300 resources: @@ -121,7 +121,7 @@ description: DeepSeek R1 Distill Llama 3.1 8B Tool Calling provider: DeepSeek service_config: - name: deepseek-r1-distill + name: bentovllm-r1-llama3.1-8b-tool-calling-service traffic: timeout: 300 resources: @@ -133,15 +133,15 @@ model: deepseek-ai/DeepSeek-R1-Distill-Llama-8B max_model_len: 4096 server_config: - enable_auto_tool_choice: True - enable_tool_call_parser: True + enable_auto_tool_choice: true + enable_tool_call_parser: true tool_call_parser: "llama3_json" "gemma2-2b-instruct": metadata: description: Gemma 2 2B Instruct provider: Google service_config: - name: gemma2 + name: bentovllm-gemma2-2b-instruct-service traffic: timeout: 300 resources: @@ -158,7 +158,7 @@ description: Gemma 2 9B Instruct provider: Google service_config: - name: gemma2 + name: bentovllm-gemma2-9b-instruct-service traffic: timeout: 300 resources: @@ -177,7 +177,7 @@ description: Gemma 2 27B Instruct provider: Google service_config: - name: gemma2 + name: bentovllm-gemma2-27b-instruct-service traffic: timeout: 300 resources: @@ -196,7 +196,7 @@ description: Jamba 1.5 Mini provider: AI21 Lab service_config: - name: jamba1.5 + name: bentovllm-jamba1.5-mini-service traffic: timeout: 300 resources: @@ -214,7 +214,7 @@ description: Llama 3.1 8B Instruct provider: Meta service_config: - name: llama3.1 + name: bentovllm-llama3.1-8b-instruct-service traffic: timeout: 300 resources: @@ -231,7 +231,7 @@ description: Llama 3.2 1B Instruct provider: Meta service_config: - name: llama3.2 + name: bentovllm-llama3.2-1b-instruct-service traffic: timeout: 300 resources: @@ -249,7 +249,7 @@ description: Llama 3.1 3B Instruct provider: Meta service_config: - name: llama3.2 + name: bentovllm-llama3.2-3b-instruct-service traffic: timeout: 300 resources: @@ -268,7 +268,7 @@ provider: Meta vision: true service_config: - name: llama3.2 + name: bentovllm-llama3.2-11b-v-instruct-service traffic: timeout: 300 resources: @@ -289,7 +289,7 @@ provider: Meta vision: true service_config: - name: llama3.2 + name: bentovllm-llama3.2-90b-v-instruct-service traffic: timeout: 300 resources: @@ -309,7 +309,7 @@ description: Llama 3.3 70B Instruct provider: Meta service_config: - name: llama3.3 + name: bentovllm-llama3.3-70b-instruct-service traffic: timeout: 300 resources: @@ -327,7 +327,7 @@ provider: Mistral AI vision: true service_config: - name: pixtral + name: bentovllm-pixtral-12b-2409-service traffic: timeout: 300 resources: @@ -348,7 +348,7 @@ description: Mixtral 8x7B v0.1 Mixture of Expert provider: Mistral AI service_config: - name: mixtral + name: bentovllm-mixtral-8x7b-v0.1-service traffic: timeout: 300 resources: @@ -366,7 +366,7 @@ description: Ministral 8B Instruct 2410 provider: Mistral AI service_config: - name: mistral-mini + name: bentovllm-ministral-8b-instruct-2410-service traffic: timeout: 300 resources: @@ -384,7 +384,7 @@ description: Mistral Small 24B Instruct 2501 provider: Mistral AI service_config: - name: mistral-small + name: bentovllm-mistral-small-24b-instruct-2501-service traffic: timeout: 300 resources: @@ -401,7 +401,7 @@ description: Mistral Large 123B Instruct 2407 provider: Mistral AI service_config: - name: mistral-large + name: bentovllm-mistral-large-123b-instruct-2407-service traffic: timeout: 300 resources: @@ -419,7 +419,7 @@ description: Phi 4 14B provider: Microsoft service_config: - name: phi4 + name: bentovllm-phi4-14b-service traffic: timeout: 300 resources: @@ -433,7 +433,7 @@ description: Qwen 2.5 7B Instruct provider: Alibaba service_config: - name: qwen2.5 + name: bentovllm-qwen2.5-7b-instruct-service traffic: timeout: 300 resources: @@ -447,7 +447,7 @@ description: Qwen 2.5 14B Instruct provider: Alibaba service_config: - name: qwen2.5 + name: bentovllm-qwen2.5-14b-instruct-service traffic: timeout: 300 resources: @@ -461,7 +461,7 @@ description: Qwen 2.5 32B Instruct provider: Alibaba service_config: - name: qwen2.5 + name: bentovllm-qwen2.5-32b-instruct-service traffic: timeout: 300 resources: @@ -475,7 +475,7 @@ description: Qwen 2.5 72B Instruct provider: Alibaba service_config: - name: qwen2.5 + name: bentovllm-qwen2.5-72b-instruct-service traffic: timeout: 300 resources: @@ -489,7 +489,7 @@ description: Qwen 2.5 Coder 7B Instruct provider: Alibaba service_config: - name: qwen2.5-coder + name: bentovllm-qwen2.5-coder-7b-instruct-service resources: gpu: 1 gpu_type: nvidia-l4 @@ -499,15 +499,15 @@ model: Qwen/Qwen2.5-Coder-7B-Instruct max_model_len: 8192 server_config: - enable_auto_tool_choice: True - enable_tool_call_parser: True + enable_auto_tool_choice: true + enable_tool_call_parser: true tool_call_parser: "llama3_json" "qwen2.5-coder-32b-instruct": metadata: description: Qwen 2.5 Coder 32B Instruct provider: Alibaba service_config: - name: qwen2.5-coder + name: bentovllm-qwen2.5-coder-32b-instruct-service resources: gpu: 1 gpu_type: nvidia-a100-80gb @@ -517,8 +517,8 @@ model: Qwen/Qwen2.5-Coder-32B-Instruct max_model_len: 8192 server_config: - enable_auto_tool_choice: True - enable_tool_call_parser: True + enable_auto_tool_choice: true + enable_tool_call_parser: true tool_call_parser: "llama3_json" "qwen2.5vl-3b-instruct": metadata: @@ -529,7 +529,7 @@ max_model_len: 2048 model: Qwen/Qwen2.5-VL-3B-Instruct service_config: - name: qwen2.5vl + name: bentovllm-qwen2.5vl-3b-instruct-service resources: gpu: 1 gpu_type: nvidia-l4 @@ -546,7 +546,7 @@ max_model_len: 2048 model: Qwen/Qwen2.5-VL-7B-Instruct service_config: - name: qwen2.5vl + name: bentovllm-qwen2.5vl-7b-instruct-service resources: gpu: 1 gpu_type: nvidia-l4 diff --git a/deepseek-r1-671b/.bentoignore b/deepseek-r1-671b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-671b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-671b/LICENSE b/deepseek-r1-671b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-671b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-671b/pyproject.toml b/deepseek-r1-671b/pyproject.toml deleted file mode 100644 index 53888e6..0000000 --- a/deepseek-r1-671b/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-deepseek-r1-671b-service" -description = "Self-host deepseek-ai/DeepSeek-R1 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/deepseek-r1-671b/requirements.txt b/deepseek-r1-671b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-671b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-671b/service.py b/deepseek-r1-671b/service.py index 51f517f..c835b7b 100644 --- a/deepseek-r1-671b/service.py +++ b/deepseek-r1-671b/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1", "tensor_parallel_size": 16, "trust_remote_code": True} SERVICE_CONFIG = { - "name": "deepseek-r1", + "name": "bentovllm-deepseek-r1-671b-service", "traffic": {"timeout": 300}, "resources": {"gpu": 16, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-llama3.1-8b-tool-calling/.bentoignore b/deepseek-r1-distill-llama3.1-8b-tool-calling/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-llama3.1-8b-tool-calling/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-llama3.1-8b-tool-calling/LICENSE b/deepseek-r1-distill-llama3.1-8b-tool-calling/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-llama3.1-8b-tool-calling/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-llama3.1-8b-tool-calling/pyproject.toml b/deepseek-r1-distill-llama3.1-8b-tool-calling/pyproject.toml deleted file mode 100644 index c6e72dd..0000000 --- a/deepseek-r1-distill-llama3.1-8b-tool-calling/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-llama3.1-8b-tool-calling-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Llama-8B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-llama3.1-8b-tool-calling/requirements.txt b/deepseek-r1-distill-llama3.1-8b-tool-calling/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-llama3.1-8b-tool-calling/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-llama3.1-8b-tool-calling/service.py b/deepseek-r1-distill-llama3.1-8b-tool-calling/service.py index d87bbd3..9e5e221 100644 --- a/deepseek-r1-distill-llama3.1-8b-tool-calling/service.py +++ b/deepseek-r1-distill-llama3.1-8b-tool-calling/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "max_model_len": 4096} SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-llama3.1-8b-tool-calling-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-tesla-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-llama3.1-8b/.bentoignore b/deepseek-r1-distill-llama3.1-8b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-llama3.1-8b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-llama3.1-8b/LICENSE b/deepseek-r1-distill-llama3.1-8b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-llama3.1-8b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-llama3.1-8b/pyproject.toml b/deepseek-r1-distill-llama3.1-8b/pyproject.toml deleted file mode 100644 index 5cf0d0c..0000000 --- a/deepseek-r1-distill-llama3.1-8b/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-llama3.1-8b-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Llama-8B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-llama3.1-8b/requirements.txt b/deepseek-r1-distill-llama3.1-8b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-llama3.1-8b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-llama3.1-8b/service.py b/deepseek-r1-distill-llama3.1-8b/service.py index 9f78c1c..f99bea9 100644 --- a/deepseek-r1-distill-llama3.1-8b/service.py +++ b/deepseek-r1-distill-llama3.1-8b/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B", "max_model_len": 4096} SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-llama3.1-8b-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-tesla-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-llama3.3-70b/.bentoignore b/deepseek-r1-distill-llama3.3-70b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-llama3.3-70b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-llama3.3-70b/LICENSE b/deepseek-r1-distill-llama3.3-70b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-llama3.3-70b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-llama3.3-70b/pyproject.toml b/deepseek-r1-distill-llama3.3-70b/pyproject.toml deleted file mode 100644 index fb3786d..0000000 --- a/deepseek-r1-distill-llama3.3-70b/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-llama3.3-70b-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Llama-70B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-llama3.3-70b/requirements.txt b/deepseek-r1-distill-llama3.3-70b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-llama3.3-70b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-llama3.3-70b/service.py b/deepseek-r1-distill-llama3.3-70b/service.py index 26f0e2d..80d2ff9 100644 --- a/deepseek-r1-distill-llama3.3-70b/service.py +++ b/deepseek-r1-distill-llama3.3-70b/service.py @@ -12,7 +12,7 @@ "max_model_len": 8192, } SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-llama3.3-70b-service", "traffic": {"timeout": 300}, "resources": {"gpu": 2, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -28,11 +28,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-qwen2.5-14b/.bentoignore b/deepseek-r1-distill-qwen2.5-14b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-14b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-qwen2.5-14b/LICENSE b/deepseek-r1-distill-qwen2.5-14b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-qwen2.5-14b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-qwen2.5-14b/pyproject.toml b/deepseek-r1-distill-qwen2.5-14b/pyproject.toml deleted file mode 100644 index 44bfc12..0000000 --- a/deepseek-r1-distill-qwen2.5-14b/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-qwen2.5-14b-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Qwen-14B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-qwen2.5-14b/requirements.txt b/deepseek-r1-distill-qwen2.5-14b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-14b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-qwen2.5-14b/service.py b/deepseek-r1-distill-qwen2.5-14b/service.py index 9c4b5c7..9a95ad0 100644 --- a/deepseek-r1-distill-qwen2.5-14b/service.py +++ b/deepseek-r1-distill-qwen2.5-14b/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-qwen2.5-14b-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-qwen2.5-32b/.bentoignore b/deepseek-r1-distill-qwen2.5-32b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-32b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-qwen2.5-32b/LICENSE b/deepseek-r1-distill-qwen2.5-32b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-qwen2.5-32b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-qwen2.5-32b/pyproject.toml b/deepseek-r1-distill-qwen2.5-32b/pyproject.toml deleted file mode 100644 index 6f65a20..0000000 --- a/deepseek-r1-distill-qwen2.5-32b/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-qwen2.5-32b-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Qwen-32B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-qwen2.5-32b/requirements.txt b/deepseek-r1-distill-qwen2.5-32b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-32b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-qwen2.5-32b/service.py b/deepseek-r1-distill-qwen2.5-32b/service.py index 689c475..037ab58 100644 --- a/deepseek-r1-distill-qwen2.5-32b/service.py +++ b/deepseek-r1-distill-qwen2.5-32b/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-qwen2.5-32b-service", "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "traffic": {"timeout": 300}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-r1-distill-qwen2.5-7b-math/.bentoignore b/deepseek-r1-distill-qwen2.5-7b-math/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-7b-math/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-r1-distill-qwen2.5-7b-math/LICENSE b/deepseek-r1-distill-qwen2.5-7b-math/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-r1-distill-qwen2.5-7b-math/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-r1-distill-qwen2.5-7b-math/pyproject.toml b/deepseek-r1-distill-qwen2.5-7b-math/pyproject.toml deleted file mode 100644 index 2216a70..0000000 --- a/deepseek-r1-distill-qwen2.5-7b-math/pyproject.toml +++ /dev/null @@ -1,35 +0,0 @@ -[project] -name = "bentovllm-r1-qwen2.5-7b-math-service" -description = "Self-host deepseek-ai/DeepSeek-R1-Distill-Qwen-7B with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" -type = "deepseek-r1-distill" - diff --git a/deepseek-r1-distill-qwen2.5-7b-math/requirements.txt b/deepseek-r1-distill-qwen2.5-7b-math/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-r1-distill-qwen2.5-7b-math/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-r1-distill-qwen2.5-7b-math/service.py b/deepseek-r1-distill-qwen2.5-7b-math/service.py index 0be1e87..b8b4ade 100644 --- a/deepseek-r1-distill-qwen2.5-7b-math/service.py +++ b/deepseek-r1-distill-qwen2.5-7b-math/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "deepseek-r1-distill", + "name": "bentovllm-r1-qwen2.5-7b-math-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/deepseek-v3-671b/.bentoignore b/deepseek-v3-671b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/deepseek-v3-671b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/deepseek-v3-671b/LICENSE b/deepseek-v3-671b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/deepseek-v3-671b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/deepseek-v3-671b/pyproject.toml b/deepseek-v3-671b/pyproject.toml deleted file mode 100644 index 092641f..0000000 --- a/deepseek-v3-671b/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-deepseek-v3-671b-service" -description = "Self-host deepseek-ai/DeepSeek-V3 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/deepseek-v3-671b/requirements.txt b/deepseek-v3-671b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/deepseek-v3-671b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/deepseek-v3-671b/service.py b/deepseek-v3-671b/service.py index dd0d7b0..d18ec01 100644 --- a/deepseek-v3-671b/service.py +++ b/deepseek-v3-671b/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "deepseek-ai/DeepSeek-V3", "max_model_len": 2048, "tensor_parallel_size": 16} SERVICE_CONFIG = { - "name": "deepseek-v3", + "name": "bentovllm-deepseek-v3-671b-service", "traffic": {"timeout": 300}, "resources": {"gpu": 16, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/gemma2-27b-instruct/.bentoignore b/gemma2-27b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/gemma2-27b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/gemma2-27b-instruct/LICENSE b/gemma2-27b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/gemma2-27b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/gemma2-27b-instruct/README.md b/gemma2-27b-instruct/README.md index f2ac220..331f0ab 100644 --- a/gemma2-27b-instruct/README.md +++ b/gemma2-27b-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/gemma2-27b +cd BentoVLLM/gemma2-27b-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/gemma2-27b-instruct/pyproject.toml b/gemma2-27b-instruct/pyproject.toml deleted file mode 100644 index d16a54a..0000000 --- a/gemma2-27b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-gemma2-27b-instruct-service" -description = "Self-host google/gemma-2-27b-it with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/gemma2-27b-instruct/requirements.txt b/gemma2-27b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/gemma2-27b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/gemma2-27b-instruct/service.py b/gemma2-27b-instruct/service.py index 28b9986..496c50c 100644 --- a/gemma2-27b-instruct/service.py +++ b/gemma2-27b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "google/gemma-2-27b-it", "max_model_len": 2048, "dtype": "half"} SERVICE_CONFIG = { - "name": "gemma2", + "name": "bentovllm-gemma2-27b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "VLLM_ATTENTION_BACKEND", "value": "FLASHINFER"}, {"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -63,7 +59,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set offset = 1 %}\n{% else %}\n {% set offset = 0 %}\n{% endif %}\n\n{% for message in messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {{ '<|' + message['role'] + '|>\\n' + message['content'].strip() + '<|end|>' + '\\n' }}\n\n {% if loop.last and message['role'] == 'user' and add_generation_prompt %}\n {{ '<|assistant|>\\n' }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/gemma2-2b-instruct/.bentoignore b/gemma2-2b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/gemma2-2b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/gemma2-2b-instruct/LICENSE b/gemma2-2b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/gemma2-2b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/gemma2-2b-instruct/README.md b/gemma2-2b-instruct/README.md index f0a16d3..726fb8b 100644 --- a/gemma2-2b-instruct/README.md +++ b/gemma2-2b-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/gemma2-2b +cd BentoVLLM/gemma2-2b-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/gemma2-2b-instruct/pyproject.toml b/gemma2-2b-instruct/pyproject.toml deleted file mode 100644 index b0ecf2f..0000000 --- a/gemma2-2b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-gemma2-2b-instruct-service" -description = "Self-host google/gemma-2-2b-it with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/gemma2-2b-instruct/requirements.txt b/gemma2-2b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/gemma2-2b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/gemma2-2b-instruct/service.py b/gemma2-2b-instruct/service.py index 6531bc9..4c8c6c0 100644 --- a/gemma2-2b-instruct/service.py +++ b/gemma2-2b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "google/gemma-2-2b-it", "max_model_len": 2048, "dtype": "half"} SERVICE_CONFIG = { - "name": "gemma2", + "name": "bentovllm-gemma2-2b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -63,7 +59,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set offset = 1 %}\n{% else %}\n {% set offset = 0 %}\n{% endif %}\n\n{% for message in messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {{ '<|' + message['role'] + '|>\\n' + message['content'].strip() + '<|end|>' + '\\n' }}\n\n {% if loop.last and message['role'] == 'user' and add_generation_prompt %}\n {{ '<|assistant|>\\n' }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/gemma2-9b-instruct/.bentoignore b/gemma2-9b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/gemma2-9b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/gemma2-9b-instruct/LICENSE b/gemma2-9b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/gemma2-9b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/gemma2-9b-instruct/README.md b/gemma2-9b-instruct/README.md index 06dcbe5..d77166d 100644 --- a/gemma2-9b-instruct/README.md +++ b/gemma2-9b-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/gemma2-9b +cd BentoVLLM/gemma2-9b-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/gemma2-9b-instruct/pyproject.toml b/gemma2-9b-instruct/pyproject.toml deleted file mode 100644 index a632c04..0000000 --- a/gemma2-9b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-gemma2-9b-instruct-service" -description = "Self-host google/gemma-2-9b-it with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/gemma2-9b-instruct/requirements.txt b/gemma2-9b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/gemma2-9b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/gemma2-9b-instruct/service.py b/gemma2-9b-instruct/service.py index f46f8e2..ef118cb 100644 --- a/gemma2-9b-instruct/service.py +++ b/gemma2-9b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "google/gemma-2-9b-it", "max_model_len": 2048, "dtype": "half"} SERVICE_CONFIG = { - "name": "gemma2", + "name": "bentovllm-gemma2-9b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "VLLM_ATTENTION_BACKEND", "value": "FLASHINFER"}, {"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -63,7 +59,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set offset = 1 %}\n{% else %}\n {% set offset = 0 %}\n{% endif %}\n\n{% for message in messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {{ '<|' + message['role'] + '|>\\n' + message['content'].strip() + '<|end|>' + '\\n' }}\n\n {% if loop.last and message['role'] == 'user' and add_generation_prompt %}\n {{ '<|assistant|>\\n' }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/jamba1.5-mini/.bentoignore b/jamba1.5-mini/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/jamba1.5-mini/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/jamba1.5-mini/LICENSE b/jamba1.5-mini/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/jamba1.5-mini/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/jamba1.5-mini/pyproject.toml b/jamba1.5-mini/pyproject.toml deleted file mode 100644 index 4947d47..0000000 --- a/jamba1.5-mini/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-jamba1.5-mini-service" -description = "Self-host ai21labs/AI21-Jamba-1.5-Mini with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/jamba1.5-mini/requirements.txt b/jamba1.5-mini/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/jamba1.5-mini/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/jamba1.5-mini/service.py b/jamba1.5-mini/service.py index 8a609a8..6a9b2a0 100644 --- a/jamba1.5-mini/service.py +++ b/jamba1.5-mini/service.py @@ -13,7 +13,7 @@ "tensor_parallel_size": 4, } SERVICE_CONFIG = { - "name": "jamba1.5", + "name": "bentovllm-jamba1.5-mini-service", "traffic": {"timeout": 300}, "resources": {"gpu": 4, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -29,11 +29,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/llama3.1-8b-instruct/.bentoignore b/llama3.1-8b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.1-8b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.1-8b-instruct/LICENSE b/llama3.1-8b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.1-8b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.1-8b-instruct/README.md b/llama3.1-8b-instruct/README.md index f6bb322..6067020 100644 --- a/llama3.1-8b-instruct/README.md +++ b/llama3.1-8b-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/llama3.1-8b +cd BentoVLLM/llama3.1-8b-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/llama3.1-8b-instruct/pyproject.toml b/llama3.1-8b-instruct/pyproject.toml deleted file mode 100644 index 361d4de..0000000 --- a/llama3.1-8b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.1-8b-instruct-service" -description = "Self-host meta-llama/Meta-Llama-3.1-8B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.1-8b-instruct/requirements.txt b/llama3.1-8b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.1-8b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.1-8b-instruct/service.py b/llama3.1-8b-instruct/service.py index 2798fea..fb9522c 100644 --- a/llama3.1-8b-instruct/service.py +++ b/llama3.1-8b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "meta-llama/Meta-Llama-3.1-8B-Instruct", "max_model_len": 2048, "dtype": "half"} SERVICE_CONFIG = { - "name": "llama3.1", + "name": "bentovllm-llama3.1-8b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-tesla-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,16 +24,12 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: model_id = ENGINE_CONFIG["model"] - model = bentoml.models.HuggingFaceModel(model_id, exclude=["*.pth"]) + model = bentoml.models.HuggingFaceModel(model_id) def __init__(self) -> None: from vllm import AsyncEngineArgs, AsyncLLMEngine diff --git a/llama3.2-11b-vision-instruct/.bentoignore b/llama3.2-11b-vision-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.2-11b-vision-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.2-11b-vision-instruct/LICENSE b/llama3.2-11b-vision-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.2-11b-vision-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.2-11b-vision-instruct/README.md b/llama3.2-11b-vision-instruct/README.md index 8e11796..c6102bd 100644 --- a/llama3.2-11b-vision-instruct/README.md +++ b/llama3.2-11b-vision-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/llama3.2-11b-vision +cd BentoVLLM/llama3.2-11b-vision-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/llama3.2-11b-vision-instruct/pyproject.toml b/llama3.2-11b-vision-instruct/pyproject.toml deleted file mode 100644 index 2a1dd07..0000000 --- a/llama3.2-11b-vision-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.2-11b-vision-instruct-service" -description = "Self-host meta-llama/Llama-3.2-11B-Vision-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.2-11b-vision-instruct/requirements.txt b/llama3.2-11b-vision-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.2-11b-vision-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.2-11b-vision-instruct/service.py b/llama3.2-11b-vision-instruct/service.py index f813b8f..f778ec4 100644 --- a/llama3.2-11b-vision-instruct/service.py +++ b/llama3.2-11b-vision-instruct/service.py @@ -14,7 +14,7 @@ "max_num_seqs": 16, } SERVICE_CONFIG = { - "name": "llama3.2", + "name": "bentovllm-llama3.2-11b-v-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -30,16 +30,12 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: model_id = ENGINE_CONFIG["model"] - model = bentoml.models.HuggingFaceModel(model_id, exclude=["*.pth"]) + model = bentoml.models.HuggingFaceModel(model_id) def __init__(self) -> None: from vllm import AsyncEngineArgs, AsyncLLMEngine diff --git a/llama3.2-1b-instruct/.bentoignore b/llama3.2-1b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.2-1b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.2-1b-instruct/LICENSE b/llama3.2-1b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.2-1b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.2-1b-instruct/pyproject.toml b/llama3.2-1b-instruct/pyproject.toml deleted file mode 100644 index 3f51b9d..0000000 --- a/llama3.2-1b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.2-1b-instruct-service" -description = "Self-host meta-llama/Llama-3.2-1B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.2-1b-instruct/requirements.txt b/llama3.2-1b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.2-1b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.2-1b-instruct/service.py b/llama3.2-1b-instruct/service.py index 31f32f8..a4bd8bf 100644 --- a/llama3.2-1b-instruct/service.py +++ b/llama3.2-1b-instruct/service.py @@ -6,9 +6,9 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -ENGINE_CONFIG = {"model": "meta-llama/Llama-3.2-1B-Instruct", "max_model_len": 16384} +ENGINE_CONFIG = {"model": "meta-llama/Llama-3.2-1B-Instruct", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "llama3.2", + "name": "bentovllm-llama3.2-1b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,16 +24,12 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: model_id = ENGINE_CONFIG["model"] - model = bentoml.models.HuggingFaceModel(model_id, exclude=["*.pth"]) + model = bentoml.models.HuggingFaceModel(model_id) def __init__(self) -> None: from vllm import AsyncEngineArgs, AsyncLLMEngine diff --git a/llama3.2-3b-instruct/.bentoignore b/llama3.2-3b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.2-3b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.2-3b-instruct/LICENSE b/llama3.2-3b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.2-3b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.2-3b-instruct/pyproject.toml b/llama3.2-3b-instruct/pyproject.toml deleted file mode 100644 index 8758f01..0000000 --- a/llama3.2-3b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.2-3b-instruct-service" -description = "Self-host meta-llama/Llama-3.2-3B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.2-3b-instruct/requirements.txt b/llama3.2-3b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.2-3b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.2-3b-instruct/service.py b/llama3.2-3b-instruct/service.py index 1a36128..93cc996 100644 --- a/llama3.2-3b-instruct/service.py +++ b/llama3.2-3b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "meta-llama/Llama-3.2-3B-Instruct", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "llama3.2", + "name": "bentovllm-llama3.2-3b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,16 +24,12 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: model_id = ENGINE_CONFIG["model"] - model = bentoml.models.HuggingFaceModel(model_id, exclude=["*.pth"]) + model = bentoml.models.HuggingFaceModel(model_id) def __init__(self) -> None: from vllm import AsyncEngineArgs, AsyncLLMEngine diff --git a/llama3.2-90b-vision-instruct/.bentoignore b/llama3.2-90b-vision-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.2-90b-vision-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.2-90b-vision-instruct/LICENSE b/llama3.2-90b-vision-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.2-90b-vision-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.2-90b-vision-instruct/README.md b/llama3.2-90b-vision-instruct/README.md index c414407..9ee327a 100644 --- a/llama3.2-90b-vision-instruct/README.md +++ b/llama3.2-90b-vision-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/llama3.2-90b-vision +cd BentoVLLM/llama3.2-90b-vision-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/llama3.2-90b-vision-instruct/pyproject.toml b/llama3.2-90b-vision-instruct/pyproject.toml deleted file mode 100644 index 6ac5b00..0000000 --- a/llama3.2-90b-vision-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.2-90b-vision-instruct-service" -description = "Self-host meta-llama/Llama-3.2-90B-Vision-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.2-90b-vision-instruct/requirements.txt b/llama3.2-90b-vision-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.2-90b-vision-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.2-90b-vision-instruct/service.py b/llama3.2-90b-vision-instruct/service.py index 9856200..905c598 100644 --- a/llama3.2-90b-vision-instruct/service.py +++ b/llama3.2-90b-vision-instruct/service.py @@ -14,7 +14,7 @@ "max_num_seqs": 16, } SERVICE_CONFIG = { - "name": "llama3.2", + "name": "bentovllm-llama3.2-90b-v-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 2, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -30,11 +30,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/llama3.3-70b-instruct/.bentoignore b/llama3.3-70b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/llama3.3-70b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/llama3.3-70b-instruct/LICENSE b/llama3.3-70b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/llama3.3-70b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/llama3.3-70b-instruct/README.md b/llama3.3-70b-instruct/README.md index 72926bb..cfa4ea9 100644 --- a/llama3.3-70b-instruct/README.md +++ b/llama3.3-70b-instruct/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/llama3.3-70b +cd BentoVLLM/llama3.3-70b-instruct # Recommend UV, and Python 3.11 uv venv diff --git a/llama3.3-70b-instruct/pyproject.toml b/llama3.3-70b-instruct/pyproject.toml deleted file mode 100644 index f45b55b..0000000 --- a/llama3.3-70b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-llama3.3-70b-instruct-service" -description = "Self-host meta-llama/Llama-3.3-70B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/llama3.3-70b-instruct/requirements.txt b/llama3.3-70b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/llama3.3-70b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/llama3.3-70b-instruct/service.py b/llama3.3-70b-instruct/service.py index 3f37540..af4c7d3 100644 --- a/llama3.3-70b-instruct/service.py +++ b/llama3.3-70b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "meta-llama/Llama-3.3-70B-Instruct", "max_model_len": 2048, "tensor_parallel_size": 2} SERVICE_CONFIG = { - "name": "llama3.3", + "name": "bentovllm-llama3.3-70b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 2, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -24,11 +24,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/ministral-8b-instruct-2410/.bentoignore b/ministral-8b-instruct-2410/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/ministral-8b-instruct-2410/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/ministral-8b-instruct-2410/LICENSE b/ministral-8b-instruct-2410/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/ministral-8b-instruct-2410/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/ministral-8b-instruct-2410/pyproject.toml b/ministral-8b-instruct-2410/pyproject.toml deleted file mode 100644 index 9962d20..0000000 --- a/ministral-8b-instruct-2410/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-ministral-8b-instruct-2410-service" -description = "Self-host mistralai/Ministral-8B-Instruct-2410 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/ministral-8b-instruct-2410/requirements.txt b/ministral-8b-instruct-2410/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/ministral-8b-instruct-2410/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/ministral-8b-instruct-2410/service.py b/ministral-8b-instruct-2410/service.py index 6abae31..36f831e 100644 --- a/ministral-8b-instruct-2410/service.py +++ b/ministral-8b-instruct-2410/service.py @@ -13,7 +13,7 @@ "max_model_len": 4096, } SERVICE_CONFIG = { - "name": "mistral-mini", + "name": "bentovllm-ministral-8b-instruct-2410-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "envs": [{"name": "HF_TOKEN"}], @@ -29,11 +29,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -68,7 +64,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set loop_messages = messages[1:] %}\n {% set system_message = messages[0]['content'].strip() + '\\n\\n' %}\n{% else %}\n {% set loop_messages = messages %}\n {% set system_message = '' %}\n{% endif %}\n\n{{ bos_token }}\n{% for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {% if loop.index0 == 0 %}\n {% set content = system_message + message['content'] %}\n {% else %}\n {% set content = message['content'] %}\n {% endif %}\n\n {% if message['role'] == 'user' %}\n {{ '[INST] ' + content.strip() + ' [/INST]' }}\n {% elif message['role'] == 'assistant' %}\n {{ ' ' + content.strip() + eos_token }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/mistral-large-123b-instruct-2407/.bentoignore b/mistral-large-123b-instruct-2407/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/mistral-large-123b-instruct-2407/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/mistral-large-123b-instruct-2407/LICENSE b/mistral-large-123b-instruct-2407/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/mistral-large-123b-instruct-2407/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/mistral-large-123b-instruct-2407/README.md b/mistral-large-123b-instruct-2407/README.md index 7d65ed7..15526d0 100644 --- a/mistral-large-123b-instruct-2407/README.md +++ b/mistral-large-123b-instruct-2407/README.md @@ -17,7 +17,7 @@ See [here](https://docs.bentoml.com/en/latest/examples/overview.html) for a full ```bash git clone https://github.com/bentoml/BentoVLLM.git -cd BentoVLLM/mistral-large-123b-instruct +cd BentoVLLM/mistral-large-123b-instruct-2407 # Recommend UV, and Python 3.11 uv venv diff --git a/mistral-large-123b-instruct-2407/pyproject.toml b/mistral-large-123b-instruct-2407/pyproject.toml deleted file mode 100644 index 394753b..0000000 --- a/mistral-large-123b-instruct-2407/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-mistral-large-123b-instruct-2407-service" -description = "Self-host mistralai/Mistral-Large-Instruct-2407 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/mistral-large-123b-instruct-2407/requirements.txt b/mistral-large-123b-instruct-2407/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/mistral-large-123b-instruct-2407/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/mistral-large-123b-instruct-2407/service.py b/mistral-large-123b-instruct-2407/service.py index ca66b9d..dbb4e17 100644 --- a/mistral-large-123b-instruct-2407/service.py +++ b/mistral-large-123b-instruct-2407/service.py @@ -13,7 +13,7 @@ "tokenizer_mode": "mistral", } SERVICE_CONFIG = { - "name": "mistral-large", + "name": "bentovllm-mistral-large-123b-instruct-2407-service", "traffic": {"timeout": 300}, "resources": {"gpu": 4, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -29,11 +29,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -68,7 +64,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set loop_messages = messages[1:] %}\n {% set system_message = messages[0]['content'].strip() + '\\n\\n' %}\n{% else %}\n {% set loop_messages = messages %}\n {% set system_message = '' %}\n{% endif %}\n\n{{ bos_token }}\n{% for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {% if loop.index0 == 0 %}\n {% set content = system_message + message['content'] %}\n {% else %}\n {% set content = message['content'] %}\n {% endif %}\n\n {% if message['role'] == 'user' %}\n {{ '[INST] ' + content.strip() + ' [/INST]' }}\n {% elif message['role'] == 'assistant' %}\n {{ ' ' + content.strip() + eos_token }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/mistral-small-24b-instruct-2501/.bentoignore b/mistral-small-24b-instruct-2501/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/mistral-small-24b-instruct-2501/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/mistral-small-24b-instruct-2501/LICENSE b/mistral-small-24b-instruct-2501/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/mistral-small-24b-instruct-2501/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/mistral-small-24b-instruct-2501/pyproject.toml b/mistral-small-24b-instruct-2501/pyproject.toml deleted file mode 100644 index 9e51bfd..0000000 --- a/mistral-small-24b-instruct-2501/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-mistral-small-24b-instruct-2501-service" -description = "Self-host mistralai/Mistral-Small-24B-Instruct-2501 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/mistral-small-24b-instruct-2501/requirements.txt b/mistral-small-24b-instruct-2501/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/mistral-small-24b-instruct-2501/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/mistral-small-24b-instruct-2501/service.py b/mistral-small-24b-instruct-2501/service.py index 9067b43..fbbca6d 100644 --- a/mistral-small-24b-instruct-2501/service.py +++ b/mistral-small-24b-instruct-2501/service.py @@ -12,7 +12,7 @@ "max_model_len": 4096, } SERVICE_CONFIG = { - "name": "mistral-small", + "name": "bentovllm-mistral-small-24b-instruct-2501-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -28,11 +28,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -67,7 +63,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set loop_messages = messages[1:] %}\n {% set system_message = messages[0]['content'].strip() + '\\n\\n' %}\n{% else %}\n {% set loop_messages = messages %}\n {% set system_message = '' %}\n{% endif %}\n\n{{ bos_token }}\n{% for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {% if loop.index0 == 0 %}\n {% set content = system_message + message['content'] %}\n {% else %}\n {% set content = message['content'] %}\n {% endif %}\n\n {% if message['role'] == 'user' %}\n {{ '[INST] ' + content.strip() + ' [/INST]' }}\n {% elif message['role'] == 'assistant' %}\n {{ ' ' + content.strip() + eos_token }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/mixtral-8x7b-v0.1/.bentoignore b/mixtral-8x7b-v0.1/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/mixtral-8x7b-v0.1/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/mixtral-8x7b-v0.1/LICENSE b/mixtral-8x7b-v0.1/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/mixtral-8x7b-v0.1/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/mixtral-8x7b-v0.1/pyproject.toml b/mixtral-8x7b-v0.1/pyproject.toml deleted file mode 100644 index 9cec181..0000000 --- a/mixtral-8x7b-v0.1/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-mixtral-8x7b-v0.1-service" -description = "Self-host mistralai/Mixtral-8x7B-Instruct-v0.1 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/mixtral-8x7b-v0.1/requirements.txt b/mixtral-8x7b-v0.1/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/mixtral-8x7b-v0.1/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/mixtral-8x7b-v0.1/service.py b/mixtral-8x7b-v0.1/service.py index 6bbfaaf..6605da2 100644 --- a/mixtral-8x7b-v0.1/service.py +++ b/mixtral-8x7b-v0.1/service.py @@ -13,7 +13,7 @@ "tokenizer_mode": "mistral", } SERVICE_CONFIG = { - "name": "mixtral", + "name": "bentovllm-mixtral-8x7b-v0.1-service", "traffic": {"timeout": 300}, "resources": {"gpu": 2, "gpu_type": "nvidia-a100-80gb"}, "envs": [{"name": "HF_TOKEN"}], @@ -29,11 +29,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/phi4-14b/.bentoignore b/phi4-14b/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/phi4-14b/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/phi4-14b/LICENSE b/phi4-14b/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/phi4-14b/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/phi4-14b/pyproject.toml b/phi4-14b/pyproject.toml deleted file mode 100644 index b91f2fa..0000000 --- a/phi4-14b/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-phi4-14b-service" -description = "Self-host microsoft/phi-4 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/phi4-14b/requirements.txt b/phi4-14b/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/phi4-14b/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/phi4-14b/service.py b/phi4-14b/service.py index de6cf33..a106f3e 100644 --- a/phi4-14b/service.py +++ b/phi4-14b/service.py @@ -7,7 +7,11 @@ logger.setLevel(logging.INFO) ENGINE_CONFIG = {"model": "microsoft/phi-4", "max_model_len": 8192} -SERVICE_CONFIG = {"name": "phi4", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}} +SERVICE_CONFIG = { + "name": "bentovllm-phi4-14b-service", + "traffic": {"timeout": 300}, + "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, +} SERVER_CONFIG = {} REQUIREMENTS_TXT = [] @@ -19,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: @@ -58,7 +58,7 @@ def __init__(self) -> None: args.max_log_len = 1000 args.response_role = "assistant" args.served_model_name = [self.model_id] - args.chat_template = "{% if messages[0]['role'] == 'system' %}\n {% set offset = 1 %}\n{% else %}\n {% set offset = 0 %}\n{% endif %}\n\n{% for message in messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == offset) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n\n {{ '<|' + message['role'] + '|>\\n' + message['content'].strip() + '<|end|>' + '\\n' }}\n\n {% if loop.last and message['role'] == 'user' and add_generation_prompt %}\n {{ '<|assistant|>\\n' }}\n {% endif %}\n{% endfor %}\n" + args.chat_template = None args.chat_template_content_format = "auto" args.lora_modules = None args.prompt_adapters = None diff --git a/pixtral-12b-2409/.bentoignore b/pixtral-12b-2409/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/pixtral-12b-2409/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/pixtral-12b-2409/LICENSE b/pixtral-12b-2409/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/pixtral-12b-2409/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/pixtral-12b-2409/pyproject.toml b/pixtral-12b-2409/pyproject.toml deleted file mode 100644 index 6873a1c..0000000 --- a/pixtral-12b-2409/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-pixtral-12b-2409-service" -description = "Self-host mistral-community/pixtral-12b-240910 with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/pixtral-12b-2409/requirements.txt b/pixtral-12b-2409/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/pixtral-12b-2409/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/pixtral-12b-2409/service.py b/pixtral-12b-2409/service.py index ede24e1..106095a 100644 --- a/pixtral-12b-2409/service.py +++ b/pixtral-12b-2409/service.py @@ -15,7 +15,7 @@ "max_model_len": 16384, } SERVICE_CONFIG = { - "name": "pixtral", + "name": "bentovllm-pixtral-12b-2409-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, } @@ -30,11 +30,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-14b-instruct/.bentoignore b/qwen2.5-14b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-14b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-14b-instruct/LICENSE b/qwen2.5-14b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-14b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-14b-instruct/pyproject.toml b/qwen2.5-14b-instruct/pyproject.toml deleted file mode 100644 index ac3529c..0000000 --- a/qwen2.5-14b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-14b-instruct-service" -description = "Self-host Qwen/Qwen2.5-14B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-14b-instruct/requirements.txt b/qwen2.5-14b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-14b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-14b-instruct/service.py b/qwen2.5-14b-instruct/service.py index 653e635..30ab06f 100644 --- a/qwen2.5-14b-instruct/service.py +++ b/qwen2.5-14b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-14B-Instruct", "max_model_len": 2048} SERVICE_CONFIG = { - "name": "qwen2.5", + "name": "bentovllm-qwen2.5-14b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, } @@ -23,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-32b-instruct/.bentoignore b/qwen2.5-32b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-32b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-32b-instruct/LICENSE b/qwen2.5-32b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-32b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-32b-instruct/pyproject.toml b/qwen2.5-32b-instruct/pyproject.toml deleted file mode 100644 index 0b80299..0000000 --- a/qwen2.5-32b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-32b-instruct-service" -description = "Self-host Qwen/Qwen2.5-32B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-32b-instruct/requirements.txt b/qwen2.5-32b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-32b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-32b-instruct/service.py b/qwen2.5-32b-instruct/service.py index 63d5710..0f4787e 100644 --- a/qwen2.5-32b-instruct/service.py +++ b/qwen2.5-32b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-32B-Instruct", "max_model_len": 2048} SERVICE_CONFIG = { - "name": "qwen2.5", + "name": "bentovllm-qwen2.5-32b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, } @@ -23,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-72b-instruct/.bentoignore b/qwen2.5-72b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-72b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-72b-instruct/LICENSE b/qwen2.5-72b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-72b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-72b-instruct/pyproject.toml b/qwen2.5-72b-instruct/pyproject.toml deleted file mode 100644 index 5e0269a..0000000 --- a/qwen2.5-72b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-72b-instruct-service" -description = "Self-host Qwen/Qwen2.5-72B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-72b-instruct/requirements.txt b/qwen2.5-72b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-72b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-72b-instruct/service.py b/qwen2.5-72b-instruct/service.py index ea51677..89703bb 100644 --- a/qwen2.5-72b-instruct/service.py +++ b/qwen2.5-72b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-72B-Instruct", "max_model_len": 2048} SERVICE_CONFIG = { - "name": "qwen2.5", + "name": "bentovllm-qwen2.5-72b-instruct-service", "traffic": {"timeout": 300}, "resources": {"gpu": 2, "gpu_type": "nvidia-a100-80gb"}, } @@ -23,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-7b-instruct/.bentoignore b/qwen2.5-7b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-7b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-7b-instruct/LICENSE b/qwen2.5-7b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-7b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-7b-instruct/pyproject.toml b/qwen2.5-7b-instruct/pyproject.toml deleted file mode 100644 index a77bc47..0000000 --- a/qwen2.5-7b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-7b-instruct-service" -description = "Self-host Qwen/Qwen2.5-7B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-7b-instruct/requirements.txt b/qwen2.5-7b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-7b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-7b-instruct/service.py b/qwen2.5-7b-instruct/service.py index c9ce61b..00706bf 100644 --- a/qwen2.5-7b-instruct/service.py +++ b/qwen2.5-7b-instruct/service.py @@ -7,7 +7,11 @@ logger.setLevel(logging.INFO) ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-7B-Instruct", "max_model_len": 2048} -SERVICE_CONFIG = {"name": "qwen2.5", "traffic": {"timeout": 300}, "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}} +SERVICE_CONFIG = { + "name": "bentovllm-qwen2.5-7b-instruct-service", + "traffic": {"timeout": 300}, + "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, +} SERVER_CONFIG = {} REQUIREMENTS_TXT = [] @@ -19,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-coder-32b-instruct/.bentoignore b/qwen2.5-coder-32b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-coder-32b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-coder-32b-instruct/LICENSE b/qwen2.5-coder-32b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-coder-32b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-coder-32b-instruct/pyproject.toml b/qwen2.5-coder-32b-instruct/pyproject.toml deleted file mode 100644 index 4163fde..0000000 --- a/qwen2.5-coder-32b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-coder-32b-instruct-service" -description = "Self-host Qwen/Qwen2.5-Coder-32B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-coder-32b-instruct/requirements.txt b/qwen2.5-coder-32b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-coder-32b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-coder-32b-instruct/service.py b/qwen2.5-coder-32b-instruct/service.py index 8ec6903..3622e5d 100644 --- a/qwen2.5-coder-32b-instruct/service.py +++ b/qwen2.5-coder-32b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-Coder-32B-Instruct", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "qwen2.5-coder", + "name": "bentovllm-qwen2.5-coder-32b-instruct-service", "resources": {"gpu": 1, "gpu_type": "nvidia-a100-80gb"}, "traffic": {"timeout": 300}, } @@ -23,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5-coder-7b-instruct/.bentoignore b/qwen2.5-coder-7b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5-coder-7b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5-coder-7b-instruct/LICENSE b/qwen2.5-coder-7b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5-coder-7b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5-coder-7b-instruct/pyproject.toml b/qwen2.5-coder-7b-instruct/pyproject.toml deleted file mode 100644 index a4cf2e6..0000000 --- a/qwen2.5-coder-7b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5-coder-7b-instruct-service" -description = "Self-host Qwen/Qwen2.5-Coder-7B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5-coder-7b-instruct/requirements.txt b/qwen2.5-coder-7b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5-coder-7b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5-coder-7b-instruct/service.py b/qwen2.5-coder-7b-instruct/service.py index 1329663..336380f 100644 --- a/qwen2.5-coder-7b-instruct/service.py +++ b/qwen2.5-coder-7b-instruct/service.py @@ -8,7 +8,7 @@ ENGINE_CONFIG = {"model": "Qwen/Qwen2.5-Coder-7B-Instruct", "max_model_len": 8192} SERVICE_CONFIG = { - "name": "qwen2.5-coder", + "name": "bentovllm-qwen2.5-coder-7b-instruct-service", "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "traffic": {"timeout": 300}, } @@ -23,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5vl-3b-instruct/.bentoignore b/qwen2.5vl-3b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5vl-3b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5vl-3b-instruct/LICENSE b/qwen2.5vl-3b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5vl-3b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5vl-3b-instruct/pyproject.toml b/qwen2.5vl-3b-instruct/pyproject.toml deleted file mode 100644 index 57d9e22..0000000 --- a/qwen2.5vl-3b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5vl-3b-instruct-service" -description = "Self-host Qwen/Qwen2.5-VL-3B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5vl-3b-instruct/requirements.txt b/qwen2.5vl-3b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5vl-3b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5vl-3b-instruct/service.py b/qwen2.5vl-3b-instruct/service.py index b264263..7fe2f1e 100644 --- a/qwen2.5vl-3b-instruct/service.py +++ b/qwen2.5vl-3b-instruct/service.py @@ -7,7 +7,11 @@ logger.setLevel(logging.INFO) ENGINE_CONFIG = {"max_model_len": 2048, "model": "Qwen/Qwen2.5-VL-3B-Instruct"} -SERVICE_CONFIG = {"name": "qwen2.5vl", "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "traffic": {"timeout": 300}} +SERVICE_CONFIG = { + "name": "bentovllm-qwen2.5vl-3b-instruct-service", + "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, + "traffic": {"timeout": 300}, +} SERVER_CONFIG = {} REQUIREMENTS_TXT = ["qwen-vl-utils[decord]==0.0.8"] @@ -19,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/qwen2.5vl-7b-instruct/.bentoignore b/qwen2.5vl-7b-instruct/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/qwen2.5vl-7b-instruct/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/qwen2.5vl-7b-instruct/LICENSE b/qwen2.5vl-7b-instruct/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/qwen2.5vl-7b-instruct/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/qwen2.5vl-7b-instruct/pyproject.toml b/qwen2.5vl-7b-instruct/pyproject.toml deleted file mode 100644 index ca45b94..0000000 --- a/qwen2.5vl-7b-instruct/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-qwen2.5vl-7b-instruct-service" -description = "Self-host Qwen/Qwen2.5-VL-7B-Instruct with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" diff --git a/qwen2.5vl-7b-instruct/requirements.txt b/qwen2.5vl-7b-instruct/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/qwen2.5vl-7b-instruct/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/qwen2.5vl-7b-instruct/service.py b/qwen2.5vl-7b-instruct/service.py index c8e0945..fbc000d 100644 --- a/qwen2.5vl-7b-instruct/service.py +++ b/qwen2.5vl-7b-instruct/service.py @@ -7,7 +7,11 @@ logger.setLevel(logging.INFO) ENGINE_CONFIG = {"max_model_len": 2048, "model": "Qwen/Qwen2.5-VL-7B-Instruct"} -SERVICE_CONFIG = {"name": "qwen2.5vl", "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, "traffic": {"timeout": 300}} +SERVICE_CONFIG = { + "name": "bentovllm-qwen2.5vl-7b-instruct-service", + "resources": {"gpu": 1, "gpu_type": "nvidia-l4"}, + "traffic": {"timeout": 300}, +} SERVER_CONFIG = {} REQUIREMENTS_TXT = ["qwen-vl-utils[decord]==0.0.8"] @@ -19,11 +23,7 @@ @bentoml.service( **SERVICE_CONFIG, image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") + .requirements_file("requirements.txt") .python_packages(*REQUIREMENTS_TXT), ) class VLLM: diff --git a/{{cookiecutter.model_name}}/.bentoignore b/{{cookiecutter.model_name}}/.bentoignore new file mode 100644 index 0000000..d9cf115 --- /dev/null +++ b/{{cookiecutter.model_name}}/.bentoignore @@ -0,0 +1,5 @@ +__pycache__/ +*.py[cod] +*$py.class +.ipynb_checkpoints +venv/ diff --git a/{{cookiecutter.model_name}}/LICENSE b/{{cookiecutter.model_name}}/LICENSE deleted file mode 100644 index 231bcb0..0000000 --- a/{{cookiecutter.model_name}}/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2020 Atalaya Tech. Inc - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/{{cookiecutter.model_name}}/pyproject.toml b/{{cookiecutter.model_name}}/pyproject.toml deleted file mode 100644 index b864f67..0000000 --- a/{{cookiecutter.model_name}}/pyproject.toml +++ /dev/null @@ -1,33 +0,0 @@ -[project] -name = "bentovllm-{{cookiecutter.model_name}}-service" -description = "Self-host {{cookiecutter.model_id}} with vLLM and BentoML" -readme = "README.md" -requires-python = ">=3.10" -license = { text = "Apache-2.0" } -authors = [{ name = "BentoML Team", email = "contact@bentoml.com" }] -dependencies = [ - "bentoml>=1.3.20", - "vllm==0.7.1", - "kantoku>=0.18.1", - "openai>=1.61.0", - "pyyaml", - "Pillow", - "flashinfer-python", -] -version = "0.0.0" - -[project.urls] -Website = "https://bentoml.com" -Documentation = "https://docs.bentoml.com" -GitHub = "https://github.com/bentoml/BentoVLLM" -Twitter = "https://twitter.com/bentomlai" -Tracker = "https://github.com/bentoml/BentoVLLM" - -[tool.bentoml.build] -service = "service:VLLM" -include = ["LICENCE", "*.py", "*.toml", "*.md"] - -[tool.bentoml.build.labels] -owner = "bentoml-team" -stage = "prebuilt" -registry = "huggingface" \ No newline at end of file diff --git a/{{cookiecutter.model_name}}/requirements.txt b/{{cookiecutter.model_name}}/requirements.txt new file mode 100644 index 0000000..c1c019c --- /dev/null +++ b/{{cookiecutter.model_name}}/requirements.txt @@ -0,0 +1,7 @@ +bentoml>=1.3.20 +vllm==0.7.1 +kantoku>=0.18.1 +openai>=1.61.0 +pyyaml +Pillow +flashinfer-python diff --git a/{{cookiecutter.model_name}}/service.py b/{{cookiecutter.model_name}}/service.py index de66382..854a443 100644 --- a/{{cookiecutter.model_name}}/service.py +++ b/{{cookiecutter.model_name}}/service.py @@ -16,13 +16,7 @@ @bentoml.asgi_app(openai_api_app, path="/v1") @bentoml.service( **SERVICE_CONFIG, - image=bentoml.images.PythonImage(python_version="3.11") - .python_packages("vllm==0.7.1\n") - .python_packages("pyyaml\n") - .python_packages("Pillow\n") - .python_packages("openai\n") - .python_packages("bentoml>=1.3.20\n") - .python_packages(*REQUIREMENTS_TXT), + image=bentoml.images.PythonImage(python_version="3.11").requirements_file("requirements.txt").python_packages(*REQUIREMENTS_TXT) ) class VLLM: model_id = ENGINE_CONFIG["model"]