
Commit 1dfdc13

DarkLight1337 authored and cooleel committed
[Bugfix] Fix PP for ChatGLM and Molmo (vllm-project#9422)
Signed-off-by: Shanshan Wang <shanshan.wang@h2o.ai>
1 parent c4d205b commit 1dfdc13

File tree

7 files changed: +197 −124 lines changed


docs/source/models/supported_models.rst

+1 −1

@@ -431,7 +431,7 @@ Text Generation
     -
   * - :code:`MolmoForCausalLM`
     - Molmo
-    - Image
+    - T + I
     - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
     -
     - ✅︎
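The table change above records Molmo as taking text plus image ("T + I") input. For context only, here is a rough offline-inference sketch for Molmo with two pipeline stages, the configuration this bugfix repairs. It is not code from the commit: the two-stage split, the bundled sample image, and the prompt template are assumptions, and Molmo's exact prompt conventions should be taken from the vLLM multimodal examples.

# Hypothetical usage sketch, not part of this commit. Assumes at least two GPUs
# and a vLLM build whose offline LLM entrypoint supports pipeline parallelism.
from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset  # small sample images bundled with vLLM

llm = LLM(
    model="allenai/Molmo-7B-D-0924",
    trust_remote_code=True,      # Molmo ships custom modeling/processor code
    pipeline_parallel_size=2,    # the setting this bugfix makes usable for Molmo
)

# The prompt template below is a placeholder; Molmo defines its own format.
outputs = llm.generate(
    {
        "prompt": "Describe this image.",
        "multi_modal_data": {"image": ImageAsset("stop_sign").pil_image},
    },
    SamplingParams(max_tokens=64),
)
print(outputs[0].outputs[0].text)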

tests/distributed/test_pipeline_parallel.py

+18 −19

@@ -118,11 +118,8 @@ def iter_params(self, model_name: str):
 # The values displayed here are only a rough indicator of the size of the model
 
 # yapf: disable
-GENERATION_MODEL_SETTINGS = {
-    # [DETAILED TESTS]
-    "meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
-    "microsoft/Phi-3-mini-4k-instruct": PPTestSettings.detailed(trust_remote_code=True, multi_node_only=True),  # noqa: E501
-    # [FAST TESTS]
+TEXT_GENERATION_MODELS = {
+    # [Decoder-only]
     # Uses Llama
     # "BAAI/AquilaChat-7B": PPTestSettings.fast(),
     "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(tp_base=8, trust_remote_code=True),  # noqa: E501
@@ -151,6 +148,7 @@ def iter_params(self, model_name: str):
     "core42/jais-13b-chat": PPTestSettings.fast(),
     # TODO: Implement PP
     # "ai21labs/AI21-Jamba-1.5-Mini": PPTestSettings.fast(),
+    "meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
     "openbmb/MiniCPM-2B-sft-bf16": PPTestSettings.fast(trust_remote_code=True),
     "openbmb/MiniCPM3-4B": PPTestSettings.fast(trust_remote_code=True),
     # Uses Llama
@@ -163,6 +161,7 @@ def iter_params(self, model_name: str):
     "facebook/opt-iml-max-1.3b": PPTestSettings.fast(),
     "OrionStarAI/Orion-14B-Chat": PPTestSettings.fast(trust_remote_code=True),
     "microsoft/phi-2": PPTestSettings.fast(),
+    "microsoft/Phi-3-mini-4k-instruct": PPTestSettings.detailed(trust_remote_code=True, multi_node_only=True),  # noqa: E501
     "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
     "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
     "adept/persimmon-8b-chat": PPTestSettings.fast(),
@@ -174,40 +173,40 @@ def iter_params(self, model_name: str):
     "upstage/solar-pro-preview-instruct": PPTestSettings.fast(tp_base=2),
     # FIXME: Cannot load tokenizer in latest transformers version
     # "xverse/XVERSE-7B-Chat": PPTestSettings.fast(trust_remote_code=True),
+    # [Encoder-only]
+    # TODO: Implement PP
+    # "facebook/bart-base": PPTestSettings.fast(),
 }
 
-EMBEDDING_MODEL_SETTINGS = {  # type: ignore[var-annotated]
-    # [FAST TESTS]
+EMBEDDING_MODELS = {  # type: ignore[var-annotated]
+    # [Text-only]
     "intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(),
     "BAAI/bge-multilingual-gemma2": PPTestSettings.fast(),
     "Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(tp_base=4, trust_remote_code=True),  # noqa: E501
 }
 
-MULTIMODAL_MODEL_SETTINGS = {
-    # [FAST TESTS]
+MULTIMODAL_MODELS = {
+    # [Decoder-only]
     "Salesforce/blip2-opt-2.7b": PPTestSettings.fast(),
     "facebook/chameleon-7b": PPTestSettings.fast(),
     "adept/fuyu-8b": PPTestSettings.fast(),
+    "THUDM/glm-4v-9b": PPTestSettings.fast(trust_remote_code=True),
     "OpenGVLab/InternVL2-1B": PPTestSettings.fast(trust_remote_code=True),
     "llava-hf/llava-1.5-7b-hf": PPTestSettings.fast(),
     "llava-hf/llava-v1.6-mistral-7b-hf": PPTestSettings.fast(),
     "llava-hf/LLaVA-NeXT-Video-7B-hf": PPTestSettings.fast(),
     "llava-hf/llava-onevision-qwen2-0.5b-ov-hf": PPTestSettings.fast(),
     "openbmb/MiniCPM-Llama3-V-2_5": PPTestSettings.fast(trust_remote_code=True),
-    # TODO: Implement PP
-    # "meta-llama/Llama-3.2-11B-Vision-Instruct": PPTestSettings.fast(),
+    "allenai/Molmo-7B-D-0924": PPTestSettings.fast(trust_remote_code=True),
     "microsoft/Phi-3-vision-128k-instruct": PPTestSettings.fast(trust_remote_code=True),  # noqa: E501
     "mistralai/Pixtral-12B-2409": PPTestSettings.fast(tp_base=2, tokenizer_mode="mistral"),  # noqa: E501
     "Qwen/Qwen-VL-Chat": PPTestSettings.fast(trust_remote_code=True),
     "Qwen/Qwen2-Audio-7B-Instruct": PPTestSettings.fast(),
     "Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(),
     "fixie-ai/ultravox-v0_3": PPTestSettings.fast(),
-}
-
-CONDITIONAL_GENERATION_MODEL_SETTINGS = {  # type: ignore[var-annotated]
-    # [FAST TESTS]
+    # [Encoder-decoder]
     # TODO: Implement PP
-    # "facebook/bart-base": PPTestSettings.fast(),
+    # "meta-llama/Llama-3.2-11B-Vision-Instruct": PPTestSettings.fast(),
 }
 # yapf: enable
 
@@ -323,7 +322,7 @@ def _compare_tp(
     ("model_name", "parallel_setup", "distributed_backend", "task",
      "test_options"),
     [
-        params for model_name, settings in GENERATION_MODEL_SETTINGS.items()
+        params for model_name, settings in TEXT_GENERATION_MODELS.items()
         for params in settings.iter_params(model_name)
         if model_name in TEST_MODELS
     ],
@@ -350,7 +349,7 @@ def test_tp_language_generation(
     ("model_name", "parallel_setup", "distributed_backend", "task",
      "test_options"),
     [
-        params for model_name, settings in EMBEDDING_MODEL_SETTINGS.items()
+        params for model_name, settings in EMBEDDING_MODELS.items()
         for params in settings.iter_params(model_name)
         if model_name in TEST_MODELS
     ],
@@ -377,7 +376,7 @@ def test_tp_language_embedding(
     ("model_name", "parallel_setup", "distributed_backend", "task",
      "test_options"),
     [
-        params for model_name, settings in MULTIMODAL_MODEL_SETTINGS.items()
+        params for model_name, settings in MULTIMODAL_MODELS.items()
         for params in settings.iter_params(model_name)
         if model_name in TEST_MODELS
     ],
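All three renamed dictionaries feed the same iter_params-based parametrization, so the new Molmo and GLM-4V entries can be exercised locally by filtering on the test ID. A minimal sketch, assuming a multi-GPU machine, that the model is included in the TEST_MODELS filter used above, and that the generated test IDs embed the Hugging Face model name:

# Hypothetical local run, not part of this commit. The distributed PP tests
# need several GPUs; -x stops at the first failure, -k filters by test ID.
import sys

import pytest

sys.exit(pytest.main([
    "-v", "-x",
    "tests/distributed/test_pipeline_parallel.py",
    "-k", "Molmo",  # matches the allenai/Molmo-7B-D-0924 parametrization
]))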
