Skip to content

Commit d10a8cd

Browse files
Isotr0py authored and rickyyx committed
[Bugfix] Fix tensor parallel for qwen2 classification model (vllm-project#10297)
Signed-off-by: Isotr0py <2037008807@qq.com>
Signed-off-by: rickyx <rickyx@anyscale.com>
1 parent 7977612 commit d10a8cd

File tree

2 files changed

+9 -4 lines changed

2 files changed

+9 -4 lines changed

tests/models/embedding/language/test_cls_models.py

+3 -3
Original file line number | Diff line number | Diff line change
@@ -21,14 +21,14 @@ def test_classification_models(
2121
model: str,
2222
dtype: str,
2323
) -> None:
24+
with vllm_runner(model, dtype=dtype) as vllm_model:
25+
vllm_outputs = vllm_model.classify(example_prompts)
26+
2427
with hf_runner(model,
2528
dtype=dtype,
2629
auto_cls=AutoModelForSequenceClassification) as hf_model:
2730
hf_outputs = hf_model.classify(example_prompts)
2831

29-
with vllm_runner(model, dtype=dtype) as vllm_model:
30-
vllm_outputs = vllm_model.classify(example_prompts)
31-
3232
print(hf_outputs, vllm_outputs)
3333

3434
# check logits difference

vllm/model_executor/models/qwen2_cls.py

+6 -1
Original file line number | Diff line number | Diff line change
@@ -69,9 +69,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
6969
self.model = Qwen2Model(vllm_config=vllm_config,
7070
prefix=maybe_prefix(prefix, "model"))
7171

72+
# hidden_states from Qwen2Model has been reduced,
73+
# the input of score layer is not parallelized.
7274
self.score = RowParallelLinear(config.hidden_size,
7375
config.num_labels,
74-
quant_config=quant_config)
76+
quant_config=quant_config,
77+
input_is_parallel=False,
78+
bias=False,
79+
prefix=maybe_prefix(prefix, "score"))
7580
self._pooler = Pooler.from_config_with_defaults(
7681
pooler_config,
7782
pooling_type=PoolingType.LAST,

0 commit comments

Comments
 (0)