From bffb11b1526f9c15f09e33fa2d688beaf0c1731e Mon Sep 17 00:00:00 2001 From: hhzhang16 <54051230+hhzhang16@users.noreply.github.com> Date: Mon, 14 Oct 2024 21:29:19 -0700 Subject: [PATCH] [Bugfix] Update InternVL input mapper to support image embeds (#9351) Signed-off-by: Sumit Dubey --- vllm/model_executor/models/internvl.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index 9024831df543c..6adb1e29d6568 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -342,6 +342,8 @@ def input_mapper( elif is_list_of(data, Image.Image): # we can't stack here because images may have different num_patches data = [image_pixel_values_mapper(img) for img in data] + else: + return MultiModalInputs({"image_embeds": data}) model_config = ctx.model_config tokenizer = cached_get_tokenizer( model_config.tokenizer,