23
23
# yapf: enable
24
24
# pydantic needs the TypedDict from typing_extensions
25
25
from pydantic import ConfigDict
26
+ from transformers import PreTrainedTokenizer , PreTrainedTokenizerFast
26
27
from typing_extensions import Required , TypeAlias , TypedDict
27
28
28
29
from vllm .config import ModelConfig
31
32
from vllm .multimodal .utils import (async_get_and_parse_audio ,
32
33
async_get_and_parse_image ,
33
34
get_and_parse_audio , get_and_parse_image )
34
- from vllm .transformers_utils .tokenizer import AnyTokenizer
35
+ from vllm .transformers_utils .tokenizer import AnyTokenizer , MistralTokenizer
35
36
36
37
logger = init_logger (__name__ )
37
38
@@ -379,6 +380,9 @@ def _parse_chat_message_content_parts(
379
380
audio_url = _AudioParser (part )["audio_url" ]
380
381
381
382
mm_parser .parse_audio (audio_url ["url" ])
383
+ elif part_type == "refusal" :
384
+ text = _RefusalParser (part )["refusal" ]
385
+ texts .append (text )
382
386
else :
383
387
raise NotImplementedError (f"Unknown part type: { part_type } " )
384
388
@@ -433,6 +437,21 @@ def _parse_chat_message_content(
433
437
return result
434
438
435
439
440
+ def _postprocess_messages (messages : List [ConversationMessage ]) -> None :
441
+ # per the Transformers docs & maintainers, tool call arguments in
442
+ # assistant-role messages with tool_calls need to be dicts not JSON str -
443
+ # this is how tool-use chat templates will expect them moving forwards
444
+ # so, for messages that have tool_calls, parse the string (which we get
445
+ # from openAI format) to dict
446
+ for message in messages :
447
+ if (message ["role" ] == "assistant" and "tool_calls" in message
448
+ and isinstance (message ["tool_calls" ], list )):
449
+
450
+ for item in message ["tool_calls" ]:
451
+ item ["function" ]["arguments" ] = json .loads (
452
+ item ["function" ]["arguments" ])
453
+
454
+
436
455
def parse_chat_messages (
437
456
messages : List [ChatCompletionMessageParam ],
438
457
model_config : ModelConfig ,
@@ -446,6 +465,8 @@ def parse_chat_messages(
446
465
447
466
conversation .extend (sub_messages )
448
467
468
+ _postprocess_messages (conversation )
469
+
449
470
return conversation , mm_tracker .all_mm_data ()
450
471
451
472
@@ -462,41 +483,41 @@ def parse_chat_messages_futures(
462
483
463
484
conversation .extend (sub_messages )
464
485
486
+ _postprocess_messages (conversation )
487
+
465
488
return conversation , mm_tracker .all_mm_data ()
466
489
467
490
468
def apply_hf_chat_template(
    tokenizer: "Union[PreTrainedTokenizer, PreTrainedTokenizerFast]",
    conversation: "List[ConversationMessage]",
    chat_template: "Optional[str]",
    *,
    tokenize: bool = False,  # Different from HF's default
    **kwargs: Any,
) -> str:
    """Render a conversation with a HuggingFace tokenizer's chat template.

    Args:
        tokenizer: HF (fast or slow) tokenizer providing the template.
        conversation: Already-parsed chat messages to render.
        chat_template: Explicit template; falls back to the tokenizer's own.
        tokenize: Whether the tokenizer should tokenize the output
            (kept ``False`` here, unlike HF's default).
        **kwargs: Forwarded verbatim to ``tokenizer.apply_chat_template``.

    Returns:
        The rendered prompt string.

    Raises:
        ValueError: If neither an explicit nor a tokenizer-defined chat
            template is available.
    """
    # transformers >= 4.44 removed the implicit default template, so a
    # missing template on both sides is a hard error rather than a fallback.
    if chat_template is None and tokenizer.chat_template is None:
        raise ValueError(
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one.")

    return tokenizer.apply_chat_template(
        conversation=conversation,  # type: ignore[arg-type]
        chat_template=chat_template,
        tokenize=tokenize,
        **kwargs,
    )
def apply_mistral_chat_template(
    tokenizer: "MistralTokenizer",
    messages: "List[ChatCompletionMessageParam]",
    chat_template: "Optional[str]",
    **kwargs: Any,
) -> "List[int]":
    """Encode chat messages with a Mistral tokenizer's chat template.

    Unlike the HF path, the Mistral tokenizer consumes the raw
    OpenAI-format messages and returns token IDs directly.

    Args:
        tokenizer: Mistral tokenizer wrapper.
        messages: Raw OpenAI-format chat messages.
        chat_template: Optional explicit template override.
        **kwargs: Forwarded verbatim to ``tokenizer.apply_chat_template``.

    Returns:
        The encoded prompt as a list of token IDs.
    """
    return tokenizer.apply_chat_template(
        messages=messages,
        chat_template=chat_template,
        **kwargs,
    )