diff --git a/vllm/entrypoints/api_server.py b/vllm/entrypoints/api_server.py index b1e713f9d4f57..8e8de51148686 100644 --- a/vllm/entrypoints/api_server.py +++ b/vllm/entrypoints/api_server.py @@ -76,7 +76,7 @@ async def stream_results() -> AsyncGenerator[bytes, None]: text_outputs = [ output.text for output in request_output.outputs ] - output_tokens = [output.token_ids for output in request_output.outputs] + output_tokens = [list(output.token_ids) for output in request_output.outputs] logprobs = [[{k: asdict(v) for k, v in logprobs.items()} for logprobs in output.logprobs] if output.logprobs is not None else None for output in request_output.outputs] ret = {"text": text_outputs, "output_token_ids": output_tokens, "logprobs": logprobs}