3
3
import asyncio
4
4
import time
5
5
import uuid
6
- from typing import Dict , List
6
+ from contextlib import ExitStack
7
+ from typing import Dict , List , Optional
7
8
8
9
import pytest
9
10
from transformers import AutoTokenizer
14
15
from vllm .platforms import current_platform
15
16
from vllm .usage .usage_lib import UsageContext
16
17
from vllm .v1 .engine import EngineCoreRequest
17
- from vllm .v1 .engine .core_client import EngineCoreClient
18
+ from vllm .v1 .engine .core import EngineCore
19
+ from vllm .v1 .engine .core_client import (AsyncMPClient , EngineCoreClient ,
20
+ SyncMPClient )
18
21
from vllm .v1 .executor .abstract import Executor
19
22
20
23
if not current_platform .is_cuda ():
@@ -63,7 +66,7 @@ def loop_until_done(client: EngineCoreClient, outputs: Dict):
63
66
async def loop_until_done_async (client : EngineCoreClient , outputs : Dict ):
64
67
65
68
while True :
66
- engine_core_outputs = await client .get_output_async ().outputs
69
+ engine_core_outputs = ( await client .get_output_async () ).outputs
67
70
68
71
if len (engine_core_outputs ) == 0 :
69
72
break
@@ -78,14 +81,25 @@ async def loop_until_done_async(client: EngineCoreClient, outputs: Dict):
78
81
break
79
82
80
83
84
+ # Dummy utility function to monkey-patch into engine core.
85
def echo(self, msg: str, err_msg: Optional[str] = None) -> str:
    """Echo ``msg`` back to the caller, or fail on request.

    Stand-in utility method intended to be monkey-patched onto the engine
    core so tests can exercise utility-call plumbing.

    Args:
        msg: Value returned unchanged on the success path.
        err_msg: When not ``None``, a ``ValueError`` carrying this text is
            raised instead of returning.

    Returns:
        The ``msg`` argument, unmodified.

    Raises:
        ValueError: If ``err_msg`` is not ``None``.
    """
    # Emit a trace line first so the call is visible even when it fails.
    trace_line = f"echo util function called: { msg } , { err_msg } "
    print(trace_line)

    # Success path: no error requested, hand the message straight back.
    if err_msg is None:
        return msg

    raise ValueError(err_msg)
90
+
91
+
81
92
@fork_new_process_for_each_test
82
93
@pytest .mark .parametrize ("multiprocessing_mode" , [True , False ])
83
94
def test_engine_core_client (monkeypatch , multiprocessing_mode : bool ):
84
95
85
96
with monkeypatch .context () as m :
86
97
m .setenv ("VLLM_USE_V1" , "1" )
87
98
88
- engine_args = EngineArgs (model = MODEL_NAME , compilation_config = 3 )
99
+ # Monkey-patch core engine utility function to test.
100
+ m .setattr (EngineCore , "echo" , echo , raising = False )
101
+
102
+ engine_args = EngineArgs (model = MODEL_NAME , enforce_eager = True )
89
103
vllm_config = engine_args .create_engine_config (
90
104
UsageContext .UNKNOWN_CONTEXT )
91
105
executor_class = Executor .get_class (vllm_config )
@@ -147,15 +161,30 @@ def test_engine_core_client(monkeypatch, multiprocessing_mode: bool):
147
161
148
162
client .abort_requests ([request .request_id ])
149
163
164
+ if multiprocessing_mode :
165
+ """Utility method invocation"""
150
166
151
- @fork_new_process_for_each_test
152
- @pytest .mark .asyncio
167
+ core_client : SyncMPClient = client
168
+
169
+ result = core_client ._call_utility ("echo" , "testarg" )
170
+ assert result == "testarg"
171
+
172
+ with pytest .raises (Exception ) as e_info :
173
+ core_client ._call_utility ("echo" , None , "help!" )
174
+
175
+ assert str (e_info .value ) == "Call to echo method failed: help!"
176
+
177
+
178
+ @pytest .mark .asyncio (loop_scope = "function" )
153
179
async def test_engine_core_client_asyncio (monkeypatch ):
154
180
155
- with monkeypatch .context () as m :
181
+ with monkeypatch .context () as m , ExitStack () as after :
156
182
m .setenv ("VLLM_USE_V1" , "1" )
157
183
158
- engine_args = EngineArgs (model = MODEL_NAME )
184
+ # Monkey-patch core engine utility function to test.
185
+ m .setattr (EngineCore , "echo" , echo , raising = False )
186
+
187
+ engine_args = EngineArgs (model = MODEL_NAME , enforce_eager = True )
159
188
vllm_config = engine_args .create_engine_config (
160
189
usage_context = UsageContext .UNKNOWN_CONTEXT )
161
190
executor_class = Executor .get_class (vllm_config )
@@ -166,6 +195,7 @@ async def test_engine_core_client_asyncio(monkeypatch):
166
195
executor_class = executor_class ,
167
196
log_stats = True ,
168
197
)
198
+ after .callback (client .shutdown )
169
199
170
200
MAX_TOKENS = 20
171
201
params = SamplingParams (max_tokens = MAX_TOKENS )
@@ -204,3 +234,14 @@ async def test_engine_core_client_asyncio(monkeypatch):
204
234
else :
205
235
assert len (outputs [req_id ]) == MAX_TOKENS , (
206
236
f"{ len (outputs [req_id ])= } , { MAX_TOKENS = } " )
237
+ """Utility method invocation"""
238
+
239
+ core_client : AsyncMPClient = client
240
+
241
+ result = await core_client ._call_utility_async ("echo" , "testarg" )
242
+ assert result == "testarg"
243
+
244
+ with pytest .raises (Exception ) as e_info :
245
+ await core_client ._call_utility_async ("echo" , None , "help!" )
246
+
247
+ assert str (e_info .value ) == "Call to echo method failed: help!"
0 commit comments