Skip to content

Commit acd24a9

Browse files
authored
[BugFix] Lazily initialize `_copy_stream` to avoid torch initializing the wrong GPU device (vllm-project#8403)
1 parent 72c1353 commit acd24a9

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

vllm/worker/multi_step_model_runner.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -230,12 +230,15 @@ def __init__(self, base_model_runner: GPUModelRunnerBase, *args, **kwargs):
230230
self._base_model_runner: GPUModelRunnerBase = base_model_runner
231231

232232
self.is_multi_step = self.scheduler_config.is_multi_step
233-
# used to copy tensors from GPU to CPU asynchronously
234-
self._copy_stream = torch.cuda.Stream()
235233
self.pinned_sampled_token_ids: Optional[torch.Tensor] = None
236234

237235
self.pythonization_cache = PythonizationCache()
238236

237+
@functools.cached_property
238+
def _copy_stream(self):
239+
# used to copy tensors from GPU to CPU asynchronously
240+
return torch.cuda.Stream()
241+
239242
def make_model_input_from_broadcasted_tensor_dict(
240243
self, tensor_dict: Dict[str, Any]) -> StatefulModelInput:
241244
model_input = (StatefulModelInput.from_broadcasted_tensor_dict(

0 commit comments

Comments (0)