Skip to content

Commit

Permalink
minor
Browse files Browse the repository at this point in the history
  • Loading branch information
WoosukKwon committed Nov 6, 2024
1 parent 59137fa commit e1414df
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions vllm/v1/worker/gpu_model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,12 +95,12 @@ def __init__(
self.use_cuda_graph = (envs.VLLM_TORCH_COMPILE_LEVEL
== CompilationLevel.PIECEWISE
and not self.model_config.enforce_eager)
# TODO(woosuk): Provide an option to tune the max cudagraph batch size.
self.cudagraph_batch_sizes = [1, 2, 4] + [i for i in range(8, 513, 8)]
max_num_tokens = self.max_num_tokens
self.input_ids = torch.zeros(max_num_tokens,
self.input_ids = torch.zeros(self.max_num_tokens,
dtype=torch.int32,
device=self.device)
self.positions = torch.zeros(max_num_tokens,
self.positions = torch.zeros(self.max_num_tokens,
dtype=torch.int64,
device=self.device)

Expand Down

0 comments on commit e1414df

Please sign in to comment.