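Minor cleanup: use self.max_num_tokens directly instead of a local alias, and add a TODO to make the max cudagraph batch size tunable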

vllm-project · Nov 6, 2024 · e1414df · e1414df
1 parent 59137fa
commit e1414df
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -95,12 +95,12 @@ def __init__(
         self.use_cuda_graph = (envs.VLLM_TORCH_COMPILE_LEVEL
                                == CompilationLevel.PIECEWISE
                                and not self.model_config.enforce_eager)
+        # TODO(woosuk): Provide an option to tune the max cudagraph batch size.
         self.cudagraph_batch_sizes = [1, 2, 4] + [i for i in range(8, 513, 8)]
-        max_num_tokens = self.max_num_tokens
-        self.input_ids = torch.zeros(max_num_tokens,
+        self.input_ids = torch.zeros(self.max_num_tokens,
                                      dtype=torch.int32,
                                      device=self.device)
-        self.positions = torch.zeros(max_num_tokens,
+        self.positions = torch.zeros(self.max_num_tokens,
                                      dtype=torch.int64,
                                      device=self.device)