Skip to content

Commit eeb1a13

Browse files
akxsiddharth9820
authored and committed
[Misc] Move device options to a single place (vllm-project#8322)
1 parent 3fef9d5 commit eeb1a13

File tree

3 files changed

+23
-20
lines changed

3 files changed

+23
-20
lines changed

benchmarks/benchmark_latency.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from tqdm import tqdm
1111

1212
from vllm import LLM, SamplingParams
13-
from vllm.engine.arg_utils import EngineArgs
13+
from vllm.engine.arg_utils import DEVICE_OPTIONS, EngineArgs
1414
from vllm.inputs import PromptInputs
1515
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
1616
from vllm.utils import FlexibleArgumentParser
@@ -205,13 +205,11 @@ def run_to_completion(profile_dir: Optional[str] = None):
205205
default=None,
206206
help=('path to save the pytorch profiler output. Can be visualized '
207207
'with ui.perfetto.dev or Tensorboard.'))
208-
parser.add_argument(
209-
"--device",
210-
type=str,
211-
default="auto",
212-
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
213-
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
214-
'CPU.')
208+
parser.add_argument("--device",
209+
type=str,
210+
default="auto",
211+
choices=DEVICE_OPTIONS,
212+
help='device type for vLLM execution')
215213
parser.add_argument('--block-size',
216214
type=int,
217215
default=16,

benchmarks/benchmark_throughput.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from transformers import (AutoModelForCausalLM, AutoTokenizer,
1212
PreTrainedTokenizerBase)
1313

14-
from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs
14+
from vllm.engine.arg_utils import DEVICE_OPTIONS, AsyncEngineArgs, EngineArgs
1515
from vllm.entrypoints.openai.api_server import (
1616
build_async_engine_client_from_engine_args)
1717
from vllm.model_executor.layers.quantization import QUANTIZATION_METHODS
@@ -451,13 +451,11 @@ def main(args: argparse.Namespace):
451451
'accuracy issues. FP8_E5M2 (without scaling) is only supported on '
452452
'cuda version greater than 11.8. On ROCm (AMD GPU), FP8_E4M3 is '
453453
'instead supported for common inference criteria.')
454-
parser.add_argument(
455-
"--device",
456-
type=str,
457-
default="auto",
458-
choices=["auto", "cuda", "cpu", "openvino", "tpu", "xpu"],
459-
help='device type for vLLM execution, supporting CUDA, OpenVINO and '
460-
'CPU.')
454+
parser.add_argument("--device",
455+
type=str,
456+
default="auto",
457+
choices=DEVICE_OPTIONS,
458+
help='device type for vLLM execution')
461459
parser.add_argument(
462460
"--num-scheduler-steps",
463461
type=int,

vllm/engine/arg_utils.py

+11-4
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,16 @@
2626

2727
ALLOWED_DETAILED_TRACE_MODULES = ["model", "worker", "all"]
2828

29+
DEVICE_OPTIONS = [
30+
"auto",
31+
"cuda",
32+
"neuron",
33+
"cpu",
34+
"openvino",
35+
"tpu",
36+
"xpu",
37+
]
38+
2939

3040
def nullable_str(val: str):
3141
if not val or val == "None":
@@ -553,10 +563,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
553563
parser.add_argument("--device",
554564
type=str,
555565
default=EngineArgs.device,
556-
choices=[
557-
"auto", "cuda", "neuron", "cpu", "openvino",
558-
"tpu", "xpu"
559-
],
566+
choices=DEVICE_OPTIONS,
560567
help='Device type for vLLM execution.')
561568
parser.add_argument('--num-scheduler-steps',
562569
type=int,

0 commit comments

Comments (0)