Skip to content

Commit d871453

Browse files
authored
[Misc] Add param max-model-len in benchmark_latency.py (#5629)
1 parent 7d46c8d commit d871453

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

benchmarks/benchmark_latency.py

+7
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ def main(args: argparse.Namespace):
29 29
tensor_parallel_size=args.tensor_parallel_size,
30 30
trust_remote_code=args.trust_remote_code,
31 31
dtype=args.dtype,
32+
max_model_len=args.max_model_len,
32 33
enforce_eager=args.enforce_eager,
33 34
kv_cache_dtype=args.kv_cache_dtype,
34 35
quantization_param_path=args.quantization_param_path,
@@ -150,6 +151,12 @@ def run_to_completion(profile_dir: Optional[str] = None):
150 151
parser.add_argument('--trust-remote-code',
151 152
action='store_true',
152 153
help='trust remote code from huggingface')
154+
parser.add_argument(
155+
'--max-model-len',
156+
type=int,
157+
default=None,
158+
help='Maximum length of a sequence (including prompt and output). '
159+
'If None, will be derived from the model.')
153 160
parser.add_argument(
154 161
'--dtype',
155 162
type=str,

0 commit comments

Comments
 (0)