@@ -192,6 +192,7 @@ class EngineArgs:
192
192
collect_detailed_traces : Optional [str ] = None
193
193
disable_async_output_proc : bool = False
194
194
scheduling_policy : Literal ["fcfs" , "priority" ] = "fcfs"
195
+ scheduler_cls : Union [str , Type [object ]] = "vllm.core.scheduler.Scheduler"
195
196
196
197
override_neuron_config : Optional [Dict [str , Any ]] = None
197
198
override_pooler_config : Optional [PoolerConfig ] = None
@@ -938,6 +939,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
938
939
'priority (lower value means earlier handling) and time of '
939
940
'arrival deciding any ties).' )
940
941
942
+ parser .add_argument (
943
+ '--scheduler-cls' ,
944
+ default = EngineArgs .scheduler_cls ,
945
+ help = 'The scheduler class to use. "vllm.core.scheduler.Scheduler" '
946
+ 'is the default scheduler. Can be a class directly or the path to '
947
+ 'a class of form "mod.custom_class".' )
948
+
941
949
parser .add_argument (
942
950
'--override-neuron-config' ,
943
951
type = json .loads ,
@@ -1273,10 +1281,12 @@ def create_engine_config(self,
1273
1281
send_delta_data = (envs .VLLM_USE_RAY_SPMD_WORKER
1274
1282
and parallel_config .use_ray ),
1275
1283
policy = self .scheduling_policy ,
1284
+ scheduler_cls = self .scheduler_cls ,
1276
1285
max_num_partial_prefills = self .max_num_partial_prefills ,
1277
1286
max_long_partial_prefills = self .max_long_partial_prefills ,
1278
1287
long_prefill_token_threshold = self .long_prefill_token_threshold ,
1279
1288
)
1289
+
1280
1290
lora_config = LoRAConfig (
1281
1291
bias_enabled = self .enable_lora_bias ,
1282
1292
max_lora_rank = self .max_lora_rank ,
0 commit comments