Skip to content

Commit 97e7fc1

Browse files
yannicks1
authored and
jimpang
committed
[Feature] Pluggable platform-specific scheduler (vllm-project#13161)
Signed-off-by: Yannick Schnider <yannick.schnider1@ibm.com> Signed-off-by: Yannick Schnider <Yannick.Schnider1@ibm.com>
1 parent a778a41 commit 97e7fc1

File tree

5 files changed

+56
-3
lines changed

5 files changed

+56
-3
lines changed

.buildkite/test-pipeline.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,7 @@ steps:
531531
- pip uninstall vllm_add_dummy_platform -y
532532
# end platform plugin tests
533533
# other tests continue here:
534+
- pytest -v -s plugins_tests/test_scheduler_plugins.py
534535
- pip install -e ./plugins/vllm_add_dummy_model
535536
- pytest -v -s distributed/test_distributed_oot.py
536537
- pytest -v -s entrypoints/openai/test_oot_registration.py # it needs a clean process
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# SPDX-License-Identifier: Apache-2.0
2+
3+
from vllm.core.scheduler import Scheduler
4+
5+
6+
class DummyScheduler(Scheduler):
7+
8+
def schedule(self):
9+
raise Exception("Exception raised by DummyScheduler")
10+
11+
12+
def test_scheduler_plugins():
13+
import pytest
14+
15+
from vllm.engine.arg_utils import EngineArgs
16+
from vllm.engine.llm_engine import LLMEngine
17+
from vllm.sampling_params import SamplingParams
18+
19+
with pytest.raises(Exception) as exception_info:
20+
21+
engine_args = EngineArgs(
22+
model="facebook/opt-125m",
23+
enforce_eager=True, # reduce test time
24+
scheduler_cls=DummyScheduler,
25+
)
26+
27+
engine = LLMEngine.from_engine_args(engine_args=engine_args)
28+
29+
sampling_params = SamplingParams(max_tokens=1)
30+
engine.add_request("0", "foo", sampling_params)
31+
engine.step()
32+
33+
assert str(exception_info.value) == "Exception raised by DummyScheduler"

vllm/config.py

+4
Original file line numberDiff line numberDiff line change
@@ -1497,6 +1497,10 @@ class SchedulerConfig:
14971497

14981498
chunked_prefill_enabled: bool = field(init=False)
14991499

1500+
# scheduler class or path. "vllm.core.scheduler.Scheduler" (default)
1501+
# or "mod.custom_class".
1502+
scheduler_cls: Union[str, Type[object]] = "vllm.core.scheduler.Scheduler"
1503+
15001504
def compute_hash(self) -> str:
15011505
"""
15021506
WARNING: Whenever a new field is added to this config,

vllm/engine/arg_utils.py

+10
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ class EngineArgs:
192192
collect_detailed_traces: Optional[str] = None
193193
disable_async_output_proc: bool = False
194194
scheduling_policy: Literal["fcfs", "priority"] = "fcfs"
195+
scheduler_cls: Union[str, Type[object]] = "vllm.core.scheduler.Scheduler"
195196

196197
override_neuron_config: Optional[Dict[str, Any]] = None
197198
override_pooler_config: Optional[PoolerConfig] = None
@@ -938,6 +939,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
938939
'priority (lower value means earlier handling) and time of '
939940
'arrival deciding any ties).')
940941

942+
parser.add_argument(
943+
'--scheduler-cls',
944+
default=EngineArgs.scheduler_cls,
945+
help='The scheduler class to use. "vllm.core.scheduler.Scheduler" '
946+
'is the default scheduler. Can be a class directly or the path to '
947+
'a class of form "mod.custom_class".')
948+
941949
parser.add_argument(
942950
'--override-neuron-config',
943951
type=json.loads,
@@ -1273,10 +1281,12 @@ def create_engine_config(self,
12731281
send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER
12741282
and parallel_config.use_ray),
12751283
policy=self.scheduling_policy,
1284+
scheduler_cls=self.scheduler_cls,
12761285
max_num_partial_prefills=self.max_num_partial_prefills,
12771286
max_long_partial_prefills=self.max_long_partial_prefills,
12781287
long_prefill_token_threshold=self.long_prefill_token_threshold,
12791288
)
1289+
12801290
lora_config = LoRAConfig(
12811291
bias_enabled=self.enable_lora_bias,
12821292
max_lora_rank=self.max_lora_rank,

vllm/engine/llm_engine.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,7 @@
1919
from vllm.config import (DecodingConfig, LoRAConfig, ModelConfig,
2020
ObservabilityConfig, ParallelConfig, SchedulerConfig,
2121
VllmConfig)
22-
from vllm.core.scheduler import (ScheduledSequenceGroup, Scheduler,
23-
SchedulerOutputs)
22+
from vllm.core.scheduler import ScheduledSequenceGroup, SchedulerOutputs
2423
from vllm.engine.arg_utils import EngineArgs
2524
from vllm.engine.metrics_types import StatLoggerBase, Stats
2625
from vllm.engine.output_processor.interfaces import (
@@ -58,7 +57,8 @@
5857
BaseTokenizerGroup, init_tokenizer_from_configs)
5958
from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled,
6059
usage_message)
61-
from vllm.utils import Counter, Device, deprecate_kwargs, weak_bind
60+
from vllm.utils import (Counter, Device, deprecate_kwargs,
61+
resolve_obj_by_qualname, weak_bind)
6262
from vllm.version import __version__ as VLLM_VERSION
6363

6464
logger = init_logger(__name__)
@@ -346,6 +346,11 @@ def get_tokenizer_for_seq(sequence: Sequence) -> AnyTokenizer:
346346
# Create the scheduler.
347347
# NOTE: the cache_config here have been updated with the numbers of
348348
# GPU and CPU blocks, which are profiled in the distributed executor.
349+
if isinstance(self.vllm_config.scheduler_config.scheduler_cls, str):
350+
Scheduler = resolve_obj_by_qualname(
351+
self.vllm_config.scheduler_config.scheduler_cls)
352+
else:
353+
Scheduler = self.vllm_config.scheduler_config.scheduler_cls
349354
self.scheduler = [
350355
Scheduler(
351356
self.scheduler_config, self.cache_config, self.lora_config,

0 commit comments

Comments
 (0)