 def test_scheduler_add_seq_group():
     block_size = 4
     scheduler_config = SchedulerConfig(
-        100,
-        64,
-        1,
+        "generate",
+        max_num_batched_tokens=100,
+        max_num_seqs=64,
+        max_model_len=1,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, cache_dtype="auto")
     cache_config.num_cpu_blocks = 4
@@ -42,9 +43,10 @@ def test_scheduler_add_seq_group():
 def test_scheduler_abort_seq_group():
     block_size = 4
     scheduler_config = SchedulerConfig(
-        100,
-        64,
-        1,
+        "generate",
+        max_num_batched_tokens=100,
+        max_num_seqs=64,
+        max_model_len=1,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = 4
@@ -70,9 +72,10 @@ def test_scheduler_schedule_simple():
     num_seq_group = 4
     max_model_len = 16
     scheduler_config = SchedulerConfig(
-        64,
-        num_seq_group,
-        max_model_len,
+        "generate",
+        max_num_batched_tokens=64,
+        max_num_seqs=num_seq_group,
+        max_model_len=max_model_len,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = 8
@@ -114,9 +117,10 @@ def test_scheduler_prefill_prioritized():
     max_model_len = 30
     max_batched_num_tokens = 30
     scheduler_config = SchedulerConfig(
-        max_batched_num_tokens,
-        2,
-        max_model_len,
+        "generate",
+        max_num_batched_tokens=max_batched_num_tokens,
+        max_num_seqs=2,
+        max_model_len=max_model_len,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = 16
@@ -145,9 +149,10 @@ def test_scheduler_schedule_preempt_abort():
     block_size = 4
     max_model_len = 16
     scheduler_config = SchedulerConfig(
-        64,
-        2,
-        max_model_len,
+        "generate",
+        max_num_batched_tokens=64,
+        max_num_seqs=2,
+        max_model_len=max_model_len,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = 2
@@ -204,9 +209,10 @@ def test_scheduler_max_seqs():
     max_seq_group = 2
     max_model_len = 16
     scheduler_config = SchedulerConfig(
-        64,
-        max_seq_group,
-        max_model_len,
+        "generate",
+        max_num_batched_tokens=64,
+        max_num_seqs=max_seq_group,
+        max_model_len=max_model_len,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = 8
@@ -248,9 +254,10 @@ def test_scheduler_max_seqs():
 def test_scheduler_delay_factor():
     block_size = 4
     scheduler_config = SchedulerConfig(
-        100,
-        64,
-        16,
+        "generate",
+        max_num_batched_tokens=100,
+        max_num_seqs=64,
+        max_model_len=16,
         delay_factor=0.5,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
@@ -350,9 +357,10 @@ def initialize_scheduler(
 ):
     block_size = block_size
     scheduler_config = SchedulerConfig(
-        max_token_budget,
-        max_num_seqs,
-        max_model_len,
+        "generate",
+        max_num_batched_tokens=max_token_budget,
+        max_num_seqs=max_num_seqs,
+        max_model_len=max_model_len,
     )
     cache_config = CacheConfig(block_size, 1.0, 1, "auto")
     cache_config.num_cpu_blocks = num_cpu_blocks
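Every hunk above makes the same mechanical change: SchedulerConfig now takes the runner type ("generate") as its first positional argument, and the token, sequence, and length limits move from positional to keyword arguments. A minimal sketch of the resulting call pattern, assuming the test file's existing import of SchedulerConfig from vllm.config (the import path and exact defaults are not shown in this diff and are an assumption):

    from vllm.config import SchedulerConfig  # import path assumed from the surrounding tests

    scheduler_config = SchedulerConfig(
        "generate",                   # runner type, now the first positional argument
        max_num_batched_tokens=100,   # previously the first positional argument
        max_num_seqs=64,              # previously the second positional argument
        max_model_len=16,             # previously the third positional argument
        delay_factor=0.5,             # optional, as in test_scheduler_delay_factor
    )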