
Commit 1aec4d3

Revert changes in data.py and scheduler.py
Signed-off-by: Pooya Davoodi <pooya.davoodi@parasail.io>
1 parent 48f7947 commit 1aec4d3

File tree: 2 files changed (+9, -40 lines)
  vllm/core/scheduler.py
  vllm/inputs/data.py


vllm/core/scheduler.py (+9, -29)

@@ -12,7 +12,6 @@
 from vllm.core.interfaces import AllocStatus, BlockSpaceManager
 from vllm.logger import init_logger
 from vllm.lora.request import LoRARequest
-from vllm.pooling_params import PoolingParams
 from vllm.prompt_adapter.request import PromptAdapterRequest
 from vllm.sequence import (Sequence, SequenceData, SequenceGroup,
                            SequenceGroupMetadata, SequenceGroupMetadataDelta,
@@ -524,7 +523,7 @@ def _schedule_running(
             chunked number of tokens are scheduled if
             `budget.num_batched_tokens` has not enough capacity to schedule
             all tokens.
-
+
         Returns:
             SchedulerRunningOutputs.
         """
@@ -842,10 +841,10 @@ def _schedule_priority_preemption(
                 self._get_num_new_uncached_and_cached_tokens(
                     seq_group, SequenceStatus.WAITING, False, budget))

-            # Only preempt if priority inversion exists
+            #Only preempt if priority inversion exists
             while running_queue and self._get_priority(
                     running_queue[-1]) > self._get_priority(seq_group):
-                # Only preempt if waiting sequence cannot be allocated
+                #Only preempt if waiting sequence cannot be allocated
                 can_allocate = self.block_manager.can_allocate(seq_group)
                 if (num_new_tokens_uncached > 0
                         and can_allocate == AllocStatus.OK
@@ -855,7 +854,7 @@ def _schedule_priority_preemption(
                         )):
                     break

-                # Adjust budget to remove the victim sequence group
+                #Adjust budget to remove the victim sequence group
                 vseq_group = running_queue.pop()
                 num_running_tokens_uncached, _ = (
                     self._get_num_new_uncached_and_cached_tokens(
@@ -866,11 +865,11 @@ def _schedule_priority_preemption(
                 budget.subtract_num_seqs(vseq_group.request_id,
                                          num_running_seqs)

-                # Preempt out the victim sequence group
+                #Preempt out the victim sequence group
                 self._preempt(vseq_group, blocks_to_swap_out)
                 waiting_queue.appendleft(vseq_group)
                 force_preemption_count += 1
-            # Put the sequence back into the waiting queue
+            #Put the sequence back into the waiting queue
            waiting_queue.appendleft(seq_group)

        waiting_queue = deque(sorted(waiting_queue, key=self._get_priority))
@@ -1037,7 +1036,7 @@ def _schedule_prefills(

     def _schedule_default(self) -> SchedulerOutputs:
         """Schedule queued requests.
-
+
         The current policy is designed to optimize the throughput. First,
         it batches as many prefill requests as possible. And it schedules
         decodes. If there's a pressure on GPU memory, decode requests can
@@ -1142,7 +1141,7 @@ def _schedule_default(self) -> SchedulerOutputs:

     def _schedule_chunked_prefill(self) -> SchedulerOutputs:
         """Schedule queued requests.
-
+
         Chunked prefill allows to chunk prefill requests, batch them together
         with decode requests. This policy 1. schedule as many decoding requests
         as possible. 2. schedule chunked prefill requests that are not
@@ -1351,25 +1350,6 @@ def schedule(
                     seqs[0].data.get_len()):
                 do_sample = False

-            pooling_params = seq_group.pooling_params
-
-            # Store instruction_seq in pooling_params.
-            instruction_seq = seq.inputs.inputs.get("instruction_seq")
-            if instruction_seq is not None:
-                if pooling_params is None:
-                    pooling_params = PoolingParams()
-                    pooling_params.additional_data = {
-                        "instruction_seq": instruction_seq
-                    }
-                elif pooling_params.additional_data is None:
-                    pooling_params.additional_data = {
-                        "instruction_seq": instruction_seq
-                    }
-                else:
-                    pooling_params.additional_data[
-                        "instruction_seq"] = seq.inputs.inputs.get(
-                            "instruction_seq")
-
             # It assumes the scheduled_seq_groups is ordered by
             # prefill < decoding.
             if is_first_prefill or not self.scheduler_config.send_delta_data:
@@ -1380,7 +1360,7 @@ def schedule(
                     sampling_params=seq_group.sampling_params,
                     block_tables=block_tables,
                     do_sample=do_sample,
-                    pooling_params=pooling_params,
+                    pooling_params=seq_group.pooling_params,
                     token_chunk_size=token_chunk_size,
                     lora_request=seq_group.lora_request,
                     computed_block_nums=common_computed_block_nums,

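Net effect of the scheduler.py side of the revert: the removed block copied `instruction_seq` from the token inputs into `PoolingParams.additional_data` before building `SequenceGroupMetadata`; after this commit the scheduler simply forwards `seq_group.pooling_params` unchanged. A minimal standalone sketch of the reverted behavior follows; `attach_instruction_seq` is a hypothetical helper name used only for illustration and does not exist in the tree.

from typing import Optional

from vllm.pooling_params import PoolingParams


def attach_instruction_seq(
        pooling_params: Optional[PoolingParams],
        instruction_seq: Optional[str]) -> Optional[PoolingParams]:
    # Sketch of the reverted scheduler logic: ensure instruction_seq
    # ends up in pooling_params.additional_data, creating objects as needed.
    if instruction_seq is None:
        return pooling_params
    if pooling_params is None:
        pooling_params = PoolingParams()
    if pooling_params.additional_data is None:
        pooling_params.additional_data = {}
    pooling_params.additional_data["instruction_seq"] = instruction_seq
    return pooling_params
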
vllm/inputs/data.py (+0, -11)

@@ -170,14 +170,6 @@ class TokenInputs(TypedDict):
     to pass the mm_processor_kwargs to each of them.
     """

-    instruction_seq: NotRequired[Optional[str]]
-    """
-    The instruction sequence that is usually prepended to the original prompt
-    when passing to the model. Certain models need to extract this instruction
-    sequence from the prompt in order to adjust certain operations of the
-    model such as the attention mask.
-    """
-

 def token_inputs(
     prompt_token_ids: List[int],
@@ -187,7 +179,6 @@ def token_inputs(
     multi_modal_inputs: Optional["MultiModalKwargs"] = None,
     multi_modal_placeholders: Optional["MultiModalPlaceholderDict"] = None,
     mm_processor_kwargs: Optional[Dict[str, Any]] = None,
-    instruction_seq: Optional[str] = None,
 ) -> TokenInputs:
     """Construct :class:`TokenInputs` from optional values."""
     inputs = TokenInputs(type="token", prompt_token_ids=prompt_token_ids)
@@ -204,8 +195,6 @@ def token_inputs(
         inputs["multi_modal_placeholders"] = multi_modal_placeholders
     if mm_processor_kwargs is not None:
         inputs["mm_processor_kwargs"] = mm_processor_kwargs
-    if instruction_seq is not None:
-        inputs["instruction_seq"] = instruction_seq

     return inputs

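With `instruction_seq` dropped from `TokenInputs` and `token_inputs()`, callers build token inputs from the remaining fields only. A minimal usage sketch, assuming only the parameters visible in the diff above (the token IDs are placeholder values):

from vllm.inputs.data import token_inputs

# Build a TokenInputs dict; the removed instruction_seq key no longer exists.
inputs = token_inputs(prompt_token_ids=[1, 2, 3, 4])

assert inputs["type"] == "token"
assert "instruction_seq" not in inputs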