 
 if TYPE_CHECKING:
     VLLM_SPYRE_DYNAMO_BACKEND: str = "sendnn_decoder"
-    # TODO uncomment those when scheduler is ready
-    # VLLM_SPYRE_WARMUP_PROMPT_LENS: Optional[List[int]] = None
-    # VLLM_SPYRE_WARMUP_NEW_TOKENS: Optional[List[int]] = None
-    # VLLM_SPYRE_WARMUP_BATCH_SIZES: Optional[List[int]] = None
 
 environment_variables: Dict[str, Callable[[], Any]] = {
 
-    # TODO uncomment when scheduler is ready
-    # # Defines the prompt lengths the Spyre accelerator should be prepared
-    # # for, formatted as comma separated list.
-    # "VLLM_SPYRE_WARMUP_PROMPT_LENS":
-    # lambda: [
-    #     int(p) for p in os.getenv(key='VLLM_SPYRE_WARMUP_PROMPT_LENS',
-    #                               default='64').split(',')
-    # ],
-
-    # TODO uncomment when scheduler is ready
-    # # Defines the max output tokens the Spyre accelerator should be prepared
-    # # for, formatted as comma separated list.
-    # "VLLM_SPYRE_WARMUP_NEW_TOKENS":
-    # lambda: [
-    #     int(d) for d in os.getenv(key='VLLM_SPYRE_WARMUP_NEW_TOKENS',
-    #                               default='20').split(',')
-    # ],
-
-    # TODO uncomment when scheduler is ready
-    # # Defines the batch sizes the Spyre accelerator should be prepared
-    # # for, formatted as comma separated list.
-    # "VLLM_SPYRE_WARMUP_BATCH_SIZES":
-    # lambda: [
-    #     int(b) for b in os.getenv(key='VLLM_SPYRE_WARMUP_BATCH_SIZES',
-    #                               default='1').split(',')
-    # ],
-
     # Defines the backend that torch.compile will use when using Spyre
     # Available options:
     # - "sendnn_decoder": Compile for execution on Spyre hardware for
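For reference, a minimal sketch of what the commented-out warmup settings removed above would do once the scheduler TODO is resolved: each variable is read lazily and parsed from a comma-separated string into a list of ints, with the defaults ('64', '20', '1') taken directly from the removed lines. The dict name warmup_environment_variables is illustrative only; in the file these entries would sit inside environment_variables.

import os
from typing import Any, Callable, Dict

# Sketch only: the same lazy env-var parsing shown (commented out) in the
# removed diff lines, written here uncommented so the intended behavior is
# visible. Each lambda splits a comma-separated string into a list of ints.
warmup_environment_variables: Dict[str, Callable[[], Any]] = {
    # Prompt lengths the Spyre accelerator should be prepared for.
    "VLLM_SPYRE_WARMUP_PROMPT_LENS":
    lambda: [
        int(p) for p in os.getenv(key='VLLM_SPYRE_WARMUP_PROMPT_LENS',
                                  default='64').split(',')
    ],
    # Max output tokens the Spyre accelerator should be prepared for.
    "VLLM_SPYRE_WARMUP_NEW_TOKENS":
    lambda: [
        int(d) for d in os.getenv(key='VLLM_SPYRE_WARMUP_NEW_TOKENS',
                                  default='20').split(',')
    ],
    # Batch sizes the Spyre accelerator should be prepared for.
    "VLLM_SPYRE_WARMUP_BATCH_SIZES":
    lambda: [
        int(b) for b in os.getenv(key='VLLM_SPYRE_WARMUP_BATCH_SIZES',
                                  default='1').split(',')
    ],
}

With none of the variables set, the VLLM_SPYRE_WARMUP_PROMPT_LENS lambda returns [64]; with VLLM_SPYRE_WARMUP_PROMPT_LENS=64,128 it returns [64, 128].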