4
4
import torch_tensorrt
5
5
from functools import partial
6
6
7
- from typing import Any , Sequence
7
+ from typing import Any , Optional , Sequence
8
8
from torch_tensorrt import EngineCapability , Device
9
9
from torch_tensorrt .fx .utils import LowerPrecision
10
10
17
17
WORKSPACE_SIZE ,
18
18
MIN_BLOCK_SIZE ,
19
19
PASS_THROUGH_BUILD_FAILURES ,
20
+ MAX_AUX_STREAMS ,
21
+ VERSION_COMPATIBLE ,
22
+ OPTIMIZATION_LEVEL ,
20
23
USE_EXPERIMENTAL_RT ,
21
24
)
22
25
@@ -46,6 +49,9 @@ def compile(
46
49
min_block_size = MIN_BLOCK_SIZE ,
47
50
torch_executed_ops = [],
48
51
torch_executed_modules = [],
52
+ max_aux_streams = MAX_AUX_STREAMS ,
53
+ version_compatible = VERSION_COMPATIBLE ,
54
+ optimization_level = OPTIMIZATION_LEVEL ,
49
55
use_experimental_rt = USE_EXPERIMENTAL_RT ,
50
56
** kwargs ,
51
57
):
@@ -93,6 +99,9 @@ def compile(
93
99
workspace_size = workspace_size ,
94
100
min_block_size = min_block_size ,
95
101
torch_executed_ops = torch_executed_ops ,
102
+ max_aux_streams = max_aux_streams ,
103
+ version_compatible = version_compatible ,
104
+ optimization_level = optimization_level ,
96
105
use_experimental_rt = use_experimental_rt ,
97
106
** kwargs ,
98
107
)
@@ -117,6 +126,9 @@ def create_backend(
117
126
min_block_size : int = MIN_BLOCK_SIZE ,
118
127
torch_executed_ops : Sequence [str ] = set (),
119
128
pass_through_build_failures : bool = PASS_THROUGH_BUILD_FAILURES ,
129
+ max_aux_streams : Optional [int ] = MAX_AUX_STREAMS ,
130
+ version_compatible : bool = VERSION_COMPATIBLE ,
131
+ optimization_level : Optional [int ] = OPTIMIZATION_LEVEL ,
120
132
use_experimental_rt : bool = USE_EXPERIMENTAL_RT ,
121
133
** kwargs ,
122
134
):
@@ -129,6 +141,10 @@ def create_backend(
129
141
min_block_size: Minimum number of operators per TRT-Engine Block
130
142
torch_executed_ops: Sequence of operations to run in Torch, regardless of converter coverage
131
143
pass_through_build_failures: Whether to fail on TRT engine build errors (True) or not (False)
144
+ max_aux_streams: Maximum number of allowed auxiliary TRT streams for each engine
145
+ version_compatible: Provide version forward-compatibility for engine plan files
146
+ optimization_level: Builder optimization level (0-5); higher levels imply longer build time,
147
+ searching for more optimization options. TRT defaults to 3
132
148
use_experimental_rt: Whether to use the new experimental TRTModuleNext for TRT engines
133
149
Returns:
134
150
Backend for torch.compile
@@ -143,6 +159,9 @@ def create_backend(
143
159
min_block_size = min_block_size ,
144
160
torch_executed_ops = torch_executed_ops ,
145
161
pass_through_build_failures = pass_through_build_failures ,
162
+ max_aux_streams = max_aux_streams ,
163
+ version_compatible = version_compatible ,
164
+ optimization_level = optimization_level ,
146
165
use_experimental_rt = use_experimental_rt ,
147
166
)
148
167
0 commit comments