Compiling a Transformer using torch.compile and TensorRT
==============================================================

This interactive script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a transformer-based model."""

# %%
# Imports and Model Definition
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
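
# The full import and model setup is omitted from this excerpt. A minimal
# sketch of the definitions the later cells rely on is shown below; the
# specific model, input shapes, and argument values are assumptions, not
# necessarily those used in the full example.
import torch
import torch_tensorrt
from transformers import BertModel

# Initialize a transformer model in evaluation mode with sample inputs on the
# GPU (assumed model and input shapes)
model = BertModel.from_pretrained("bert-base-uncased").eval().to("cuda")
inputs = [
    torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
    torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
]

# Optional arguments forwarded to the Torch-TensorRT backend (assumed values)
enabled_precisions = {torch.float}  # Precisions TensorRT may choose kernels from
debug = False  # Verbose compilation logging
workspace_size = 20 << 30  # Maximum TensorRT workspace size, in bytes
min_block_size = 7  # Minimum ops per TensorRT-accelerated subgraph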
torch_executed_ops = {}

# %%
# Compilation with `torch.compile`
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Define backend compilation keyword arguments
compilation_kwargs = {
    "enabled_precisions": enabled_precisions,
    "debug": debug,
    "workspace_size": workspace_size,
    "min_block_size": min_block_size,
    "torch_executed_ops": torch_executed_ops,
}

# Build and compile the model with torch.compile, using Torch-TensorRT backend
optimized_model = torch.compile(
    model,
    backend="torch_tensorrt",
    options=compilation_kwargs,
)
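# torch.compile is lazy; this first call triggers graph capture and TensorRT
# engine construction for the supported subgraphs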
optimized_model(*inputs)

# %%
# Equivalently, we could have run the above via the convenience frontend, like so:
# `torch_tensorrt.compile(model, ir="torch_compile", inputs=inputs, **compilation_kwargs)`
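
# Written out in full, that convenience-frontend call would look like the
# sketch below (same model, inputs, and keyword arguments as above); it is
# left commented out so the model is not compiled a second time:
# optimized_model = torch_tensorrt.compile(
#     model,
#     ir="torch_compile",
#     inputs=inputs,
#     **compilation_kwargs,
# )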

# %%
# Inference
# ^^^^^^^^^
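
# A minimal sketch of the inference step that would follow, under the same
# assumed input shapes as above. New tensors with matching shapes reuse the
# compiled TensorRT engine without triggering recompilation.
new_inputs = [
    torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
    torch.randint(0, 2, (1, 14), dtype=torch.int32).to("cuda"),
]
new_outputs = optimized_model(*new_inputs)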