feat: Upgrade TRT to 8.4 #1152
Conversation
BREAKING CHANGE: Removing deprecated settings like min timing iterations
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
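For context, a minimal sketch of the builder-config migration behind this change (illustrative, assuming the standard TensorRT 8.4 nvinfer1 API; not the exact PR diff). The deprecated min-timing and max-workspace setters are retired in favor of memory pool limits:

#include <NvInfer.h>

void configureBuilder(nvinfer1::IBuilderConfig* cfg, uint64_t workspace_size) {
  // Pre-8.4 calls being retired:
  //   cfg->setMinTimingIterations(2);           // deprecated in 8.4, no replacement
  //   cfg->setMaxWorkspaceSize(workspace_size); // superseded by memory pools
  cfg->setAvgTimingIterations(1); // avg timing iterations remain supported
  cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, workspace_size);
}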
Code conforms to C++ style guidelines
// Lower threshold because FP16
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results, trt_results, 2e-1));
}
// TEST(Partitioning, ComputeResNet50FallbackGraphCorrectly) {
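The looser 2e-1 tolerance follows from FP16's 10-bit mantissa: per-operation rounding error is roughly 1e-3 and compounds across the layers of a network. A tiny self-contained illustration (an editorial aside, not part of the PR):

#include <cmath>
#include <cstdio>

int main() {
  // Machine epsilon: fp32 carries a 23-bit mantissa, fp16 only 10 bits.
  std::printf("fp32 eps ~ %g\n", std::pow(2.0, -23)); // ~1.19e-07
  std::printf("fp16 eps ~ %g\n", std::pow(2.0, -10)); // ~9.77e-04
  return 0;
}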
@bowang007 Can you work with @peri044 to determine the best testing strategy here?
Sure.
@peri044 can you open a second PR into …
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Code conforms to C++ style guidelines
This reverts commit d736499.
…A memory options
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/tmp/changes.txt
index 5aeac3b..775c71d 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -225,11 +225,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
info.convert_info.engine_settings.workspace_size = workspace_size;
- TORCHTRT_CHECK(dla_sram_size >= 4096, "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
+ TORCHTRT_CHECK(
+ dla_sram_size >= 4096,
+ "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
info.convert_info.engine_settings.dla_sram_size = dla_sram_size;
- TORCHTRT_CHECK(dla_local_dram_size >= 4096, "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
+ TORCHTRT_CHECK(
+ dla_local_dram_size >= 4096,
+ "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
info.convert_info.engine_settings.dla_local_dram_size = dla_local_dram_size;
- TORCHTRT_CHECK(dla_global_dram_size >= 4096, "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
+ TORCHTRT_CHECK(
+ dla_global_dram_size >= 4096,
+ "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
info.convert_info.engine_settings.dla_global_dram_size = dla_global_dram_size;
return info;
}
diff --git a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/tmp/changes.txt
index 310f23d..a171190 100644
--- a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -67,7 +67,8 @@ void RegisterTRTCompileSpec() {
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_sram_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_local_dram_size);
- ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
}
diff --git a/workspace/core/conversion/conversionctx/ConversionCtx.cpp b/tmp/changes.txt
index 688aaa7..d123ee4 100644
--- a/workspace/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/tmp/changes.txt
@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
}
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
- if (settings.workspace_size != 0){
+ if (settings.workspace_size != 0) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
}
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
- if (settings.dla_sram_size != 1048576){
+ if (settings.dla_sram_size != 1048576) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
}
- if (settings.dla_local_dram_size != 1073741824){
+ if (settings.dla_local_dram_size != 1073741824) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
}
- if (settings.dla_global_dram_size != 536870912){
+ if (settings.dla_global_dram_size != 536870912) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
}
}
diff --git a/workspace/cpp/bin/torchtrtc/main.cpp b/tmp/changes.txt
index 075af18..a0a91be 100644
--- a/workspace/cpp/bin/torchtrtc/main.cpp
+++ b/tmp/changes.txt
@@ -117,8 +117,7 @@ int main(int argc, char** argv) {
parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"});
args::ValueFlag<uint64_t> workspace_size(
parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"});
- args::ValueFlag<uint64_t> dla_sram_size(
- parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
+ args::ValueFlag<uint64_t> dla_sram_size(parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
args::ValueFlag<uint64_t> dla_local_dram_size(
parser, "dla_local_dram_size", "DLA Local DRAM size", {"dla-local-dram-size"});
args::ValueFlag<uint64_t> dla_global_dram_size(
ERROR: Some files do not conform to style guidelines
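The TORCHTRT_CHECK messages in the diff above encode TRT 8.4's per-pool DLA limits and their defaults. A minimal sketch of the underlying builder calls, assuming the standard nvinfer1 API (pool names and default sizes taken from the diff: 1 MiB managed SRAM, 1 GiB local DRAM, 512 MiB global DRAM):

#include <NvInfer.h>

void configureDLA(nvinfer1::IBuilderConfig* cfg, int32_t dla_core) {
  cfg->setDLACore(dla_core);
  cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, 1ULL << 20);
  cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, 1ULL << 30);
  cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, 512ULL << 20);
}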
…nd added DLA memory size configurations
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
There are some changes that do not conform to C++ style guidelines:
diff --git a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/tmp/changes.txt
index 5aeac3b..775c71d 100644
--- a/workspace/py/torch_tensorrt/csrc/tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -225,11 +225,17 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
info.convert_info.engine_settings.workspace_size = workspace_size;
- TORCHTRT_CHECK(dla_sram_size >= 4096, "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
+ TORCHTRT_CHECK(
+ dla_sram_size >= 4096,
+ "DLA managed SRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 MiB");
info.convert_info.engine_settings.dla_sram_size = dla_sram_size;
- TORCHTRT_CHECK(dla_local_dram_size >= 4096, "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
+ TORCHTRT_CHECK(
+ dla_local_dram_size >= 4096,
+ "DLA Local DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 1 GiB");
info.convert_info.engine_settings.dla_local_dram_size = dla_local_dram_size;
- TORCHTRT_CHECK(dla_global_dram_size >= 4096, "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
+ TORCHTRT_CHECK(
+ dla_global_dram_size >= 4096,
+ "DLA Global DRAM size must be at least 4 KiB and must be a power of 2. This defaults to 512 MiB");
info.convert_info.engine_settings.dla_global_dram_size = dla_global_dram_size;
return info;
}
diff --git a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/tmp/changes.txt
index 9165b21..ba2e168 100644
--- a/workspace/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp
+++ b/tmp/changes.txt
@@ -65,7 +65,8 @@ void RegisterTRTCompileSpec() {
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_sram_size);
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_local_dram_size);
- ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
+ ADD_FIELD_GET_SET_REGISTRATION(
+ TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, dla_global_dram_size);
ADD_FIELD_GET_SET_REGISTRATION(
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
}
diff --git a/workspace/core/conversion/conversionctx/ConversionCtx.cpp b/tmp/changes.txt
index 688aaa7..d123ee4 100644
--- a/workspace/core/conversion/conversionctx/ConversionCtx.cpp
+++ b/tmp/changes.txt
@@ -107,7 +107,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
}
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
- if (settings.workspace_size != 0){
+ if (settings.workspace_size != 0) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kWORKSPACE, settings.workspace_size);
}
@@ -124,13 +124,13 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
settings.enabled_precisions.find(nvinfer1::DataType::kFLOAT) == settings.enabled_precisions.end(),
"DLA supports only fp16 or int8 precision");
cfg->setDLACore(settings.device.dla_core);
- if (settings.dla_sram_size != 1048576){
+ if (settings.dla_sram_size != 1048576) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_MANAGED_SRAM, settings.dla_sram_size);
}
- if (settings.dla_local_dram_size != 1073741824){
+ if (settings.dla_local_dram_size != 1073741824) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_LOCAL_DRAM, settings.dla_local_dram_size);
}
- if (settings.dla_global_dram_size != 536870912){
+ if (settings.dla_global_dram_size != 536870912) {
cfg->setMemoryPoolLimit(nvinfer1::MemoryPoolType::kDLA_GLOBAL_DRAM, settings.dla_global_dram_size);
}
}
diff --git a/workspace/core/conversion/converters/converter_util.cpp b/tmp/changes.txt
index a6a2bbd..7452615 100644
--- a/workspace/core/conversion/converters/converter_util.cpp
+++ b/tmp/changes.txt
@@ -207,13 +207,13 @@ nvinfer1::ITensor* clamp(
nvinfer1::ITensor* lower_bound,
nvinfer1::ITensor* upper_bound,
std::string const& name) {
-
auto max_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMAX, x, lower_bound, "max layer for " + name);
TORCHTRT_CHECK(max_layer, "Unable to create max layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min layer for clamp");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp");
auto min_itensor = min_layer->getOutput(0);
@@ -227,13 +227,13 @@ nvinfer1::ITensor* clamp_to_input_dim(
nvinfer1::ITensor* input_dim,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
auto one = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one);
- auto upper_bound_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
+ auto upper_bound_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, input_dim, one_itensor, "sub layer for " + name);
TORCHTRT_CHECK(upper_bound_layer, "Unable to create sub layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << upper_bound_layer->getName() << " for clamp to inputDim");
auto upper_bound = upper_bound_layer->getOutput(0);
@@ -243,7 +243,8 @@ nvinfer1::ITensor* clamp_to_input_dim(
LOG_DEBUG(ctx->logger, "Create " << max_layer->getName() << " for clamp to inputDim");
auto max_itensor = max_layer->getOutput(0);
- auto min_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
+ auto min_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kMIN, max_itensor, upper_bound, "min layer for " + name);
TORCHTRT_CHECK(min_layer, "Unable to create min_layer for clamp to inputDim");
LOG_DEBUG(ctx->logger, "Create " << min_layer->getName() << " for clamp to inputDim");
auto min_itensor = min_layer->getOutput(0);
@@ -257,7 +258,6 @@ nvinfer1::ITensor* normalize_indices(
nvinfer1::ITensor* indices,
int nbdims,
std::string const& name) {
-
auto zero = torch::zeros({nbdims}).to(torch::kI32);
auto neg = -torch::ones({nbdims}).to(torch::kI32);
auto zero_itensor = tensor_to_const(ctx, zero);
@@ -307,17 +307,20 @@ nvinfer1::ITensor* get_slice_size(
at::Tensor one_tensor = torch::ones({nbdims}).to(torch::kI32);
auto one_itensor = tensor_to_const(ctx, one_tensor);
- auto sub_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
+ auto sub_layer =
+ add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUB, end, start, "get_slice_size sub layer for " + name);
TORCHTRT_CHECK(sub_layer, "Unable to create sub layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << sub_layer->getName() << " for calculate_output_size");
auto sub_itensor = sub_layer->getOutput(0);
- auto div_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
+ auto div_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kDIV, sub_itensor, stride, "get_slice_size div layer for " + name);
TORCHTRT_CHECK(div_layer, "Unable to create div layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << div_layer->getName() << " for calculate_output_size");
auto div_itensor = div_layer->getOutput(0);
- auto add_layer = add_elementwise(ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
+ auto add_layer = add_elementwise(
+ ctx, nvinfer1::ElementWiseOperation::kSUM, div_itensor, one_itensor, "get_slice_size sum layer for " + name);
TORCHTRT_CHECK(add_layer, "Unable to create add layer in calculate_output_size");
LOG_DEBUG(ctx->logger, "Create " << add_layer->getName() << " for calculate_output_size");
auto size_itensor = add_layer->getOutput(0);
diff --git a/workspace/core/conversion/converters/impl/select.cpp b/tmp/changes.txt
index 3599ab9..d33f09a 100644
--- a/workspace/core/conversion/converters/impl/select.cpp
+++ b/tmp/changes.txt
@@ -103,121 +103,118 @@ nvinfer1::ITensor* roll(
auto select_registrations TORCHTRT_UNUSED =
RegisterNodeConversionPatterns()
- .pattern(
- {"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensorOrFreeze(ctx);
- auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
- auto dim = args[1].unwrapToInt();
- // Handle negative axis by refering to nbDims of input Tensor
- dim = dim < 0 ? dim + maxDim : dim;
- auto ind = (int32_t)args[2].unwrapToInt();
- // Along the specified dimension, handle negative index by subtracting along length of dimension.
- ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
- LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
- LOG_DEBUG("Dimension to select: " << dim);
- LOG_DEBUG("Index: " << ind);
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
- auto const_out = tensor_to_const(ctx, indices);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto out = gather_layer->getOutput(0);
+ .pattern({"aten::select.int(Tensor(a) self, int dim, int index) -> (Tensor(a))",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensorOrFreeze(ctx);
+ auto maxDim = static_cast<int64_t>(in->getDimensions().nbDims);
+ auto dim = args[1].unwrapToInt();
+ // Handle negative axis by refering to nbDims of input Tensor
+ dim = dim < 0 ? dim + maxDim : dim;
+ auto ind = (int32_t)args[2].unwrapToInt();
+ // Along the specified dimension, handle negative index by subtracting along length of dimension.
+ ind = ind < 0 ? ind + in->getDimensions().d[dim] : ind;
+ LOG_DEBUG("Gather input dimensions: " << in->getDimensions());
+ LOG_DEBUG("Dimension to select: " << dim);
+ LOG_DEBUG("Index: " << ind);
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::tensor({ind}).to(torch::kI32);
+ auto const_out = tensor_to_const(ctx, indices);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, dim);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto out = gather_layer->getOutput(0);
+
+ LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+
+ if (out->getDimensions().nbDims != 1) {
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ out = shuffle_layer->getOutput(0);
+ }
+
+ out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- LOG_DEBUG("Gather tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ auto start = (int32_t)args[2].unwrapToInt();
+ auto length = (int32_t)args[3].unwrapToInt();
- if (out->getDimensions().nbDims != 1) {
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::squeezeDims(out->getDimensions(), dim));
- shuffle_layer->setName(util::node_info(n).c_str());
- out = shuffle_layer->getOutput(0);
- }
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
- out = ctx->AssociateValueAndTensor(n->outputs()[0], out);
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
- return true;
- }})
- .pattern(
- {"aten::narrow(Tensor(a) self, int dim, int start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- auto start = (int32_t)args[2].unwrapToInt();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(start, start + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
- .pattern(
- {"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto axis = args[1].unwrapToInt();
- torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
- int32_t startIdx = start.item().to<int32_t>();
- auto length = (int32_t)args[3].unwrapToInt();
-
- // index to access needs to be an at::Tensor
- at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
- auto weights = Weights(ctx, indices);
-
- // IConstantLayer to convert indices from Weights to ITensor
- auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
- TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
- auto const_out = const_layer->getOutput(0);
-
- // IGatherLayer takes in input tensor, the indices, and the axis
- // of input tensor to take indices from
- auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
- TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
- auto gather_out = gather_layer->getOutput(0);
-
- // IShuffleLayer removes redundant dimensions
- auto shuffle_layer = ctx->net->addShuffle(*gather_out);
- TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
- shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
- shuffle_layer->setName(util::node_info(n).c_str());
- auto shuffle_out = shuffle_layer->getOutput(0);
-
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+ return true;
+ }})
+ .pattern({"aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, int length) -> Tensor(a)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto axis = args[1].unwrapToInt();
+ torch::Tensor start = args[2].IValue()->toTensor().to(torch::kI32);
+ int32_t startIdx = start.item().to<int32_t>();
+ auto length = (int32_t)args[3].unwrapToInt();
+
+ // index to access needs to be an at::Tensor
+ at::Tensor indices = torch::arange(startIdx, startIdx + length, 1).to(torch::kI32);
+ auto weights = Weights(ctx, indices);
+
+ // IConstantLayer to convert indices from Weights to ITensor
+ auto const_layer = ctx->net->addConstant(weights.shape, weights.data);
+ TORCHTRT_CHECK(const_layer, "Unable to create constant layer from node: " << *n);
+ auto const_out = const_layer->getOutput(0);
+
+ // IGatherLayer takes in input tensor, the indices, and the axis
+ // of input tensor to take indices from
+ auto gather_layer = ctx->net->addGather(*in, *const_out, axis);
+ TORCHTRT_CHECK(gather_layer, "Unable to create gather layer from node: " << *n);
+ auto gather_out = gather_layer->getOutput(0);
+
+ // IShuffleLayer removes redundant dimensions
+ auto shuffle_layer = ctx->net->addShuffle(*gather_out);
+ TORCHTRT_CHECK(shuffle_layer, "Unable to create shuffle layer from node: " << *n);
+ shuffle_layer->setReshapeDimensions(util::unpadDims(gather_out->getDimensions()));
+ shuffle_layer->setName(util::node_info(n).c_str());
+ auto shuffle_out = shuffle_layer->getOutput(0);
+
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], shuffle_out);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
- return true;
- }})
+ return true;
+ }})
.pattern(
{"aten::embedding(Tensor weight, Tensor indices, int padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -239,30 +236,29 @@ auto select_registrations TORCHTRT_UNUSED =
return true;
}})
- .pattern(
- {"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
- [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
- auto in = args[0].ITensor();
- auto shifts = args[1].unwrapToIntList().vec();
- auto dims = args[2].unwrapToIntList().vec();
-
- TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
- if (ctx->input_is_dynamic) {
- TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
- } else {
- auto in_shape = util::toVec(in->getDimensions());
- for (size_t i = 0; i < dims.size(); i++) {
- auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
- TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
- in = roll(ctx, in, shifts[i], dim, in_shape);
- }
- auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
-
- LOG_DEBUG("Output tensor shape: " << out->getDimensions());
-
- return true;
- }
- }})
+ .pattern({"aten::roll(Tensor self, int[1] shifts, int[1] dims=[]) -> (Tensor)",
+ [](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
+ auto in = args[0].ITensor();
+ auto shifts = args[1].unwrapToIntList().vec();
+ auto dims = args[2].unwrapToIntList().vec();
+
+ TORCHTRT_CHECK(dims.size() == shifts.size(), "dims.size() should be equal to shifts.size()");
+ if (ctx->input_is_dynamic) {
+ TORCHTRT_THROW_ERROR("aten::roll is currently not support in dynamic input shape compilation");
+ } else {
+ auto in_shape = util::toVec(in->getDimensions());
+ for (size_t i = 0; i < dims.size(); i++) {
+ auto dim = dims[i] < 0 ? (in_shape.size() + dims[i]) : dims[i];
+ TORCHTRT_CHECK(dim < in_shape.size(), "Dimension out of range");
+ in = roll(ctx, in, shifts[i], dim, in_shape);
+ }
+ auto out = ctx->AssociateValueAndTensor(n->outputs()[0], in);
+
+ LOG_DEBUG("Output tensor shape: " << out->getDimensions());
+
+ return true;
+ }
+ }})
.pattern(
{"aten::index.Tensor(Tensor self, Tensor?[] indices) -> (Tensor)",
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
@@ -319,7 +315,8 @@ auto select_registrations TORCHTRT_UNUSED =
int startIdx = 0;
auto startIdxIVal = args[2].IValue();
if (!startIdxIVal->isNone()) {
- startIdx = startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
+ startIdx =
+ startIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : startIdxIVal->toInt();
startIdx = maxDim == -1 ? startIdx : std::min(startIdx, maxDim);
}
// Handle case when given tensor index is negative
@@ -331,7 +328,8 @@ auto select_registrations TORCHTRT_UNUSED =
int endIdx = maxDim; // -1 for dynamic shape
auto endIdxIVal = args[3].IValue();
if (!endIdxIVal->isNone()) {
- int truncate_value = endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
+ int truncate_value =
+ endIdxIVal->toInt() > std::numeric_limits<int32_t>::max() ? maxDim : endIdxIVal->toInt();
endIdx = maxDim == -1 ? truncate_value : std::min(truncate_value, maxDim);
}
if (maxDim > 0) {
@@ -385,7 +383,8 @@ auto select_registrations TORCHTRT_UNUSED =
// update start and end
nvinfer1::ITensor* out_start;
nvinfer1::ITensor* out_end;
- auto start_end = normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
+ auto start_end =
+ normalize_start_and_end(ctx, ishape_tensor, start_itensor, end_itensor, nbdims, node_name);
out_start = start_end[0];
out_end = start_end[1];
@@ -397,7 +396,7 @@ auto select_registrations TORCHTRT_UNUSED =
slice_layer->setInput(2, *size_itensor); // size, must be set if input is dynamic
}
auto slice_out = slice_layer->getOutput(0);
-
+
auto out = ctx->AssociateValueAndTensor(n->outputs()[0], slice_out);
LOG_DEBUG("Slice layer output shape: " << out->getDimensions());
diff --git a/workspace/core/conversion/converters/converter_util.h b/tmp/changes.txt
index cdf2ee5..b155499 100644
--- a/workspace/core/conversion/converters/converter_util.h
+++ b/tmp/changes.txt
@@ -1,8 +1,8 @@
#pragma once
+#include <limits>
#include <map>
#include <string>
-#include <limits>
#include "core/conversion/conversionctx/ConversionCtx.h"
#include "core/conversion/converters/Weights.h"
diff --git a/workspace/tests/core/conversion/converters/test_cast.cpp b/tmp/changes.txt
index 092cdb3..d26c7a0 100644
--- a/workspace/tests/core/conversion/converters/test_cast.cpp
+++ b/tmp/changes.txt
@@ -135,7 +135,6 @@ TEST(Converters, ATenBoolToINT32TensorConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenToSingleConvertsCorrectly) {
const auto graph = R"IR(
graph(%y.1 : Tensor):
@@ -164,7 +163,6 @@ TEST(Converters, ATenToSingleConvertsCorrectly) {
ASSERT_TRUE(torch_tensorrt::tests::util::almostEqual(jit_results[0], trt, 2e-6));
}
-
TEST(Converters, ATenTypeAsConvertsCorrectly) {
const auto graph = R"IR(
graph(%0 : Tensor,
diff --git a/workspace/cpp/bin/torchtrtc/main.cpp b/tmp/changes.txt
index 6c207d7..51ec2c5 100644
--- a/workspace/cpp/bin/torchtrtc/main.cpp
+++ b/tmp/changes.txt
@@ -117,8 +117,7 @@ int main(int argc, char** argv) {
parser, "num_iters", "Number of averaging timing iterations used to select kernels", {"num-avg-timing-iters"});
args::ValueFlag<uint64_t> workspace_size(
parser, "workspace_size", "Maximum size of workspace given to TensorRT", {"workspace-size"});
- args::ValueFlag<uint64_t> dla_sram_size(
- parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
+ args::ValueFlag<uint64_t> dla_sram_size(parser, "dla_sram_size", "DLA managed SRAM size", {"dla-sram-size"});
args::ValueFlag<uint64_t> dla_local_dram_size(
parser, "dla_local_dram_size", "DLA Local DRAM size", {"dla-local-dram-size"});
args::ValueFlag<uint64_t> dla_global_dram_size(
ERROR: Some files do not conform to style guidelines
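For users, these knobs surface on the compile spec. A hedged sketch of setting them through the C++ frontend; the dla_* field names come from the registration diff above, while the surrounding API shape (namespace ts, Device, enabled_precisions) is the existing Torch-TensorRT C++ API, so treat the exact layout as an assumption:

#include <torch/script.h>
#include "torch_tensorrt/torch_tensorrt.h"

void buildForDLA(torch::jit::Module& mod) {
  auto spec = torch_tensorrt::ts::CompileSpec({torch_tensorrt::Input({1, 3, 224, 224})});
  spec.device.device_type = torch_tensorrt::Device::DeviceType::kDLA;
  spec.enabled_precisions = {torch::kHalf}; // DLA supports only fp16 or int8
  spec.dla_sram_size = 1ULL << 20;          // managed SRAM, default 1 MiB
  spec.dla_local_dram_size = 1ULL << 30;    // local DRAM, default 1 GiB
  spec.dla_global_dram_size = 512ULL << 20; // global DRAM, default 512 MiB
  auto trt_mod = torch_tensorrt::ts::compile(mod, spec);
}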
Description
Upgrade TRT to 8.4