TensorRTModel.cpp
#include "TensorRTModel.hpp"
class Logger : public nvinfer1::ILogger
{
void log(Severity severity, const char* msg) noexcept override
{
// suppress info-level messages
if (severity != Severity::kINFO)
std::cout << msg << std::endl;
}
} gLogger;
TensorRTModel::TensorRTModel()
{
    printf("TensorRTModel initialized!\n");
}

TensorRTModel::~TensorRTModel()
{
    delete engine;
    delete runtime;
}
// Reads an entire file into a string.
std::string readBufferFromFile(const std::string& path)
{
    std::ifstream file(path, std::ios::binary);
    return { std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>() };
}

// Reads the serialized TensorRT engine file into a byte buffer.
std::vector<char> readEngine(const std::string& enginePath)
{
    std::ifstream engineFile(enginePath, std::ios::binary);
    return std::vector<char>((std::istreambuf_iterator<char>(engineFile)), std::istreambuf_iterator<char>());
}
void TensorRTModel::loadModel(const char* modelPath)
{
    std::cout << "TensorRT version: "
              << NV_TENSORRT_MAJOR << "."
              << NV_TENSORRT_MINOR << "."
              << NV_TENSORRT_PATCH << "."
              << NV_TENSORRT_BUILD << std::endl;

    // Create a TensorRT runtime
    if (runtime != nullptr) {
        printf("Runtime already initialized\n");
        return;
    }
    runtime = nvinfer1::createInferRuntime(gLogger);

    if (engine != nullptr) {
        printf("Engine already initialized\n");
        return;
    }

    // Read the serialized engine from disk and deserialize it
    std::vector<char> engineData = readEngine(modelPath);
    engine = runtime->deserializeCudaEngine(engineData.data(), engineData.size());
    engineData.clear();
    if (engine == nullptr) {
        printf("Error loading engine\n");
        return;
    }
}
void TensorRTModel::predict(unsigned char* image, int height, int width, int channels)
{
    if (engine == nullptr) {
        printf("Engine not initialized\n");
        return;
    }
    printf("Running TensorRT inference on GPU\n");

    // Allocate GPU memory for the input and output buffers
    float* gpu_input;
    float* gpu_output;
    cudaMalloc((void**)&gpu_input, sizeof(float) * height * width * channels);
    cudaMalloc((void**)&gpu_output, sizeof(float) * height * width * channels);

    //!TODO: The input image is never uploaded here; the unsigned char pixels still
    // need to be converted to float and copied into gpu_input before inference.

    // Create an execution context
    nvinfer1::IExecutionContext* context = engine->createExecutionContext();

    // Set the input and output buffers for the execution context
    void* buffers[2] = { gpu_input, gpu_output };

    // Perform inference
    context->execute(1, buffers);

    // Copy the output data back to CPU memory
    float* cpu_output = new float[height * width * channels];
    cudaMemcpy(cpu_output, gpu_output, sizeof(float) * height * width * channels, cudaMemcpyDeviceToHost);

    // Clean up
    //!TODO: This is possibly the image output from the model so it has to be returned
    delete[] cpu_output;
    cudaFree(gpu_input);
    cudaFree(gpu_output);
    delete context;

    printf("TensorRT inference done!\n");
}
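
A minimal usage sketch, assuming a prebuilt serialized engine on disk; the file name "model.engine", the 224x224x3 dimensions, and the zero-filled dummy buffer are hypothetical and only illustrate how loadModel and predict are meant to be called.

#include "TensorRTModel.hpp"

#include <vector>

int main()
{
    // Hypothetical engine path and input dimensions for illustration only.
    const char* enginePath = "model.engine";
    const int height = 224, width = 224, channels = 3;

    // Dummy buffer standing in for a real decoded RGB image.
    std::vector<unsigned char> image(height * width * channels, 0);

    TensorRTModel model;
    model.loadModel(enginePath);                            // Deserialize the prebuilt engine
    model.predict(image.data(), height, width, channels);  // Run inference on the buffer
    return 0;
}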