升级到 TensorRT 7 后,加载或构建 cuda 引擎偶尔会崩溃
Loading or building cuda engine crashes occasionally after upgrading to TensorRT 7
我正在尝试用 C++ 运行 TensorRT 推理。有时,在构建新引擎或从文件加载引擎时,代码会崩溃。这个问题只是偶尔出现(有时运行起来没有任何问题)。我按照以下步骤准备网络:
// Prepares mEngine: either deserializes a previously serialized engine from the
// file named by mParams.loadEngine, or (when no engine file is given) parses the
// ONNX model and builds a new engine. This is a fragment of a larger function;
// its signature and whatever follows the build call are not shown here.
// Initialize the TensorRT plugin library with the logger and an empty plugin namespace.
initLibNvInferPlugins(&gLogger.getTRTLogger(), "");
// Path 1: an engine file was specified -- load it and return instead of building.
if (mParams.loadEngine.size() > 0)
{
// Raw bytes of the serialized engine and their count.
std::vector<char> trtModelStream;
size_t size{0};
std::ifstream file(mParams.loadEngine, std::ios::binary);
// NOTE(review): if the file cannot be opened this block is skipped, leaving
// trtModelStream empty and size == 0, yet deserializeCudaEngine below is still called.
if (file.good())
{
// Seek to the end to learn the file size, then rewind and read the whole file.
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream.resize(size);
file.read(trtModelStream.data(), size);
file.close();
}
// NOTE(review): gLogger is passed directly here, while the builder path below passes
// gLogger.getTRTLogger() -- confirm both resolve to the same logger object.
// NOTE(review): the returned runtime pointer is dereferenced without a null check.
IRuntime* infer_Runtime = nvinfer1::createInferRuntime(gLogger);
// A non-negative dlaCore is forwarded to the runtime before deserializing.
if (mParams.dlaCore >= 0)
{
infer_Runtime->setDLACore(mParams.dlaCore);
}
// Wrap the deserialized engine in a shared_ptr that uses InferDeleter as its deleter.
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
infer_Runtime->deserializeCudaEngine(trtModelStream.data(), size, nullptr), samplesCommon::InferDeleter());
// NOTE(review): this message is emitted before mEngine is checked, so it is printed
// even when deserialization returned null.
gLogInfo << "TRT Engine loaded from: " << mParams.loadEngine << endl;
// The runtime is destroyed manually here rather than through a smart pointer.
infer_Runtime->destroy();
// Report success only if an engine was actually produced.
if (!mEngine)
{
return false;
}
else
{
return true;
}
}
// Path 2: no engine file -- build an engine from the ONNX model.
// NOTE(review): none of builder, network, config or parser is checked for null
// before being dereferenced.
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));
// Create the network definition with the kEXPLICIT_BATCH creation flag set.
const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
auto parser = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, gLogger.getTRTLogger()));
mEngine = nullptr;
// NOTE(review): the result of parseFromFile is discarded, so the build below
// proceeds even if parsing the ONNX file failed.
parser->parseFromFile(
locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast<int>(gLogger.getReportableSeverity()));
// Calibrator life time needs to last until after the engine is built.
// NOTE(review): calibrator is declared but never assigned or used in the visible code.
std::unique_ptr<IInt8Calibrator> calibrator;
// Use one timing iteration (average and minimum) and set the workspace limit
// (presumably 4 GiB, via the 4_GiB literal).
config->setAvgTimingIterations(1);
config->setMinTimingIterations(1);
config->setMaxWorkspaceSize(4_GiB);
// NOTE(review): the network was created with kEXPLICIT_BATCH above; confirm that
// setMaxBatchSize has the intended effect for an explicit-batch network.
builder->setMaxBatchSize(mParams.batchSize);
// Build the engine and hand ownership to mEngine with InferDeleter as the deleter.
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
这里出现错误:
[05/12/2021-16:46:42] [I] [TRT] Detected 1 inputs and 1 output network tensors.
16:46:42: The program has unexpectedly finished.
加载现有引擎时此行崩溃:
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
infer_Runtime->deserializeCudaEngine(trtModelStream.data(), size, nullptr), samplesCommon::InferDeleter());
或者在构建引擎时:
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
更多信息:
TensorRT 7.2.3
Ubuntu 18.04
cuDNN 8.1.1
CUDA 11.1 update1
ONNX 1.6.0
Pytorch 1.5.0
终于明白了!
我重写了 CMakeLists.txt,添加了所有必需的库和路径,并删除了重复的库和路径。问题可能出在 cuBLAS 的库冲突上。
我正在尝试用 C++ 运行 TensorRT 推理。有时,在构建新引擎或从文件加载引擎时,代码会崩溃。这个问题只是偶尔出现(有时运行起来没有任何问题)。我按照以下步骤准备网络:
// Prepares mEngine: either deserializes a previously serialized engine from the
// file named by mParams.loadEngine, or (when no engine file is given) parses the
// ONNX model and builds a new engine. This is a fragment of a larger function;
// its signature and whatever follows the build call are not shown here.
// Initialize the TensorRT plugin library with the logger and an empty plugin namespace.
initLibNvInferPlugins(&gLogger.getTRTLogger(), "");
// Path 1: an engine file was specified -- load it and return instead of building.
if (mParams.loadEngine.size() > 0)
{
// Raw bytes of the serialized engine and their count.
std::vector<char> trtModelStream;
size_t size{0};
std::ifstream file(mParams.loadEngine, std::ios::binary);
// NOTE(review): if the file cannot be opened this block is skipped, leaving
// trtModelStream empty and size == 0, yet deserializeCudaEngine below is still called.
if (file.good())
{
// Seek to the end to learn the file size, then rewind and read the whole file.
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream.resize(size);
file.read(trtModelStream.data(), size);
file.close();
}
// NOTE(review): gLogger is passed directly here, while the builder path below passes
// gLogger.getTRTLogger() -- confirm both resolve to the same logger object.
// NOTE(review): the returned runtime pointer is dereferenced without a null check.
IRuntime* infer_Runtime = nvinfer1::createInferRuntime(gLogger);
// A non-negative dlaCore is forwarded to the runtime before deserializing.
if (mParams.dlaCore >= 0)
{
infer_Runtime->setDLACore(mParams.dlaCore);
}
// Wrap the deserialized engine in a shared_ptr that uses InferDeleter as its deleter.
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
infer_Runtime->deserializeCudaEngine(trtModelStream.data(), size, nullptr), samplesCommon::InferDeleter());
// NOTE(review): this message is emitted before mEngine is checked, so it is printed
// even when deserialization returned null.
gLogInfo << "TRT Engine loaded from: " << mParams.loadEngine << endl;
// The runtime is destroyed manually here rather than through a smart pointer.
infer_Runtime->destroy();
// Report success only if an engine was actually produced.
if (!mEngine)
{
return false;
}
else
{
return true;
}
}
// Path 2: no engine file -- build an engine from the ONNX model.
// NOTE(review): none of builder, network, config or parser is checked for null
// before being dereferenced.
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(gLogger.getTRTLogger()));
// Create the network definition with the kEXPLICIT_BATCH creation flag set.
const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(explicitBatch));
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
auto parser = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, gLogger.getTRTLogger()));
mEngine = nullptr;
// NOTE(review): the result of parseFromFile is discarded, so the build below
// proceeds even if parsing the ONNX file failed.
parser->parseFromFile(
locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), static_cast<int>(gLogger.getReportableSeverity()));
// Calibrator life time needs to last until after the engine is built.
// NOTE(review): calibrator is declared but never assigned or used in the visible code.
std::unique_ptr<IInt8Calibrator> calibrator;
// Use one timing iteration (average and minimum) and set the workspace limit
// (presumably 4 GiB, via the 4_GiB literal).
config->setAvgTimingIterations(1);
config->setMinTimingIterations(1);
config->setMaxWorkspaceSize(4_GiB);
// NOTE(review): the network was created with kEXPLICIT_BATCH above; confirm that
// setMaxBatchSize has the intended effect for an explicit-batch network.
builder->setMaxBatchSize(mParams.batchSize);
// Build the engine and hand ownership to mEngine with InferDeleter as the deleter.
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
这里出现错误:
[05/12/2021-16:46:42] [I] [TRT] Detected 1 inputs and 1 output network tensors.
16:46:42: The program has unexpectedly finished.
加载现有引擎时此行崩溃:
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
infer_Runtime->deserializeCudaEngine(trtModelStream.data(), size, nullptr), samplesCommon::InferDeleter());
或者在构建引擎时:
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
builder->buildEngineWithConfig(*network, *config), samplesCommon::InferDeleter());
更多信息:
TensorRT 7.2.3
Ubuntu 18.04
cuDNN 8.1.1
CUDA 11.1 update1
ONNX 1.6.0
Pytorch 1.5.0
终于明白了!我重写了 CMakeLists.txt,添加了所有必需的库和路径,并删除了重复的库和路径。问题可能出在 cuBLAS 的库冲突上。