Test the algorithm library program

main
熊继淙 1 year ago
parent c105f6f3cd
commit 88d37b5034

@@ -0,0 +1,265 @@
// TestTRTInterDll.cpp : This file contains the "main" function. Program execution begins and ends there.
//
#include <iostream>
#include <filesystem>
#include <string>
#include "utils.h"
#include "MI_Interface.h"
#include <future>
namespace fs = std::filesystem;
bool buffer2Mat(const MN_VisionImage::MS_ImageParam& _inImg, cv::Mat& _mat);
void test01()
{
std::vector<std::string> imagePath;
imagePath.emplace_back("./imageDatas/daisy.jpg");
imagePath.emplace_back("./imageDatas/dandelion.jpg");
imagePath.emplace_back("./imageDatas/sunflower.jpg");
imagePath.emplace_back("./imageDatas/tulip.jpg");
std::vector<cv::Mat> matImgs;
for (const auto& _var : imagePath)
{
matImgs.emplace_back(cv::imread(_var, 1));
}
utils::InitParameter params;
params.m_modelType = utils::ME_ModelType::E_RESNET34;
params.class_names = utils::dataSets::flower_labels;
//params.class_names = utils::dataSets::voc20;
params.num_class = 5; // for flower_labels
//params.num_class = 20; // for voc2012
params.batch_size = 1;
params.dst_h = 224;
params.dst_w = 224;
// wrap the first image with the MS_ImageParam constructor (same pattern as test02() below)
params.mImage = MN_VisionImage::MS_ImageParam((uchar*)matImgs[0].data, matImgs[0].cols, matImgs[0].rows, MN_VisionImage::ME_ImageType::E_RGB);
params.input_output_names = { "input", "output" };
params.conf_thresh = 0.25f;
params.iou_thresh = 0.45f;
params.save_path = "./imageDatas";
params.meanVec = { 0.406, 0.456, 0.485 };
params.stdVec = { 0.225, 0.224, 0.229 };
MI_VisionInterface* resnet34Ptr = getInterfacePtr(params);
if (!resnet34Ptr)
{
return;
}
utils::InitParameter params1;
params1.m_modelType = utils::ME_ModelType::E_RESNET50;
params1.class_names = utils::dataSets::flower_labels;
//params1.class_names = utils::dataSets::voc20;
params1.num_class = 5; // for flower_labels
//params1.num_class = 20; // for voc2012
params1.batch_size = 1;
params1.dst_h = 224;
params1.dst_w = 224;
// wrap the first image with the MS_ImageParam constructor (same pattern as test02() below)
params1.mImage = MN_VisionImage::MS_ImageParam((uchar*)matImgs[0].data, matImgs[0].cols, matImgs[0].rows, MN_VisionImage::ME_ImageType::E_RGB);
params1.input_output_names = { "input", "output" };
params1.conf_thresh = 0.25f;
params1.iou_thresh = 0.45f;
params1.save_path = "./imageDatas";
params1.meanVec = { 0.406, 0.456, 0.485 };
params1.stdVec = { 0.225, 0.224, 0.229 };
MI_VisionInterface* resnet50Ptr = getInterfacePtr(params1);
if (!resnet50Ptr)
{
return;
}
// Thread test: initialization runs in one thread, inference runs in another
std::string onnxFile = "./imageDatas/resnet34_0407.onnx";
std::string onnxFile1 = "./imageDatas/resnet50.onnx";
#if 0
bool bRet = false;
bRet = resnet34Ptr->initEngine(onnxFile);
if (!bRet)
{
return;
}
bRet = resnet34Ptr->check();
if (!bRet)
{
return;
}
std::vector<utils::MR_Result> detectResVec;
bRet = resnet34Ptr->doTRTInfer(matImgs, &detectResVec, nullptr);
if (!bRet)
{
return;
}
#endif // 0
// Initialization thread
auto initRes = std::async(std::launch::async, [&] {
bool bRet = false;
// Load and initialize the first model
bRet = resnet34Ptr->initEngine(onnxFile);
if (!bRet)
{
return false;
}
// Load and initialize the second model
bRet = resnet50Ptr->initEngine(onnxFile1);
if (!bRet)
{
return false;
}
return true;
});
if (!initRes.get()) // bail out if either engine failed to initialize
{
return;
}
auto initRes1 = std::async(std::launch::async, [&] {
bool bRet = false;
bRet = resnet34Ptr->check();
if (!bRet)
{
return false;
}
std::vector<utils::MR_Result> detectResVec1;
bRet = resnet34Ptr->doTRTInfer(matImgs, &detectResVec1, nullptr);
if (!bRet)
{
return false;
}
bRet = resnet50Ptr->check();
if (!bRet)
{
return false;
}
std::vector<utils::MR_Result> detectResVec2;
bRet = resnet50Ptr->doTRTInfer(matImgs, &detectResVec2, nullptr);
if (!bRet)
{
return false;
}
return true;
});
//initRes1.wait();
auto initRes2 = std::async(std::launch::async, [&] {
bool bRet = false;
bRet = resnet34Ptr->check();
if (!bRet)
{
return false;
}
std::vector<utils::MR_Result> detectResVec1;
bRet = resnet34Ptr->doTRTInfer(matImgs, &detectResVec1, nullptr);
if (!bRet)
{
return false;
}
bRet = resnet50Ptr->check();
if (!bRet)
{
return false;
}
std::vector<utils::MR_Result> detectResVec2;
bRet = resnet50Ptr->doTRTInfer(matImgs, &detectResVec2, nullptr);
if (!bRet)
{
return false;
}
return true;
});
//initRes2.wait();
}
void test02()
{
cv::Mat img = cv::imread("./imageDatas/daisy.jpg", 1);
MN_VisionImage::MS_ImageParam bufImg((uchar*)img.data, img.cols, img.rows, MN_VisionImage::ME_ImageType::E_RGB);
cv::Mat convertImg;
bool bRet = buffer2Mat(bufImg, convertImg);
cv::imshow("src", img);
cv::imshow("image", convertImg);
cv::waitKey(0);
}
int main()
{
test01();
system("pause");
return 0;
}
// Run program: Ctrl + F5 or Debug > "Start Without Debugging" menu
// Debug program: F5 or Debug > "Start Debugging" menu
bool buffer2Mat(const MN_VisionImage::MS_ImageParam& _inImg, cv::Mat& _mat)
{
uchar* pBuf = _inImg.m_data.get(); // get the address of the image data buffer
int nW = _inImg.m_width;
int nH = _inImg.m_height;
int nChannel = _inImg.m_channels;
if (pBuf == nullptr || nW <= 1 || nH <= 1)
{
// LOG_ERROR("convert buffer to mat, input image error. \n");
return false;
}
if (_inImg.mImgType == MN_VisionImage::ME_ImageType::E_GRAY)
{
_mat = cv::Mat(nH, nW, CV_8UC1, pBuf);
}
else if (_inImg.mImgType == MN_VisionImage::ME_ImageType::E_RGBA)
{
_mat = cv::Mat(nH, nW, CV_8UC4, pBuf);
}
else
{
_mat = cv::Mat(nH, nW, CV_8UC3, pBuf);
}
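// Note: the cv::Mat constructors above wrap pBuf directly without copying,
// so _mat is only valid while _inImg.m_data stays alive; use _mat.clone()
// if the caller needs to own the pixel data.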
if (_mat.data == nullptr || _mat.cols <= 1 || _mat.rows <= 1)
{
// LOG_ERROR("convert buffer to mat, convert image failed. \n");
return false;
}
return true;
}
// 0.3 * 0.3 -- 12-inch wafer, recognizing 9 dies in one shot -- measure the inference time (see the sketch below)
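For the inference-time measurement mentioned in the comment above, a minimal sketch (not part of this commit) that reuses the doTRTInfer() call from test01() and only adds std::chrono timing; the loop count of 100 is an arbitrary choice:

#include <chrono>
// Times repeated inference on an already-initialized interface pointer and prints the mean.
void timeInference(MI_VisionInterface* _ptr, const std::vector<cv::Mat>& _imgs)
{
    const int loops = 100; // arbitrary sample size
    std::vector<utils::MR_Result> results;
    auto t0 = std::chrono::steady_clock::now();
    for (int i = 0; i < loops; ++i)
    {
        _ptr->doTRTInfer(_imgs, &results, nullptr);
    }
    auto t1 = std::chrono::steady_clock::now();
    double totalMs = std::chrono::duration<double, std::milli>(t1 - t0).count();
    std::cout << "average inference time: " << totalMs / loops << " ms" << std::endl;
}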

@@ -0,0 +1,147 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{3e72c625-2f8b-4fb6-aa05-70a5c3a44bb9}</ProjectGuid>
<RootNamespace>TestTRTInterDll</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>MultiByte</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="cuda11_6_Release_x64.props" />
<Import Project="tensorrt_860_release_x64.props" />
<Import Project="vs2019-opencv-release-X64.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>false</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<LanguageStandard>stdcpp17</LanguageStandard>
<LanguageStandard_C>Default</LanguageStandard_C>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>false</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>.\trtinfer_lib\include;.\trtinfer_lib\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<Optimization>Disabled</Optimization>
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
<LanguageStandard>stdcpp17</LanguageStandard>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>.\trtinfer_lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>.\trtinfer_lib\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="TestTRTInterDll.cpp" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="源文件">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="头文件">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="资源文件">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="TestTRTInterDll.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
</Project>

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\lib\x64\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>

Binary files not shown (3 images added: 38 KiB, 50 KiB, 52 KiB).

@@ -0,0 +1,5 @@
daisy
dandelion
rose
sunflower
tulip

File diff suppressed because it is too large.

Binary files not shown (6 images added: 22 KiB, 97 KiB, 86 KiB, 29 KiB, 256 KiB, 165 KiB).

@@ -0,0 +1,590 @@
[2024-04-24 14:10:05.326] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 14:10:18.478] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:10:20.401] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:10:21.501] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:10:23.149] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:10:24.470] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:10:29.415] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:10:29.426] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:10:29.503] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:10:29.516] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:10:29.517] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:10:29.518] <thread 28644> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:10:54.633] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:10:54.634] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:10:54.634] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:10:54.634] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:10:54.635] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:10:54.636] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:11:03.643] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:11:03.643] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:11:03.643] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:11:03.643] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:11:03.644] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:11:03.645] <thread 22540> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:21:13.820] <thread 26732> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 14:21:16.855] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:21:20.801] <thread 26732> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:21:30.715] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:21:47.950] <thread 26732> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:21:56.426] <thread 26732> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:22:20.751] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:22:20.751] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:22:20.752] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:22:20.752] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:22:20.752] <thread 8620> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:23:23.249] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 14:23:27.717] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:23:29.141] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:23:30.553] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:23:37.512] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:23:37.512] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:23:37.513] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:23:37.513] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:23:37.516] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:23:37.517] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:24:11.946] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:24:11.946] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:24:11.947] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:24:11.947] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:24:11.949] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:24:11.950] <thread 4208> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:30:58.451] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 14:31:00.901] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:31:03.057] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:31:04.749] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:31:06.295] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:31:06.296] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:31:06.296] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:31:06.297] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:31:06.298] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:06.298] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:06.299] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:31:06.299] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:31:06.301] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:06.302] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:06.303] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:31:06.305] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:31:29.839] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:31:29.839] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:31:29.839] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:31:29.840] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:29.840] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:31:29.840] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:31:29.840] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:29.841] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:29.841] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:31:29.842] <thread 5748> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:31:29.842] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:31:34.113] <thread 11120> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:34:44.910] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 14:34:46.422] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:34:47.848] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 14:34:49.635] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 14:34:50.939] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:34:50.943] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:34:50.944] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:34:50.943] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:34:50.945] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:34:50.946] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:34:50.946] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:34:50.946] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:34:50.946] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:34:50.947] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:34:50.947] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:34:50.948] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:35:33.809] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:36:17.884] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:36:17.884] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 14:36:17.885] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 14:36:17.885] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:36:17.886] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:36:17.886] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:36:17.886] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:36:17.886] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 14:36:17.887] <thread 27336> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 14:36:43.975] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
[2024-04-24 14:36:49.419] <thread 26484> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 15:08:22.000] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
[2024-04-24 15:08:23.646] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 15:08:32.437] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
[2024-04-24 15:08:34.620] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
trt model has existed.
[2024-04-24 15:08:37.449] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 15:08:37.451] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 15:08:37.452] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 15:08:37.451] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 15:08:37.454] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:08:37.455] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:08:37.456] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 15:08:37.455] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 15:08:37.456] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:08:37.457] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:08:37.458] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 15:08:37.459] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 15:08:57.068] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 15:08:58.514] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 15:09:05.783] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:09:05.784] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 15:09:05.784] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:09:05.785] <thread 20500> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
The context's info:
[2024-04-24 15:09:16.908] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
The engine's info:
[2024-04-24 15:09:16.909] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 0, input
[2024-04-24 15:09:16.910] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:09:16.910] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
idx = 1, output
[2024-04-24 15:09:16.911] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
[2024-04-24 15:09:16.912] <thread 21616> [info]
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
The context's info:

@@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup>
<Link>
<AdditionalLibraryDirectories>..\MF_TRTInfer\lib\tensorrt_lib\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>..\MF_TRTInfer\lib\tensorrt_lib\lib\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
<ClCompile>
<AdditionalIncludeDirectories>..\MF_TRTInfer\lib\tensorrt_lib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemGroup />
</Project>

@@ -0,0 +1,381 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef BATCH_STREAM_H
#define BATCH_STREAM_H
#include "NvInfer.h"
#include "common.h"
#include <algorithm>
#include <stdio.h>
#include <vector>
class IBatchStream
{
public:
virtual void reset(int firstBatch) = 0;
virtual bool next() = 0;
virtual void skip(int skipCount) = 0;
virtual float* getBatch() = 0;
virtual float* getLabels() = 0;
virtual int getBatchesRead() const = 0;
virtual int getBatchSize() const = 0;
virtual nvinfer1::Dims getDims() const = 0;
};
class MNISTBatchStream : public IBatchStream
{
public:
MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, const std::string& labelsFile,
const std::vector<std::string>& directories)
: mBatchSize{batchSize}
, mMaxBatches{maxBatches}
, mDims{3, {1, 28, 28}} //!< We already know the dimensions of MNIST images.
{
readDataFile(locateFile(dataFile, directories));
readLabelsFile(locateFile(labelsFile, directories));
}
void reset(int firstBatch) override
{
mBatchCount = firstBatch;
}
bool next() override
{
if (mBatchCount >= mMaxBatches)
{
return false;
}
++mBatchCount;
return true;
}
void skip(int skipCount) override
{
mBatchCount += skipCount;
}
float* getBatch() override
{
return mData.data() + (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
}
float* getLabels() override
{
return mLabels.data() + (mBatchCount * mBatchSize);
}
int getBatchesRead() const override
{
return mBatchCount;
}
int getBatchSize() const override
{
return mBatchSize;
}
nvinfer1::Dims getDims() const override
{
return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
}
private:
void readDataFile(const std::string& dataFilePath)
{
std::ifstream file{dataFilePath.c_str(), std::ios::binary};
int magicNumber, numImages, imageH, imageW;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2051 && "Magic Number does not match the expected value for an MNIST image set");
// Read number of images and dimensions
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));
numImages = samplesCommon::swapEndianness(numImages);
imageH = samplesCommon::swapEndianness(imageH);
imageW = samplesCommon::swapEndianness(imageW);
// The MNIST data is made up of unsigned bytes, so we need to cast to float and normalize.
int numElements = numImages * imageH * imageW;
std::vector<uint8_t> rawData(numElements);
file.read(reinterpret_cast<char*>(rawData.data()), numElements * sizeof(uint8_t));
mData.resize(numElements);
std::transform(
rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast<float>(val) / 255.f; });
}
void readLabelsFile(const std::string& labelsFilePath)
{
std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
int magicNumber, numImages;
file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
// All values in the MNIST files are big endian.
magicNumber = samplesCommon::swapEndianness(magicNumber);
ASSERT(magicNumber == 2049 && "Magic Number does not match the expected value for an MNIST labels file");
file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
numImages = samplesCommon::swapEndianness(numImages);
std::vector<uint8_t> rawLabels(numImages);
file.read(reinterpret_cast<char*>(rawLabels.data()), numImages * sizeof(uint8_t));
mLabels.resize(numImages);
std::transform(
rawLabels.begin(), rawLabels.end(), mLabels.begin(), [](uint8_t val) { return static_cast<float>(val); });
}
int mBatchSize{0};
int mBatchCount{0}; //!< The batch that will be read on the next invocation of next()
int mMaxBatches{0};
nvinfer1::Dims mDims{};
std::vector<float> mData{};
std::vector<float> mLabels{};
};
class BatchStream : public IBatchStream
{
public:
BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::string const& suffix,
std::vector<std::string> const& directories)
: mBatchSize(batchSize)
, mMaxBatches(maxBatches)
, mPrefix(prefix)
, mSuffix(suffix)
, mDataDir(directories)
{
std::ifstream file(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), std::ios::binary);
ASSERT(file.good());
int d[4];
file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
mDims.nbDims = 4; // The number of dimensions.
mDims.d[0] = d[0]; // Batch Size
mDims.d[1] = d[1]; // Channels
mDims.d[2] = d[2]; // Height
mDims.d[3] = d[3]; // Width
ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0);
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::vector<std::string> const& directories)
: BatchStream(batchSize, maxBatches, prefix, ".batch", directories)
{
}
BatchStream(int batchSize, int maxBatches, nvinfer1::Dims const& dims, std::string const& listFile,
std::vector<std::string> const& directories)
: mBatchSize(batchSize)
, mMaxBatches(maxBatches)
, mDims(dims)
, mListFile(listFile)
, mDataDir(directories)
{
mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
mBatch.resize(mBatchSize * mImageSize, 0);
mLabels.resize(mBatchSize, 0);
mFileBatch.resize(mDims.d[0] * mImageSize, 0);
mFileLabels.resize(mDims.d[0], 0);
reset(0);
}
// Resets data members
void reset(int firstBatch) override
{
mBatchCount = 0;
mFileCount = 0;
mFileBatchPos = mDims.d[0];
skip(firstBatch);
}
// Advance to next batch and return true, or return false if there is no batch left.
bool next() override
{
if (mBatchCount == mMaxBatches)
{
return false;
}
for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize)
{
ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
if (mFileBatchPos == mDims.d[0] && !update())
{
return false;
}
// copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
std::copy_n(
getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize);
std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos);
}
mBatchCount++;
return true;
}
// Skips the batches
void skip(int skipCount) override
{
if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0])
{
mFileCount += skipCount * mBatchSize / mDims.d[0];
return;
}
int x = mBatchCount;
for (int i = 0; i < skipCount; i++)
{
next();
}
mBatchCount = x;
}
float* getBatch() override
{
return mBatch.data();
}
float* getLabels() override
{
return mLabels.data();
}
int getBatchesRead() const override
{
return mBatchCount;
}
int getBatchSize() const override
{
return mBatchSize;
}
nvinfer1::Dims getDims() const override
{
return mDims;
}
private:
float* getFileBatch()
{
return mFileBatch.data();
}
float* getFileLabels()
{
return mFileLabels.data();
}
bool update()
{
if (mListFile.empty())
{
std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
std::ifstream file(inputFileName.c_str(), std::ios::binary);
if (!file)
{
return false;
}
int d[4];
file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]);
file.read(reinterpret_cast<char*>(getFileBatch()), sizeof(float) * mDims.d[0] * mImageSize);
file.read(reinterpret_cast<char*>(getFileLabels()), sizeof(float) * mDims.d[0]);
}
else
{
std::vector<std::string> fNames;
std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
if (!file)
{
return false;
}
sample::gLogInfo << "Batch #" << mFileCount << std::endl;
file.seekg(((mBatchCount * mBatchSize)) * 7);
for (int i = 1; i <= mBatchSize; i++)
{
std::string sName;
std::getline(file, sName);
sName = sName + ".ppm";
sample::gLogInfo << "Calibrating with file " << sName << std::endl;
fNames.emplace_back(sName);
}
mFileCount++;
const int imageC = 3;
const int imageH = 300;
const int imageW = 300;
std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(fNames.size());
for (uint32_t i = 0; i < fNames.size(); ++i)
{
readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
}
std::vector<float> data(samplesCommon::volume(mDims));
const float scale = 2.0 / 255.0;
const float bias = 1.0;
long int volChl = mDims.d[2] * mDims.d[3];
// Normalize input data
for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i)
{
for (int c = 0; c < mDims.d[1]; ++c)
{
for (int j = 0; j < volChl; ++j)
{
data[i * volImg + c * volChl + j] = scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
}
}
}
std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
}
mFileBatchPos = 0;
return true;
}
int mBatchSize{0};
int mMaxBatches{0};
int mBatchCount{0};
int mFileCount{0};
int mFileBatchPos{0};
int mImageSize{0};
std::vector<float> mBatch; //!< Data for the batch
std::vector<float> mLabels; //!< Labels for the batch
std::vector<float> mFileBatch; //!< List of image files
std::vector<float> mFileLabels; //!< List of label files
std::string mPrefix; //!< Batch file name prefix
std::string mSuffix; //!< Batch file name suffix
nvinfer1::Dims mDims; //!< Input dimensions
std::string mListFile; //!< File name of the list of image names
std::vector<std::string> mDataDir; //!< Directories where the files can be found
};
#endif
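For reference, a minimal sketch (not from this repository) of draining an MNISTBatchStream through the IBatchStream interface defined above; the MNIST file names and the ./data/mnist/ directory are assumed locations:

#include "BatchStream.h"
void drainMnistStream()
{
    // 32 images per batch, at most 100 batches; locateFile() searches the given directories.
    MNISTBatchStream stream(32, 100, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", {"./data/mnist/"});
    stream.reset(0);
    while (stream.next())
    {
        float* batch = stream.getBatch();   // batchSize x 1 x 28 x 28 normalized floats
        float* labels = stream.getLabels(); // batchSize label values
        // hand batch/labels to a calibrator or an evaluation loop here
    }
}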

@@ -0,0 +1,136 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H
#include "BatchStream.h"
#include "NvInfer.h"
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
template <typename TBatchStream>
class EntropyCalibratorImpl
{
public:
EntropyCalibratorImpl(TBatchStream const& stream, int firstBatch, std::string const& networkName,
const char* inputBlobName, bool readCache = true)
: mStream{stream}
, mCalibrationTableName("CalibrationTable" + networkName)
, mInputBlobName(inputBlobName)
, mReadCache(readCache)
{
nvinfer1::Dims dims = mStream.getDims();
mInputCount = samplesCommon::volume(dims);
CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
mStream.reset(firstBatch);
}
virtual ~EntropyCalibratorImpl()
{
CHECK(cudaFree(mDeviceInput));
}
int getBatchSize() const noexcept
{
return mStream.getBatchSize();
}
bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept
{
if (!mStream.next())
{
return false;
}
CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
ASSERT(!strcmp(names[0], mInputBlobName));
bindings[0] = mDeviceInput;
return true;
}
const void* readCalibrationCache(size_t& length) noexcept
{
mCalibrationCache.clear();
std::ifstream input(mCalibrationTableName, std::ios::binary);
input >> std::noskipws;
if (mReadCache && input.good())
{
std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
std::back_inserter(mCalibrationCache));
}
length = mCalibrationCache.size();
return length ? mCalibrationCache.data() : nullptr;
}
void writeCalibrationCache(const void* cache, size_t length) noexcept
{
std::ofstream output(mCalibrationTableName, std::ios::binary);
output.write(reinterpret_cast<const char*>(cache), length);
}
private:
TBatchStream mStream;
size_t mInputCount;
std::string mCalibrationTableName;
const char* mInputBlobName;
bool mReadCache{true};
void* mDeviceInput{nullptr};
std::vector<char> mCalibrationCache;
};
//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
template <typename TBatchStream>
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
{
public:
Int8EntropyCalibrator2(TBatchStream const& stream, int32_t firstBatch, const char* networkName,
const char* inputBlobName, bool readCache = true)
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache)
{
}
int getBatchSize() const noexcept override
{
return mImpl.getBatchSize();
}
bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override
{
return mImpl.getBatch(bindings, names, nbBindings);
}
const void* readCalibrationCache(size_t& length) noexcept override
{
return mImpl.readCalibrationCache(length);
}
void writeCalibrationCache(const void* cache, size_t length) noexcept override
{
mImpl.writeCalibrationCache(cache, length);
}
private:
EntropyCalibratorImpl<TBatchStream> mImpl;
};
#endif // ENTROPY_CALIBRATOR_H
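A minimal sketch of plugging Int8EntropyCalibrator2 into an INT8 engine build; it assumes an existing nvinfer1::IBuilderConfig* named config and calibration batch files produced offline under ./data/ with the prefix "batches/batch" (both are assumptions, not part of this commit):

// The calibrator must stay alive until the engine has been built.
BatchStream calibStream(8 /*batchSize*/, 10 /*maxBatches*/, "batches/batch", {"./data/"});
Int8EntropyCalibrator2<BatchStream> calibrator(calibStream, 0 /*firstBatch*/, "resnet34", "input");
config->setFlag(nvinfer1::BuilderFlag::kINT8);
config->setInt8Calibrator(&calibrator);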

@@ -0,0 +1,138 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ERROR_RECORDER_H
#define ERROR_RECORDER_H
#include "NvInferRuntimeCommon.h"
#include "logger.h"
#include <atomic>
#include <cstdint>
#include <exception>
#include <mutex>
#include <vector>
using nvinfer1::IErrorRecorder;
using nvinfer1::ErrorCode;
//!
//! A simple implementation of the IErrorRecorder interface for
//! use by samples. This interface also can be used as a reference
//! implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutexes and atomics in order to make sure that the code
//! works in a multi-threaded environment.
//!
class SampleErrorRecorder : public IErrorRecorder
{
using errorPair = std::pair<ErrorCode, std::string>;
using errorStack = std::vector<errorPair>;
public:
SampleErrorRecorder() = default;
~SampleErrorRecorder() noexcept override {}
int32_t getNbErrors() const noexcept final
{
return mErrorStack.size();
}
ErrorCode getErrorCode(int32_t errorIdx) const noexcept final
{
return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first;
};
IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final
{
return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str();
}
// This class can never overflow since we have dynamic resize via std::vector usage.
bool hasOverflowed() const noexcept final
{
return false;
}
// Empty the errorStack.
void clear() noexcept final
{
try
{
// grab a lock so that there is no addition while clearing.
std::lock_guard<std::mutex> guard(mStackLock);
mErrorStack.clear();
}
catch (const std::exception& e)
{
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
};
//! Simple helper function that returns true if the error stack is empty.
bool empty() const noexcept
{
return mErrorStack.empty();
}
bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final
{
try
{
std::lock_guard<std::mutex> guard(mStackLock);
sample::gLogError << "Error[" << static_cast<int32_t>(val) << "]: " << desc << std::endl;
mErrorStack.push_back(errorPair(val, desc));
}
catch (const std::exception& e)
{
sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
}
// All errors are considered fatal.
return true;
}
// Atomically increment or decrement the ref counter.
IErrorRecorder::RefCount incRefCount() noexcept final
{
return ++mRefCount;
}
IErrorRecorder::RefCount decRefCount() noexcept final
{
return --mRefCount;
}
private:
// Simple helper functions.
const errorPair& operator[](size_t index) const noexcept
{
return mErrorStack[index];
}
bool invalidIndexCheck(int32_t index) const noexcept
{
// By converting signed to unsigned, we only need a single check since
// negative numbers turn into large positive values greater than the size.
size_t sIndex = index;
return sIndex >= mErrorStack.size();
}
// Mutex to hold when locking mErrorStack.
std::mutex mStackLock;
// Reference count of the class. Destruction of the class when mRefCount
// is not zero causes undefined behavior.
std::atomic<int32_t> mRefCount{0};
// The error stack that holds the errors recorded by TensorRT.
errorStack mErrorStack;
}; // class SampleErrorRecorder
#endif // ERROR_RECORDER_H
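A minimal sketch (assuming a valid nvinfer1::IRuntime* named runtime) of attaching the recorder and draining any captured errors afterwards:

SampleErrorRecorder recorder;
runtime->setErrorRecorder(&recorder);
// ... deserialize the engine and run inference here ...
for (int32_t i = 0; i < recorder.getNbErrors(); ++i)
{
    sample::gLogError << "recorded: " << recorder.getErrorDesc(i) << std::endl;
}
recorder.clear(); // empty the stack once the errors have been handled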

@@ -0,0 +1,164 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_ARGS_PARSER_H
#define TENSORRT_ARGS_PARSER_H
#ifdef _MSC_VER
#include "getOptWin.h"
#else
#include <getopt.h>
#endif
#include <iostream>
#include <string>
#include <vector>
namespace samplesCommon
{
//!
//! \brief The SampleParams structure groups the basic parameters required by
//! all sample networks.
//!
struct SampleParams
{
int32_t batchSize{1}; //!< Number of inputs in a batch
int32_t dlaCore{-1}; //!< Specify the DLA core to run network on.
bool int8{false}; //!< Allow running the network in Int8 mode.
bool fp16{false}; //!< Allow running the network in FP16 mode.
std::vector<std::string> dataDirs; //!< Directory paths where sample data files are stored
std::vector<std::string> inputTensorNames;
std::vector<std::string> outputTensorNames;
};
//!
//! \brief The CaffeSampleParams structure groups the additional parameters required by
//! networks that use caffe
//!
struct CaffeSampleParams : public SampleParams
{
std::string prototxtFileName; //!< Filename of prototxt design file of a network
std::string weightsFileName; //!< Filename of trained weights file of a network
std::string meanFileName; //!< Filename of mean file of a network
};
//!
//! \brief The OnnxSampleParams structure groups the additional parameters required by
//! networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams
{
std::string onnxFileName; //!< Filename of ONNX file of a network
};
//!
//! \brief The UffSampleParams structure groups the additional parameters required by
//! networks that use Uff
//!
struct UffSampleParams : public SampleParams
{
std::string uffFileName; //!< Filename of uff file of a network
};
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args
{
bool runInInt8{false};
bool runInFp16{false};
bool help{false};
int32_t useDLACore{-1};
int32_t batch{1};
std::vector<std::string> dataDirs;
std::string saveEngine;
std::string loadEngine;
bool useILoop{false};
};
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return Boolean; if true, execution can continue, otherwise the program should exit.
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[])
{
while (1)
{
int32_t arg;
static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'},
{"int8", no_argument, 0, 'i'}, {"fp16", no_argument, 0, 'f'}, {"useILoop", no_argument, 0, 'l'},
{"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'},
{"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, {nullptr, 0, nullptr, 0}};
int32_t option_index = 0;
        // The short-option string mirrors the long_options table above (':' marks options that take a value).
        arg = getopt_long(argc, argv, "hd:ifls:o:u:b:", long_options, &option_index);
if (arg == -1)
{
break;
}
switch (arg)
{
case 'h': args.help = true; return true;
case 'd':
if (optarg)
{
args.dataDirs.push_back(optarg);
}
else
{
std::cerr << "ERROR: --datadir requires option argument" << std::endl;
return false;
}
break;
case 's':
if (optarg)
{
args.saveEngine = optarg;
}
break;
case 'o':
if (optarg)
{
args.loadEngine = optarg;
}
break;
case 'i': args.runInInt8 = true; break;
case 'f': args.runInFp16 = true; break;
case 'l': args.useILoop = true; break;
case 'u':
if (optarg)
{
args.useDLACore = std::stoi(optarg);
}
break;
case 'b':
if (optarg)
{
args.batch = std::stoi(optarg);
}
break;
default: return false;
}
}
return true;
}
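//!
//! Usage sketch (illustrative): a typical call from main().
//! \code
//!     int main(int argc, char** argv)
//!     {
//!         samplesCommon::Args args;
//!         if (!samplesCommon::parseArgs(args, argc, argv) || args.help)
//!         {
//!             return EXIT_FAILURE; // print usage text here on --help
//!         }
//!         // args.dataDirs, args.runInFp16, args.batch, ... are now populated.
//!         return EXIT_SUCCESS;
//!     }
//! \endcode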
} // namespace samplesCommon
#endif // TENSORRT_ARGS_PARSER_H

@ -0,0 +1,421 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_BUFFERS_H
#define TENSORRT_BUFFERS_H
#include "NvInfer.h"
#include "common.h"
#include "half.h"
#include <cassert>
#include <cuda_runtime_api.h>
#include <iostream>
#include <iterator>
#include <memory>
#include <new>
#include <numeric>
#include <string>
#include <vector>
namespace samplesCommon
{
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation,
//! deallocation, and querying of buffers on both the device and the host.
//! It can handle data of arbitrary types because it stores byte buffers.
//! The template parameters AllocFunc and FreeFunc are used for the
//! allocation and deallocation of the buffer.
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
//! and returns bool. ptr is a pointer to where the allocated buffer address should be stored.
//! size is the amount of memory in bytes to allocate.
//! The boolean indicates whether or not the memory allocation was successful.
//! FreeFunc must be a functor that takes in (void* ptr) and returns void.
//! ptr is the allocated buffer address. It must work with nullptr input.
//!
template <typename AllocFunc, typename FreeFunc>
class GenericBuffer
{
public:
//!
//! \brief Construct an empty buffer.
//!
GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
: mSize(0)
, mCapacity(0)
, mType(type)
, mBuffer(nullptr)
{
}
//!
//! \brief Construct a buffer with the specified allocation size in bytes.
//!
GenericBuffer(size_t size, nvinfer1::DataType type)
: mSize(size)
, mCapacity(size)
, mType(type)
{
if (!allocFn(&mBuffer, this->nbBytes()))
{
throw std::bad_alloc();
}
}
GenericBuffer(GenericBuffer&& buf)
: mSize(buf.mSize)
, mCapacity(buf.mCapacity)
, mType(buf.mType)
, mBuffer(buf.mBuffer)
{
buf.mSize = 0;
buf.mCapacity = 0;
buf.mType = nvinfer1::DataType::kFLOAT;
buf.mBuffer = nullptr;
}
GenericBuffer& operator=(GenericBuffer&& buf)
{
if (this != &buf)
{
freeFn(mBuffer);
mSize = buf.mSize;
mCapacity = buf.mCapacity;
mType = buf.mType;
mBuffer = buf.mBuffer;
// Reset buf.
buf.mSize = 0;
buf.mCapacity = 0;
buf.mBuffer = nullptr;
}
return *this;
}
//!
//! \brief Returns pointer to underlying array.
//!
void* data()
{
return mBuffer;
}
//!
//! \brief Returns pointer to underlying array.
//!
const void* data() const
{
return mBuffer;
}
//!
//! \brief Returns the size (in number of elements) of the buffer.
//!
size_t size() const
{
return mSize;
}
//!
//! \brief Returns the size (in bytes) of the buffer.
//!
size_t nbBytes() const
{
return this->size() * samplesCommon::getElementSize(mType);
}
    //!
    //! \brief Resizes the buffer. No reallocation occurs if the new size is smaller than or equal to the
    //! current capacity; only the reported size changes.
    //!
void resize(size_t newSize)
{
mSize = newSize;
if (mCapacity < newSize)
{
freeFn(mBuffer);
if (!allocFn(&mBuffer, this->nbBytes()))
{
throw std::bad_alloc{};
}
mCapacity = newSize;
}
}
//!
//! \brief Overload of resize that accepts Dims
//!
void resize(const nvinfer1::Dims& dims)
{
return this->resize(samplesCommon::volume(dims));
}
~GenericBuffer()
{
freeFn(mBuffer);
}
private:
size_t mSize{0}, mCapacity{0};
nvinfer1::DataType mType;
void* mBuffer;
AllocFunc allocFn;
FreeFunc freeFn;
};
class DeviceAllocator
{
public:
bool operator()(void** ptr, size_t size) const
{
return cudaMalloc(ptr, size) == cudaSuccess;
}
};
class DeviceFree
{
public:
void operator()(void* ptr) const
{
cudaFree(ptr);
}
};
class HostAllocator
{
public:
bool operator()(void** ptr, size_t size) const
{
*ptr = malloc(size);
return *ptr != nullptr;
}
};
class HostFree
{
public:
void operator()(void* ptr) const
{
free(ptr);
}
};
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
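//!
//! Usage sketch (illustrative): a matching host/device buffer pair managed by hand;
//! CHECK is the error-checking macro from common.h.
//! \code
//!     samplesCommon::HostBuffer host(1024, nvinfer1::DataType::kFLOAT);
//!     samplesCommon::DeviceBuffer device(1024, nvinfer1::DataType::kFLOAT);
//!     // ... fill host.data() with 1024 floats ...
//!     CHECK(cudaMemcpy(device.data(), host.data(), host.nbBytes(), cudaMemcpyHostToDevice));
//! \endcode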
//!
//! \brief The ManagedBuffer class groups together a pair of corresponding device and host buffers.
//!
class ManagedBuffer
{
public:
DeviceBuffer deviceBuffer;
HostBuffer hostBuffer;
};
//!
//! \brief The BufferManager class handles host and device buffer allocation and deallocation.
//!
//! \details This RAII class handles host and device buffer allocation and deallocation,
//! memcpy between host and device buffers to aid with inference,
//! and debugging dumps to validate inference. The BufferManager class is meant to be
//! used to simplify buffer management and any interactions between buffers and the engine.
//!
class BufferManager
{
public:
static const size_t kINVALID_SIZE_VALUE = ~size_t(0);
//!
//! \brief Create a BufferManager for handling buffer interactions with engine.
//!
BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine, const int batchSize = 0,
const nvinfer1::IExecutionContext* context = nullptr)
: mEngine(engine)
, mBatchSize(batchSize)
{
// Full Dims implies no batch size.
assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
// Create host and device buffers
for (int i = 0; i < mEngine->getNbBindings(); i++)
{
auto dims = context ? context->getBindingDimensions(i) : mEngine->getBindingDimensions(i);
size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
nvinfer1::DataType type = mEngine->getBindingDataType(i);
int vecDim = mEngine->getBindingVectorizedDim(i);
if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
{
int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
vol *= scalarsPerVec;
}
vol *= samplesCommon::volume(dims);
std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
manBuf->deviceBuffer = DeviceBuffer(vol, type);
manBuf->hostBuffer = HostBuffer(vol, type);
mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
mManagedBuffers.emplace_back(std::move(manBuf));
}
}
//!
//! \brief Returns a vector of device buffers that you can use directly as
//! bindings for the execute and enqueue methods of IExecutionContext.
//!
std::vector<void*>& getDeviceBindings()
{
return mDeviceBindings;
}
//!
//! \brief Returns a vector of device buffers.
//!
const std::vector<void*>& getDeviceBindings() const
{
return mDeviceBindings;
}
//!
//! \brief Returns the device buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getDeviceBuffer(const std::string& tensorName) const
{
return getBuffer(false, tensorName);
}
//!
//! \brief Returns the host buffer corresponding to tensorName.
//! Returns nullptr if no such tensor can be found.
//!
void* getHostBuffer(const std::string& tensorName) const
{
return getBuffer(true, tensorName);
}
//!
//! \brief Returns the size of the host and device buffers that correspond to tensorName.
//! Returns kINVALID_SIZE_VALUE if no such tensor can be found.
//!
size_t size(const std::string& tensorName) const
{
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return kINVALID_SIZE_VALUE;
return mManagedBuffers[index]->hostBuffer.nbBytes();
}
//!
//! \brief Templated print function that dumps buffers of arbitrary type to std::ostream.
//! rowCount parameter controls how many elements are on each line.
//! A rowCount of 1 means that there is only 1 element on each line.
//!
template <typename T>
void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount)
{
assert(rowCount != 0);
assert(bufSize % sizeof(T) == 0);
T* typedBuf = static_cast<T*>(buf);
size_t numItems = bufSize / sizeof(T);
for (int i = 0; i < static_cast<int>(numItems); i++)
{
// Handle rowCount == 1 case
if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
os << typedBuf[i] << std::endl;
else if (rowCount == 1)
os << typedBuf[i];
// Handle rowCount > 1 case
else if (i % rowCount == 0)
os << typedBuf[i];
else if (i % rowCount == rowCount - 1)
os << " " << typedBuf[i] << std::endl;
else
os << " " << typedBuf[i];
}
}
//!
//! \brief Copy the contents of input host buffers to input device buffers synchronously.
//!
void copyInputToDevice()
{
memcpyBuffers(true, false, false);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers synchronously.
//!
void copyOutputToHost()
{
memcpyBuffers(false, true, false);
}
//!
//! \brief Copy the contents of input host buffers to input device buffers asynchronously.
//!
void copyInputToDeviceAsync(const cudaStream_t& stream = 0)
{
memcpyBuffers(true, false, true, stream);
}
//!
//! \brief Copy the contents of output device buffers to output host buffers asynchronously.
//!
void copyOutputToHostAsync(const cudaStream_t& stream = 0)
{
memcpyBuffers(false, true, true, stream);
}
~BufferManager() = default;
private:
void* getBuffer(const bool isHost, const std::string& tensorName) const
{
int index = mEngine->getBindingIndex(tensorName.c_str());
if (index == -1)
return nullptr;
return (isHost ? mManagedBuffers[index]->hostBuffer.data() : mManagedBuffers[index]->deviceBuffer.data());
}
void memcpyBuffers(const bool copyInput, const bool deviceToHost, const bool async, const cudaStream_t& stream = 0)
{
for (int i = 0; i < mEngine->getNbBindings(); i++)
{
void* dstPtr
= deviceToHost ? mManagedBuffers[i]->hostBuffer.data() : mManagedBuffers[i]->deviceBuffer.data();
const void* srcPtr
= deviceToHost ? mManagedBuffers[i]->deviceBuffer.data() : mManagedBuffers[i]->hostBuffer.data();
const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
if ((copyInput && mEngine->bindingIsInput(i)) || (!copyInput && !mEngine->bindingIsInput(i)))
{
if (async)
CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
else
CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
}
}
}
std::shared_ptr<nvinfer1::ICudaEngine> mEngine; //!< The pointer to the engine
int mBatchSize; //!< The batch size for legacy networks, 0 otherwise.
std::vector<std::unique_ptr<ManagedBuffer>> mManagedBuffers; //!< The vector of pointers to managed buffers
std::vector<void*> mDeviceBindings; //!< The vector of device buffers needed for engine execution
};
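//!
//! Usage sketch (illustrative): the intended inference round trip. "engine" is assumed
//! to be a std::shared_ptr<nvinfer1::ICudaEngine>, "context" its execution context, and
//! "input"/"output" are assumed tensor names.
//! \code
//!     samplesCommon::BufferManager buffers(engine);
//!     // ... write preprocessed data into buffers.getHostBuffer("input") ...
//!     buffers.copyInputToDevice();
//!     bool ok = context->executeV2(buffers.getDeviceBindings().data());
//!     buffers.copyOutputToHost();
//!     const float* out = static_cast<const float*>(buffers.getHostBuffer("output"));
//! \endcode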
} // namespace samplesCommon
#endif // TENSORRT_BUFFERS_H

File diff suppressed because it is too large

@ -0,0 +1,124 @@
#!/usr/bin/python
#
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Script to dump TensorFlow weights in TRT v1 and v2 dump format.
# The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later.
import sys
import struct
import argparse
try:
import tensorflow as tf
from tensorflow.python import pywrap_tensorflow
except ImportError as err:
sys.stderr.write("""Error: Failed to import module ({})""".format(err))
sys.exit()
parser = argparse.ArgumentParser(description="TensorFlow Weight Dumper")
parser.add_argument(
"-m",
"--model",
required=True,
help="The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908",
)
parser.add_argument("-o", "--output", required=True, help="The weight file to dump all the weights to.")
parser.add_argument("-1", "--wtsv1", required=False, default=False, type=bool, help="Dump the weights in the wts v1.")
opt = parser.parse_args()
if opt.wtsv1:
print("Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:")
print("Line 0: <number of buffers in the file>")
print("Line 1-Num: [buffer name] [buffer type] [buffer size] <hex values>")
else:
print("Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:")
print("Line 0: <number of buffers in the file>")
print("Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] <buffer shaped size bytes of data>")
inputbase = opt.model
outputbase = opt.output
def float_to_hex(f):
return hex(struct.unpack("<I", struct.pack("<f", f))[0])
def getTRTType(tensor):
if tf.as_dtype(tensor.dtype) == tf.float32:
return 0
if tf.as_dtype(tensor.dtype) == tf.float16:
return 1
if tf.as_dtype(tensor.dtype) == tf.int8:
return 2
if tf.as_dtype(tensor.dtype) == tf.int32:
return 3
print("Tensor data type of %s is not supported in TensorRT" % (tensor.dtype))
sys.exit()
try:
# Open output file
if opt.wtsv1:
outputFileName = outputbase + ".wts"
else:
outputFileName = outputbase + ".wts2"
outputFile = open(outputFileName, "w")
# read vars from checkpoint
reader = pywrap_tensorflow.NewCheckpointReader(inputbase)
var_to_shape_map = reader.get_variable_to_shape_map()
# Record count of weights
count = 0
for key in sorted(var_to_shape_map):
count += 1
outputFile.write("%s\n" % (count))
# Dump the weights in either v1 or v2 format
for key in sorted(var_to_shape_map):
tensor = reader.get_tensor(key)
file_key = key.replace("/", "_")
typeOfElem = getTRTType(tensor)
val = tensor.shape
if opt.wtsv1:
val = tensor.size
print("%s %s %s " % (file_key, typeOfElem, val))
flat_tensor = tensor.flatten()
outputFile.write("%s 0 %s " % (file_key, val))
if opt.wtsv1:
for weight in flat_tensor:
hexval = float_to_hex(float(weight))
outputFile.write("%s " % (hexval[2:]))
        else:
            # Raw bytes cannot pass through a text-mode file object on Python 3:
            # flush the text layer, then write via the underlying binary buffer.
            outputFile.flush()
            outputFile.buffer.write(flat_tensor.tobytes())
outputFile.write("\n")
outputFile.close()
except Exception as e: # pylint: disable=broad-except
print(str(e))
if "corrupted compressed block contents" in str(e):
print("It's likely that your checkpoint file has been compressed " "with SNAPPY.")
if "Data loss" in str(e) and (any([e in inputbase for e in [".index", ".meta", ".data"]])):
proposed_file = ".".join(inputbase.split(".")[0:-1])
v2_file_error_template = """
It's likely that this is a V2 checkpoint and you need to provide the filename
*prefix*. Try removing the '.' and extension. Try:
inspect checkpoint --file_name = {}"""
print(v2_file_error_template.format(proposed_file))

@ -0,0 +1,248 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "getOptions.h"
#include "logger.h"
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <set>
namespace nvinfer1
{
namespace utility
{
//! Matching for TRTOptions is defined as follows:
//!
//! If A and B both have longName set, A matches B if and only if A.longName ==
//! B.longName and (A.shortName == B.shortName if both have short name set).
//!
//! If A only has shortName set and B only has longName set, then A does not
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
//! the definition of a TRTOption in the input to getOptions. As such, if the
//! definition only has shortName set, it will never be equal to a TRTOption
//! that does not have shortName set (and same for longName).
//!
//! If A and B both have shortName set but B does not have longName set, A
//! matches B if and only if A.shortName == B.shortName.
//!
//! If A has neither long nor short name set, A matches B if and only if B has
//! neither long nor short name set.
bool matches(const TRTOption& a, const TRTOption& b)
{
if (!a.longName.empty() && !b.longName.empty())
{
if (a.shortName && b.shortName)
{
return (a.longName == b.longName) && (a.shortName == b.shortName);
}
return a.longName == b.longName;
}
// If only one of them is not set, this will return false anyway.
return a.shortName == b.shortName;
}
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
//! TRTOptions, -1 if not found.
int getTRTOptionIndex(const std::vector<TRTOption>& options, const TRTOption& opt)
{
for (size_t i = 0; i < options.size(); ++i)
{
if (matches(opt, options[i]))
{
return i;
}
}
return -1;
}
//! validateTRTOption will return a string containing an error message if the
//! option's names contain invalid (non-alphanumeric) characters, or if either
//! name duplicates one that has already been seen. Otherwise, returns the empty string.
std::string validateTRTOption(
const std::set<char>& seenShortNames, const std::set<std::string>& seenLongNames, const TRTOption& opt)
{
if (opt.shortName != 0)
{
if (!std::isalnum(opt.shortName))
{
return "Short name '" + std::to_string(opt.shortName) + "' is non-alphanumeric";
}
if (seenShortNames.find(opt.shortName) != seenShortNames.end())
{
return "Short name '" + std::to_string(opt.shortName) + "' is a duplicate";
}
}
if (!opt.longName.empty())
{
for (const char& c : opt.longName)
{
if (!std::isalnum(c) && c != '-' && c != '_')
{
return "Long name '" + opt.longName + "' contains characters that are not '-', '_', or alphanumeric";
}
}
if (seenLongNames.find(opt.longName) != seenLongNames.end())
{
return "Long name '" + opt.longName + "' is a duplicate";
}
}
return "";
}
//! validateTRTOptions will return a string containing an error message if any
//! option contains invalid (non-alphanumeric) characters, or if there are
//! duplicate option names found. Otherwise, returns the empty string.
std::string validateTRTOptions(const std::vector<TRTOption>& options)
{
std::set<char> seenShortNames;
std::set<std::string> seenLongNames;
for (size_t i = 0; i < options.size(); ++i)
{
const std::string errMsg = validateTRTOption(seenShortNames, seenLongNames, options[i]);
if (!errMsg.empty())
{
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
}
seenShortNames.insert(options[i].shortName);
seenLongNames.insert(options[i].longName);
}
return "";
}
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
//! fields set accordingly. Assumes that options is validated.
//! ErrMsg will be set if:
//! - an argument is null
//! - an argument is empty
//! - an argument does not specify an option (i.e. "-" and "--")
//! - a short argument has more than 1 character
//! - the last argument in the list requires a value
TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector<TRTOption>& options)
{
TRTParsedArgs parsedArgs;
parsedArgs.values.resize(options.size());
for (int i = 1; i < argc; ++i) // index of current command-line argument
{
if (argv[i] == nullptr)
{
return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
}
const std::string argStr(argv[i]);
if (argStr.empty())
{
return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
}
// No starting hyphen means it is a positional argument
if (argStr[0] != '-')
{
parsedArgs.positionalArgs.push_back(argStr);
continue;
}
if (argStr == "-" || argStr == "--")
{
return TRTParsedArgs{"Argument does not specify an option at index " + std::to_string(i)};
}
// If only 1 hyphen, char after is the flag.
TRTOption opt{' ', "", false, ""};
std::string value;
if (argStr[1] != '-')
{
// Must only have 1 char after the hyphen
if (argStr.size() > 2)
{
return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)};
}
opt.shortName = argStr[1];
}
else
{
opt.longName = argStr.substr(2);
// We need to support --foo=bar syntax, so look for '='
const size_t eqIndex = opt.longName.find('=');
if (eqIndex < opt.longName.size())
{
value = opt.longName.substr(eqIndex + 1);
opt.longName = opt.longName.substr(0, eqIndex);
}
}
const int idx = getTRTOptionIndex(options, opt);
if (idx < 0)
{
continue;
}
if (options[idx].valueRequired)
{
if (!value.empty())
{
parsedArgs.values[idx].second.push_back(value);
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
continue;
}
if (i + 1 >= argc)
{
return TRTParsedArgs{"Last argument requires value, but none given"};
}
const std::string nextArg(argv[i + 1]);
if (nextArg.size() >= 1 && nextArg[0] == '-')
{
                sample::gLogWarning << "Warning: Using '" << nextArg << "' as a value for '" << argStr
                                    << "'. Should this be its own flag?" << std::endl;
}
parsedArgs.values[idx].second.push_back(nextArg);
i += 1; // Next argument already consumed
parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
}
else
{
parsedArgs.values[idx].first += 1;
}
}
return parsedArgs;
}
TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options)
{
const std::string errMsg = validateTRTOptions(options);
if (!errMsg.empty())
{
return TRTParsedArgs{errMsg};
}
return parseArgs(argc, argv, options);
}
} // namespace utility
} // namespace nvinfer1

@ -0,0 +1,128 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_GET_OPTIONS_H
#define TRT_GET_OPTIONS_H
#include <string>
#include <utility>
#include <vector>
namespace nvinfer1
{
namespace utility
{
//! TRTOption defines a command line option. At least 1 of shortName and longName
//! must be defined.
//! If bool initialization is undefined behavior on your system, valueRequired
//! must also be explicitly defined.
//! helpText is optional.
struct TRTOption
{
    char shortName;       //!< Option name in short (single hyphen) form (e.g. -a, -b)
    std::string longName; //!< Option name in long (double hyphen) form (e.g. --foo, --bar)
    bool valueRequired;   //!< True if a value is needed for an option (e.g. -N 4, --foo bar)
std::string helpText; //!< Text to show when printing out the command usage
};
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
//! argument list (argv).
//!
//! errMsg is a string containing an error message if any errors occurred. If it
//! is empty, no errors occurred.
//!
//! values stores a vector of pairs for each option (ordered by order in the
//! input). Each pair contains an int (the number of occurrences) and a vector
//! of strings (a list of values). The user should know which of these to use,
//! and which options required values. For non-value options, only occurrences is
//! populated. For value-required options, occurrences == # of values. Values do
//! not need to be unique.
//!
//! positionalArgs stores additional arguments that are passed in without an
//! option (these must not start with a hyphen).
struct TRTParsedArgs
{
std::string errMsg;
std::vector<std::pair<int, std::vector<std::string>>> values;
std::vector<std::string> positionalArgs;
};
//! Parse the input arguments passed to main() and extract options as well as
//! positional arguments.
//!
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
//!
//! If there is a single preceding hyphen, there should be exactly 1 character
//! after the hyphen, which is interpreted as the option.
//!
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
//! is interpreted as the option.
//!
//! If the option requires a value, the next argument is used as the value.
//!
//! Positional arguments must not start with a hyphen.
//!
//! If an argument requires a value, the next argument is interpreted as the
//! value, even if it is in the form of a valid option (e.g. --foo --bar will store
//! "--bar" as a value for option "foo" if "foo" requires a value).
//! We also support --name=value syntax. In this case, 'value' would be used as
//! the value, NOT the next argument.
//!
//! For options:
//! { { 'a', "", false },
//! { 'b', "", false },
//! { 0, "cee", false },
//! { 'd', "", true },
//! { 'e', "", true },
//! { 'f', "foo", true } }
//!
//! ./main hello world -a -a --cee -d 12 -f 34
//! and
//! ./main hello world -a -a --cee -d 12 --foo 34
//!
//! will result in:
//!
//! TRTParsedArgs {
//! errMsg: "",
//! values: { { 2, {} },
//! { 0, {} },
//! { 1, {} },
//! { 1, {"12"} },
//! { 0, {} },
//! { 1, {"34"} } }
//! positionalArgs: {"hello", "world"},
//! }
//!
//! Non-POSIX behavior:
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
//! option must have its own hyphen prefix.
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
//! whitespace-separated from the option it is for.
//!
//! @param[in] argc The number of arguments passed to main (including the
//! file name, which is disregarded)
//! @param[in] argv The arguments passed to main (including the file name,
//! which is disregarded)
//! @param[in] options List of TRTOptions to parse
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
//! the fields.
TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options);
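//!
//! Usage sketch (illustrative):
//! \code
//!     std::vector<nvinfer1::utility::TRTOption> opts{{'d', "datadir", true, "Data directory"}};
//!     nvinfer1::utility::TRTParsedArgs parsed = nvinfer1::utility::getOptions(argc, argv, opts);
//!     if (!parsed.errMsg.empty()) { /* report the error and exit */ }
//!     // parsed.values[0].second holds the values given for -d/--datadir.
//! \endcode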
} // namespace utility
} // namespace nvinfer1
#endif // TRT_GET_OPTIONS_H

@ -0,0 +1,568 @@
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
/*
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*
* Sponsored in part by the Defense Advanced Research Projects
* Agency (DARPA) and Air Force Research Laboratory, Air Force
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
*/
/*-
* Copyright (c) 2000 The NetBSD Foundation, Inc.
* All rights reserved.
*
* This code is derived from software contributed to The NetBSD Foundation
* by Dieter Baron and Thomas Klausner.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
#include "getoptWin.h"
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
#ifdef REPLACE_GETOPT
int opterr = 1; /* if error message should be printed */
int optind = 1; /* index into parent argv vector */
int optopt = '?'; /* character checked for validity */
#undef optreset /* see getopt.h */
#define optreset __mingw_optreset
int optreset; /* reset getopt */
char* optarg; /* argument associated with option */
#endif
#define PRINT_ERROR ((opterr) && (*options != ':'))
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
/* return values */
#define BADCH (int) '?'
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
#define INORDER (int) 1
#ifndef __CYGWIN__
#define __progname __argv[0]
#else
extern char __declspec(dllimport) * __progname;
#endif
#ifdef __CYGWIN__
static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char* const*, char const*, const struct option*, int*, int);
static int parse_long_options(char* const*, char const*, const struct option*, int*, int);
static int gcd(int, int);
static void permute_args(int, int, int, char* const*);
static char* place = EMSG; /* option letter processing */
/* XXX: set optreset to 1 rather than these two */
static int nonopt_start = -1; /* first non option argument (for permute) */
static int nonopt_end = -1; /* first option after non options (for permute) */
/* Error messages */
static char const recargchar[] = "option requires an argument -- %c";
static char const recargstring[] = "option requires an argument -- %s";
static char const ambig[] = "ambiguous option -- %.*s";
static char const noarg[] = "option doesn't take an argument -- %.*s";
static char const illoptchar[] = "unknown option -- %c";
static char const illoptstring[] = "unknown option -- %s";
static void _vwarnx(char const* fmt, va_list ap)
{
(void) fprintf(stderr, "%s: ", __progname);
if (fmt != NULL)
(void) vfprintf(stderr, fmt, ap);
(void) fprintf(stderr, "\n");
}
static void warnx(char const* fmt, ...)
{
va_list ap;
va_start(ap, fmt);
_vwarnx(fmt, ap);
va_end(ap);
}
/*
* Compute the greatest common divisor of a and b.
*/
static int gcd(int a, int b)
{
int c;
c = a % b;
while (c != 0)
{
a = b;
b = c;
c = a % b;
}
return (b);
}
/*
* Exchange the block from nonopt_start to nonopt_end with the block
* from nonopt_end to opt_end (keeping the same order of arguments
* in each block).
*/
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
int cstart, cyclelen, i, j, ncycle, nnonopts, nopts, pos;
char* swap;
/*
* compute lengths of blocks and number and size of cycles
*/
nnonopts = panonopt_end - panonopt_start;
nopts = opt_end - panonopt_end;
ncycle = gcd(nnonopts, nopts);
cyclelen = (opt_end - panonopt_start) / ncycle;
for (i = 0; i < ncycle; i++)
{
cstart = panonopt_end + i;
pos = cstart;
for (j = 0; j < cyclelen; j++)
{
if (pos >= panonopt_end)
pos -= nnonopts;
else
pos += nopts;
swap = nargv[pos];
/* LINTED const cast */
((char**) nargv)[pos] = nargv[cstart];
/* LINTED const cast */
((char**) nargv)[cstart] = swap;
}
}
}
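/*
 * Worked example (illustrative): with nargv = {"prog", "foo", "bar", "-a"},
 * panonopt_start = 1, panonopt_end = 3 and opt_end = 4, the non-option block
 * {"foo", "bar"} and the option block {"-a"} are exchanged in place, yielding
 * {"prog", "-a", "foo", "bar"}; the order within each block is preserved.
 */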
/*
* parse_long_options --
* Parse long options in argc/argv argument vector.
* Returns -1 if short_too is set and the option does not match long_options.
*/
static int parse_long_options(
char* const* nargv, char const* options, const struct option* long_options, int* idx, int short_too)
{
char *current_argv, *has_equal;
size_t current_argv_len;
int i, ambiguous, match;
#define IDENTICAL_INTERPRETATION(_x, _y) \
(long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
&& long_options[(_x)].val == long_options[(_y)].val)
current_argv = place;
match = -1;
ambiguous = 0;
optind++;
if ((has_equal = strchr(current_argv, '=')) != NULL)
{
/* argument found (--option=arg) */
current_argv_len = has_equal - current_argv;
has_equal++;
}
else
current_argv_len = strlen(current_argv);
for (i = 0; long_options[i].name; i++)
{
/* find matching long option */
if (strncmp(current_argv, long_options[i].name, current_argv_len))
continue;
if (strlen(long_options[i].name) == current_argv_len)
{
/* exact match */
match = i;
ambiguous = 0;
break;
}
/*
* If this is a known short option, don't allow
* a partial match of a single character.
*/
if (short_too && current_argv_len == 1)
continue;
if (match == -1) /* partial match */
match = i;
else if (!IDENTICAL_INTERPRETATION(i, match))
ambiguous = 1;
}
if (ambiguous)
{
/* ambiguous abbreviation */
if (PRINT_ERROR)
warnx(ambig, (int) current_argv_len, current_argv);
optopt = 0;
return (BADCH);
}
if (match != -1)
{ /* option found */
if (long_options[match].has_arg == no_argument && has_equal)
{
if (PRINT_ERROR)
warnx(noarg, (int) current_argv_len, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
return (BADARG);
}
if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
{
if (has_equal)
optarg = has_equal;
else if (long_options[match].has_arg == required_argument)
{
/*
* optional argument doesn't use next nargv
*/
optarg = nargv[optind++];
}
}
if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
{
/*
* Missing argument; leading ':' indicates no error
* should be generated.
*/
if (PRINT_ERROR)
warnx(recargstring, current_argv);
/*
* XXX: GNU sets optopt to val regardless of flag
*/
if (long_options[match].flag == NULL)
optopt = long_options[match].val;
else
optopt = 0;
--optind;
return (BADARG);
}
}
else
{ /* unknown option */
if (short_too)
{
--optind;
return (-1);
}
if (PRINT_ERROR)
warnx(illoptstring, current_argv);
optopt = 0;
return (BADCH);
}
if (idx)
*idx = match;
if (long_options[match].flag)
{
*long_options[match].flag = long_options[match].val;
return (0);
}
else
return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
/*
* getopt_internal --
* Parse argc/argv argument vector. Called by user level routines.
*/
static int getopt_internal(
int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx, int flags)
{
char const* oli; /* option letter list index */
int optchar, short_too;
static int posixly_correct = -1;
if (options == NULL)
return (-1);
/*
* XXX Some GNU programs (like cvs) set optind to 0 instead of
* XXX using optreset. Work around this braindamage.
*/
if (optind == 0)
optind = optreset = 1;
/*
* Disable GNU extensions if POSIXLY_CORRECT is set or options
* string begins with a '+'.
*
* CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
* optreset != 0 for GNU compatibility.
*/
if (posixly_correct == -1 || optreset != 0)
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
if (*options == '-')
flags |= FLAG_ALLARGS;
else if (posixly_correct || *options == '+')
flags &= ~FLAG_PERMUTE;
if (*options == '+' || *options == '-')
options++;
optarg = NULL;
if (optreset)
nonopt_start = nonopt_end = -1;
start:
if (optreset || !*place)
{ /* update scanning pointer */
optreset = 0;
if (optind >= nargc)
{ /* end of argument vector */
place = EMSG;
if (nonopt_end != -1)
{
/* do permutation, if we have to */
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
else if (nonopt_start != -1)
{
/*
* If we skipped non-options, set optind
* to the first of them.
*/
optind = nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
{
place = EMSG; /* found non-option */
if (flags & FLAG_ALLARGS)
{
/*
* GNU extension:
* return non-option as argument to option 1
*/
optarg = nargv[optind++];
return (INORDER);
}
if (!(flags & FLAG_PERMUTE))
{
/*
* If no permutation wanted, stop parsing
* at first non-option.
*/
return (-1);
}
/* do permutation */
if (nonopt_start == -1)
nonopt_start = optind;
else if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
nonopt_start = optind - (nonopt_end - nonopt_start);
nonopt_end = -1;
}
optind++;
/* process next argument */
goto start;
}
if (nonopt_start != -1 && nonopt_end == -1)
nonopt_end = optind;
/*
* If we have "-" do nothing, if "--" we are done.
*/
if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
{
optind++;
place = EMSG;
/*
* We found an option (--), so if we skipped
* non-options, we have to permute.
*/
if (nonopt_end != -1)
{
permute_args(nonopt_start, nonopt_end, optind, nargv);
optind -= nonopt_end - nonopt_start;
}
nonopt_start = nonopt_end = -1;
return (-1);
}
}
/*
* Check long options if:
* 1) we were passed some
* 2) the arg is not just "-"
 * 3) either the arg starts with -- or we are getopt_long_only()
*/
if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
{
short_too = 0;
if (*place == '-')
place++; /* --foo long option */
else if (*place != ':' && strchr(options, *place) != NULL)
short_too = 1; /* could be short option too */
optchar = parse_long_options(nargv, options, long_options, idx, short_too);
if (optchar != -1)
{
place = EMSG;
return (optchar);
}
}
if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
|| (oli = strchr(options, optchar)) == NULL)
{
/*
* If the user specified "-" and '-' isn't listed in
* options, return -1 (non-option) as per POSIX.
* Otherwise, it is an unknown option character (or ':').
*/
if (optchar == (int) '-' && *place == '\0')
return (-1);
if (!*place)
++optind;
if (PRINT_ERROR)
warnx(illoptchar, optchar);
optopt = optchar;
return (BADCH);
}
if (long_options != NULL && optchar == 'W' && oli[1] == ';')
{
/* -W long-option */
if (*place) /* no space */
/* NOTHING */;
else if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else /* white space */
place = nargv[optind];
optchar = parse_long_options(nargv, options, long_options, idx, 0);
place = EMSG;
return (optchar);
}
if (*++oli != ':')
{ /* doesn't take argument */
if (!*place)
++optind;
}
else
{ /* takes (optional) argument */
optarg = NULL;
if (*place) /* no white space */
optarg = place;
else if (oli[1] != ':')
{ /* arg not optional */
if (++optind >= nargc)
{ /* no arg */
place = EMSG;
if (PRINT_ERROR)
warnx(recargchar, optchar);
optopt = optchar;
return (BADARG);
}
else
optarg = nargv[optind];
}
place = EMSG;
++optind;
}
/* dump back option letter */
return (optchar);
}
#ifdef REPLACE_GETOPT
/*
* getopt --
* Parse argc/argv argument vector.
*
* [eventually this will replace the BSD getopt]
*/
int getopt(int nargc, char* const* nargv, char const* options)
{
/*
* We don't pass FLAG_PERMUTE to getopt_internal() since
* the BSD getopt(3) (unlike GNU) has never done this.
*
* Furthermore, since many privileged programs call getopt()
* before dropping privileges it makes sense to keep things
* as simple (and bug-free) as possible.
*/
return (getopt_internal(nargc, nargv, options, NULL, NULL, 0));
}
#endif /* REPLACE_GETOPT */
/*
* getopt_long --
* Parse argc/argv argument vector.
*/
int getopt_long(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
}
/*
* getopt_long_only --
* Parse argc/argv argument vector.
*/
int getopt_long_only(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
{
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
}

@ -0,0 +1,124 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef __GETOPT_H__
/**
* DISCLAIMER
* This file has no copyright assigned and is placed in the Public Domain.
* This file is a part of the w64 mingw-runtime package.
*
* The w64 mingw-runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/
#define __GETOPT_H__
/* All the headers include this file. */
#include <crtdefs.h>
#if defined(WINGETOPT_SHARED_LIB)
#if defined(BUILDING_WINGETOPT_DLL)
#define WINGETOPT_API __declspec(dllexport)
#else
#define WINGETOPT_API __declspec(dllimport)
#endif
#else
#define WINGETOPT_API
#endif
#ifdef __cplusplus
extern "C"
{
#endif
WINGETOPT_API extern int optind; /* index of first non-option in argv */
WINGETOPT_API extern int optopt; /* single option character, as parsed */
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
/* (user may set to zero, to suppress) */
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
extern int getopt(int nargc, char* const* nargv, char const* options);
#ifdef _BSD_SOURCE
/*
* BSD adds the non-standard `optreset' feature, for reinitialisation
* of `getopt' parsing. We support this feature, for applications which
* proclaim their BSD heritage, before including this header; however,
* to maintain portability, developers are advised to avoid it.
*/
#define optreset __mingw_optreset
extern int optreset;
#endif
#ifdef __cplusplus
}
#endif
/*
* POSIX requires the `getopt' API to be specified in `unistd.h';
* thus, `unistd.h' includes this header. However, we do not want
* to expose the `getopt_long' or `getopt_long_only' APIs, when
* included in this manner. Thus, close the standard __GETOPT_H__
* declarations block, and open an additional __GETOPT_LONG_H__
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
* to declare the extended API.
*/
#endif /* !defined(__GETOPT_H__) */
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
#define __GETOPT_LONG_H__
#ifdef __cplusplus
extern "C"
{
#endif
struct option /* specification for a long form option... */
{
char const* name; /* option name, without leading hyphens */
int has_arg; /* does it take an argument? */
int* flag; /* where to save its status, or NULL */
int val; /* its associated status value */
};
enum /* permitted values for its `has_arg' field... */
{
no_argument = 0, /* option never takes an argument */
required_argument, /* option always requires an argument */
optional_argument /* option may take an argument */
};
extern int getopt_long(
int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
extern int getopt_long_only(
int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
/*
* Previous MinGW implementation had...
*/
#ifndef HAVE_DECL_GETOPT
/*
* ...for the long form API only; keep this for compatibility.
*/
#define HAVE_DECL_GETOPT 1
#endif
#ifdef __cplusplus
}
#endif
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */

File diff suppressed because it is too large

@ -0,0 +1,41 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "logger.h"
#include "ErrorRecorder.h"
#include "logging.h"
using namespace nvinfer1;
SampleErrorRecorder gRecorder;
namespace sample
{
Logger gLogger{ Logger::Severity::kINFO };
LogStreamConsumer gLogVerbose{ LOG_VERBOSE(gLogger) };
LogStreamConsumer gLogInfo{ LOG_INFO(gLogger) };
LogStreamConsumer gLogWarning{ LOG_WARN(gLogger) };
LogStreamConsumer gLogError{ LOG_ERROR(gLogger) };
LogStreamConsumer gLogFatal{ LOG_FATAL(gLogger) };
void setReportableSeverity(Logger::Severity severity)
{
gLogger.setReportableSeverity(severity);
gLogVerbose.setReportableSeverity(severity);
gLogInfo.setReportableSeverity(severity);
gLogWarning.setReportableSeverity(severity);
gLogError.setReportableSeverity(severity);
gLogFatal.setReportableSeverity(severity);
}
} // namespace sample

@ -0,0 +1,37 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef LOGGER_H
#define LOGGER_H
#include "logging.h"
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample
{
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;
void setReportableSeverity(Logger::Severity severity);
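// Usage sketch (illustrative): the consumers behave like std::ostream, e.g.
//     sample::gLogInfo << "engine built" << std::endl;
//     sample::setReportableSeverity(sample::Logger::Severity::kVERBOSE); // also emit gLogVerbose output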
} // namespace sample
#endif // LOGGER_H

@ -0,0 +1,579 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include "sampleOptions.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <ostream>
#include <sstream>
#include <string>
namespace sample
{
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
: mOutput(other.mOutput)
, mPrefix(other.mPrefix)
, mShouldLog(other.mShouldLog)
{
}
LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
LogStreamConsumerBuffer() = delete;
LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;
~LogStreamConsumerBuffer() override
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
//!
//! synchronizes the stream buffer and returns 0 on success
//! synchronizing the stream buffer consists of inserting the buffer contents into the stream,
//! resetting the buffer and flushing the stream
//!
int32_t sync() override
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
mOutput << "[";
mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
            // insert the buffer contents, prepended with the appropriate prefix, into the stream
mOutput << mPrefix << str();
}
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog{};
}; // class LogStreamConsumerBuffer
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
std::mutex mLogMutex;
LogStreamConsumerBuffer mBuffer;
}; // class LogStreamConsumerBase
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//!
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
//!
LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other) noexcept
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
LogStreamConsumer(const LogStreamConsumer& other) = delete;
LogStreamConsumer() = delete;
~LogStreamConsumer() override = default;
LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
std::mutex& getMutex()
{
return mLogMutex;
}
bool getShouldLog() const
{
return mShouldLog;
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
}; // class LogStreamConsumer
template <typename T>
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj)
{
if (logger.getShouldLog())
{
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << obj;
}
return logger;
}
//!
//! Special handling for std::endl
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, std::ostream& (*f)(std::ostream&))
{
if (logger.getShouldLog())
{
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
os << f;
}
return logger;
}
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, const nvinfer1::Dims& dims)
{
if (logger.getShouldLog())
{
std::lock_guard<std::mutex> guard(logger.getMutex());
auto& os = static_cast<std::ostream&>(logger);
for (int32_t i = 0; i < dims.nbDims; ++i)
{
os << (i ? "x" : "") << dims.d[i];
}
}
return logger;
}
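//!
//! Usage sketch (illustrative): with the overloads above, a Dims value streams
//! directly; gLogInfo is declared in logger.h. For dims {1, 3, 224, 224} this
//! prints "input dims: 1x3x224x224".
//! \code
//!     nvinfer1::Dims4 d{1, 3, 224, 224};
//!     sample::gLogInfo << "input dims: " << d << std::endl;
//! \endcode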
//!
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
//!
class Logger : public nvinfer1::ILogger
{
public:
explicit Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger() noexcept
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) noexcept override
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity) noexcept
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv)
{
// Append TensorRT version as info
const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
auto cmdline = genCmdlineString(argc, argv);
return defineTest(vname, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(TestAtom const& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int32_t reportPass(TestAtom const& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int32_t reportFail(TestAtom const& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int32_t reportWaive(TestAtom const& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int32_t reportTest(TestAtom const& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(TestAtom const& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int32_t argc, char const* const* argv)
{
std::stringstream ss;
for (int32_t i = 0; i < argc; i++)
{
if (i > 0)
{
ss << " ";
}
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
}; // class Logger
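//
// Illustrative sketch of the test-reporting flow (not part of the original
// header); "TensorRT.sample_sketch" is a made-up test name.
//
#if 0
inline int32_t runSampleSketch(int32_t argc, char** argv)
{
    auto atom = Logger::defineTest("TensorRT.sample_sketch", argc, argv);
    Logger::reportTestStart(atom);         // prints "&&&& RUNNING ..."
    bool const pass = true;                // outcome of the actual sample work
    return Logger::reportTest(atom, pass); // prints "&&&& PASSED ..." and returns an exit code
}
#endif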
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
//! ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
} // namespace sample
#endif // TENSORRT_LOGGING_H

@ -0,0 +1,152 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef PARSER_ONNX_CONFIG_H
#define PARSER_ONNX_CONFIG_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
#include "NvOnnxParser.h"
#define ONNX_DEBUG 1
/**
* \class ParserOnnxConfig
* \brief Configuration Manager Class Concrete Implementation
*
* \note:
*
*/
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig
{
protected:
std::string mModelFilename{};
std::string mTextFilename{};
std::string mFullTextFilename{};
nvinfer1::DataType mModelDtype;
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
bool mPrintLayercInfo;
public:
ParserOnnxConfig()
: mModelDtype(nvinfer1::DataType::kFLOAT)
, mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING))
, mPrintLayercInfo(false)
{
#ifdef ONNX_DEBUG
if (isDebug())
{
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~ParserOnnxConfig() override
{
#ifdef ONNX_DEBUG
if (isDebug())
{
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType modelDtype) noexcept override
{
mModelDtype = modelDtype;
}
nvinfer1::DataType getModelDtype() const noexcept override
{
return mModelDtype;
}
const char* getModelFileName() const noexcept override
{
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept override
{
mModelFilename = std::string(onnxFilename);
}
nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept override
{
return mVerbosity;
}
void addVerbosity() noexcept override
{
++mVerbosity;
}
void reduceVerbosity() noexcept override
{
--mVerbosity;
}
void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept override
{
mVerbosity = verbosity;
}
const char* getTextFileName() const noexcept override
{
return mTextFilename.c_str();
}
void setTextFileName(const char* textFilename) noexcept override
{
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept override
{
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept override
{
mFullTextFilename = std::string(fullTextFilename);
}
bool getPrintLayerInfo() const noexcept override
{
return mPrintLayercInfo;
}
void setPrintLayerInfo(bool src) noexcept override
{
mPrintLayercInfo = src;
} //!< set the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()
virtual bool isDebug() const noexcept
{
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept override
{
delete this;
}
}; // class ParserOnnxConfig
#endif

@ -0,0 +1,224 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_SAFE_COMMON_H
#define TENSORRT_SAFE_COMMON_H
#include "cuda_runtime.h"
#include "NvInferRuntimeCommon.h"
#include <cstdlib>
#include <iostream>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
// For safeLoadLibrary
#ifdef _MSC_VER
// Needed so that the max/min definitions in windows.h do not conflict with std::max/min.
#define NOMINMAX
#include <windows.h>
#undef NOMINMAX
#else
#include <dlfcn.h>
#endif
#undef CHECK
#define CHECK(status) \
do \
{ \
auto ret = (status); \
if (ret != 0) \
{ \
std::cerr << "Cuda failure: " << ret << std::endl; \
abort(); \
} \
} while (0)
#undef SAFE_ASSERT
#define SAFE_ASSERT(condition) \
do \
{ \
if (!(condition)) \
{ \
std::cerr << "Assertion failure: " << #condition << std::endl; \
abort(); \
} \
} while (0)
namespace samplesCommon
{
template <typename T>
inline std::shared_ptr<T> infer_object(T* obj)
{
if (!obj)
{
throw std::runtime_error("Failed to create object");
}
return std::shared_ptr<T>(obj);
}
inline uint32_t elementSize(nvinfer1::DataType t)
{
switch (t)
{
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT: return 4;
case nvinfer1::DataType::kHALF: return 2;
case nvinfer1::DataType::kINT8: return 1;
case nvinfer1::DataType::kUINT8: return 1;
case nvinfer1::DataType::kBOOL: return 1;
case nvinfer1::DataType::kFP8: return 1;
}
return 0;
}
template <typename A, typename B>
inline A divUp(A x, B n)
{
return (x + n - 1) / n;
}
inline int64_t volume(nvinfer1::Dims const& d)
{
return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>{});
}
// Return m rounded up to nearest multiple of n
template <typename T>
inline T roundUp(T m, T n)
{
return ((m + n - 1) / n) * n;
}
//! comps is the number of components in a vector. Ignored if vecDim < 0.
inline int64_t volume(nvinfer1::Dims dims, int32_t vecDim, int32_t comps, int32_t batch)
{
if (vecDim >= 0)
{
dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
}
return samplesCommon::volume(dims) * std::max(batch, 1);
}
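// Illustrative check of the vectorized volume computation (assumed values, not
// part of the original header): an NCHW tensor {1, 3, 224, 224} holds 150528
// elements; with a vectorized channel dimension (vecDim = 1, comps = 4) the
// channel count is first rounded up to a multiple of 4, giving 200704.
#if 0
inline void volumeSketch()
{
    nvinfer1::Dims d{};
    d.nbDims = 4;
    d.d[0] = 1; d.d[1] = 3; d.d[2] = 224; d.d[3] = 224;
    SAFE_ASSERT(volume(d) == 150528);                                         // 1*3*224*224
    SAFE_ASSERT(volume(d, /*vecDim*/ 1, /*comps*/ 4, /*batch*/ 1) == 200704); // 1*4*224*224
}
#endif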
//!
//! \class TrtCudaGraphSafe
//! \brief Managed CUDA graph
//!
class TrtCudaGraphSafe
{
public:
explicit TrtCudaGraphSafe() = default;
TrtCudaGraphSafe(const TrtCudaGraphSafe&) = delete;
TrtCudaGraphSafe& operator=(const TrtCudaGraphSafe&) = delete;
TrtCudaGraphSafe(TrtCudaGraphSafe&&) = delete;
TrtCudaGraphSafe& operator=(TrtCudaGraphSafe&&) = delete;
~TrtCudaGraphSafe()
{
if (mGraphExec)
{
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(cudaStream_t& stream)
{
// cudaStreamCaptureModeGlobal is the only allowed mode in SAFE CUDA
CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal));
}
bool launch(cudaStream_t& stream)
{
return cudaGraphLaunch(mGraphExec, stream) == cudaSuccess;
}
void endCapture(cudaStream_t& stream)
{
CHECK(cudaStreamEndCapture(stream, &mGraph));
CHECK(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
CHECK(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(cudaStream_t& stream)
{
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be used.
const auto ret = cudaStreamEndCapture(stream, &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated)
{
SAFE_ASSERT(mGraph == nullptr);
}
else
{
SAFE_ASSERT(ret == cudaSuccess);
SAFE_ASSERT(mGraph != nullptr);
CHECK(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogError << "The CUDA graph capture on the stream has failed." << std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
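//!
//! Illustrative capture/replay sketch (not part of the original header): work
//! enqueued between beginCapture() and endCapture() is recorded once and can
//! then be replayed cheaply with launch().
//!
#if 0
inline void graphSketch(cudaStream_t stream)
{
    TrtCudaGraphSafe graph;
    graph.beginCapture(stream);
    // ... enqueue the work to be recorded on `stream` here ...
    graph.endCapture(stream); // instantiates the executable graph
    if (!graph.launch(stream))
    {
        std::cerr << "CUDA graph launch failed" << std::endl;
    }
}
#endif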
inline void safeLoadLibrary(const std::string& path)
{
#ifdef _MSC_VER
void* handle = LoadLibrary(path.c_str());
#else
int32_t flags{RTLD_LAZY};
void* handle = dlopen(path.c_str(), flags);
#endif
if (handle == nullptr)
{
#ifdef _MSC_VER
sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl;
#endif
}
}
inline std::vector<std::string> safeSplitString(std::string str, char delimiter = ',')
{
std::vector<std::string> splitVect;
std::stringstream ss(str);
std::string substr;
while (ss.good())
{
getline(ss, substr, delimiter);
splitVect.emplace_back(std::move(substr));
}
return splitVect;
}
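// Illustrative use of the two helpers above (assumed library names, not part
// of the original header): split a comma-separated plugin list and load each
// entry. Note that a trailing delimiter yields a final empty element.
#if 0
inline void loadPluginsSketch()
{
    for (auto const& lib : safeSplitString("libPluginA.so,libPluginB.so"))
    {
        safeLoadLibrary(lib);
    }
}
#endif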
} // namespace samplesCommon
#endif // TENSORRT_SAFE_COMMON_H

@ -0,0 +1,338 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef SampleConfig_H
#define SampleConfig_H
#include <cstring>
#include <iostream>
#include <string>
#include "NvInfer.h"
#include "NvOnnxConfig.h"
class SampleConfig : public nvonnxparser::IOnnxConfig
{
public:
enum class InputDataFormat : int
{
kASCII = 0,
kPPM = 1
};
private:
std::string mModelFilename;
std::string mEngineFilename;
std::string mTextFilename;
std::string mFullTextFilename;
std::string mImageFilename;
std::string mReferenceFilename;
std::string mOutputFilename;
std::string mCalibrationFilename;
std::string mTimingCacheFilename;
int64_t mLabel{-1};
int64_t mMaxBatchSize{32};
int64_t mCalibBatchSize{0};
int64_t mMaxNCalibBatch{0};
int64_t mFirstCalibBatch{0};
int64_t mUseDLACore{-1};
nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
bool mTF32{true};
Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
bool mPrintLayercInfo{false};
bool mDebugBuilder{false};
InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
uint64_t mTopK{0};
float mFailurePercentage{-1.0f};
float mTolerance{0.0f};
float mAbsTolerance{1e-5f};
public:
SampleConfig()
{
#ifdef ONNX_DEBUG
if (isDebug())
{
std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
}
#endif
}
protected:
~SampleConfig() override
{
#ifdef ONNX_DEBUG
if (isDebug())
{
std::cout << "SampleConfig::dtor(): " << this << std::endl;
}
#endif
}
public:
void setModelDtype(const nvinfer1::DataType mdt) noexcept override
{
mModelDtype = mdt;
}
nvinfer1::DataType getModelDtype() const noexcept override
{
return mModelDtype;
}
bool getTF32() const noexcept
{
return mTF32;
}
void setTF32(bool enabled) noexcept
{
mTF32 = enabled;
}
const char* getModelFileName() const noexcept override
{
return mModelFilename.c_str();
}
void setModelFileName(const char* onnxFilename) noexcept override
{
mModelFilename = std::string(onnxFilename);
}
Verbosity getVerbosityLevel() const noexcept override
{
return mVerbosity;
}
void addVerbosity() noexcept override
{
++mVerbosity;
}
void reduceVerbosity() noexcept override
{
--mVerbosity;
}
void setVerbosityLevel(Verbosity v) noexcept override
{
mVerbosity = v;
}
const char* getEngineFileName() const noexcept
{
return mEngineFilename.c_str();
}
void setEngineFileName(const char* engineFilename) noexcept
{
mEngineFilename = std::string(engineFilename);
}
const char* getTextFileName() const noexcept override
{
return mTextFilename.c_str();
}
void setTextFileName(const char* textFilename) noexcept override
{
mTextFilename = std::string(textFilename);
}
const char* getFullTextFileName() const noexcept override
{
return mFullTextFilename.c_str();
}
void setFullTextFileName(const char* fullTextFilename) noexcept override
{
mFullTextFilename = std::string(fullTextFilename);
}
void setLabel(int64_t label) noexcept
{
mLabel = label;
} //!< set the Label
int64_t getLabel() const noexcept
{
return mLabel;
} //!< get the Label
bool getPrintLayerInfo() const noexcept override
{
return mPrintLayercInfo;
}
void setPrintLayerInfo(bool b) noexcept override
{
mPrintLayercInfo = b;
} //!< set the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()
void setMaxBatchSize(int64_t maxBatchSize) noexcept
{
mMaxBatchSize = maxBatchSize;
} //!< set the Max Batch Size
int64_t getMaxBatchSize() const noexcept
{
return mMaxBatchSize;
} //!< get the Max Batch Size
void setCalibBatchSize(int64_t CalibBatchSize) noexcept
{
mCalibBatchSize = CalibBatchSize;
} //!< set the calibration batch size
int64_t getCalibBatchSize() const noexcept
{
return mCalibBatchSize;
} //!< get calibration batch size
void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept
{
mMaxNCalibBatch = MaxNCalibBatch;
} //!< set Max Number of Calibration Batches
int64_t getMaxNCalibBatch() const noexcept
{
return mMaxNCalibBatch;
} //!< get the Max Number of Calibration Batches
void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept
{
mFirstCalibBatch = FirstCalibBatch;
} //!< set the first calibration batch
int64_t getFirstCalibBatch() const noexcept
{
return mFirstCalibBatch;
} //!< get the first calibration batch
void setUseDLACore(int64_t UseDLACore) noexcept
{
mUseDLACore = UseDLACore;
} //!< set the DLA core to use
int64_t getUseDLACore() const noexcept
{
return mUseDLACore;
} //!< get the DLA core to use
void setDebugBuilder() noexcept
{
mDebugBuilder = true;
} //!< enable debug info while building the engine.
bool getDebugBuilder() const noexcept
{
return mDebugBuilder;
} //!< get the boolean variable, corresponding to the debug builder
    const char* getImageFileName() const noexcept //!< get the image file name (PPM or ASCII)
    {
        return mImageFilename.c_str();
    }
    void setImageFileName(const char* imageFilename) noexcept //!< set the image file name
    {
        mImageFilename = std::string(imageFilename);
    }
const char* getReferenceFileName() const noexcept
{
return mReferenceFilename.c_str();
}
void setReferenceFileName(const char* referenceFilename) noexcept //!< set reference file name
{
mReferenceFilename = std::string(referenceFilename);
}
void setInputDataFormat(InputDataFormat idt) noexcept
{
mInputDataFormat = idt;
} //!< specifies expected data format of the image file (PPM or ASCII)
InputDataFormat getInputDataFormat() const noexcept
{
return mInputDataFormat;
} //!< returns the expected data format of the image file.
    const char* getOutputFileName() const noexcept //!< get the file name used to save the results
    {
        return mOutputFilename.c_str();
    }
    void setOutputFileName(const char* outputFilename) noexcept //!< set the output file name
    {
        mOutputFilename = std::string(outputFilename);
    }
    const char* getCalibrationFileName() const noexcept
    {
        return mCalibrationFilename.c_str();
    } //!< get the file containing the list of image files for int8 calibration
    void setCalibrationFileName(const char* calibrationFilename) noexcept //!< set the int8 calibration list file name
    {
        mCalibrationFilename = std::string(calibrationFilename);
    }
uint64_t getTopK() const noexcept
{
return mTopK;
}
void setTopK(uint64_t topK) noexcept
{
mTopK = topK;
} //!< If this option is specified, return the top-K probabilities.
float getFailurePercentage() const noexcept
{
return mFailurePercentage;
}
void setFailurePercentage(float f) noexcept
{
mFailurePercentage = f;
}
float getAbsoluteTolerance() const noexcept
{
return mAbsTolerance;
}
void setAbsoluteTolerance(float a) noexcept
{
mAbsTolerance = a;
}
float getTolerance() const noexcept
{
return mTolerance;
}
void setTolerance(float t) noexcept
{
mTolerance = t;
}
const char* getTimingCacheFilename() const noexcept
{
return mTimingCacheFilename.c_str();
}
void setTimingCacheFileName(const char* timingCacheFilename) noexcept
{
mTimingCacheFilename = std::string(timingCacheFilename);
}
bool isDebug() const noexcept
{
#if ONNX_DEBUG
return (std::getenv("ONNX_DEBUG") ? true : false);
#else
return false;
#endif
}
void destroy() noexcept override
{
delete this;
}
}; // class SampleConfig
#endif

@ -0,0 +1,554 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_DEVICE_H
#define TRT_SAMPLE_DEVICE_H
#include <cassert>
#include <cuda.h>
#include <cuda_runtime.h>
#include <iostream>
#include <thread>
#include "sampleUtils.h"
namespace sample
{
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr)
{
if (ret != cudaSuccess)
{
err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
abort();
}
}
class TrtCudaEvent;
namespace
{
void cudaSleep(void* sleep)
{
std::this_thread::sleep_for(std::chrono::duration<float, std::milli>(*static_cast<float*>(sleep)));
}
} // namespace
//!
//! \class TrtCudaStream
//! \brief Managed CUDA stream
//!
class TrtCudaStream
{
public:
TrtCudaStream()
{
cudaCheck(cudaStreamCreate(&mStream));
}
TrtCudaStream(const TrtCudaStream&) = delete;
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
TrtCudaStream(TrtCudaStream&&) = delete;
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
~TrtCudaStream()
{
cudaCheck(cudaStreamDestroy(mStream));
}
cudaStream_t get() const
{
return mStream;
}
void synchronize()
{
cudaCheck(cudaStreamSynchronize(mStream));
}
void wait(TrtCudaEvent& event);
void sleep(float* ms)
{
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
}
private:
cudaStream_t mStream{};
};
//!
//! \class TrtCudaEvent
//! \brief Managed CUDA event
//!
class TrtCudaEvent
{
public:
explicit TrtCudaEvent(bool blocking = true)
{
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
}
TrtCudaEvent(const TrtCudaEvent&) = delete;
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
TrtCudaEvent(TrtCudaEvent&&) = delete;
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
~TrtCudaEvent()
{
cudaCheck(cudaEventDestroy(mEvent));
}
cudaEvent_t get() const
{
return mEvent;
}
void record(const TrtCudaStream& stream)
{
cudaCheck(cudaEventRecord(mEvent, stream.get()));
}
void synchronize()
{
cudaCheck(cudaEventSynchronize(mEvent));
}
// Returns the elapsed time between the two events in milliseconds
float operator-(const TrtCudaEvent& e) const
{
float time{0};
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
return time;
}
private:
cudaEvent_t mEvent{};
};
inline void TrtCudaStream::wait(TrtCudaEvent& event)
{
cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
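//!
//! Illustrative GPU timing sketch (not part of the original header): record an
//! event before and after the work and take the difference in milliseconds.
//!
#if 0
inline float timeWorkMsSketch()
{
    TrtCudaStream stream;
    TrtCudaEvent start;
    TrtCudaEvent stop;
    start.record(stream);
    // ... enqueue the work to be timed on `stream` ...
    stop.record(stream);
    stop.synchronize();  // block until the stop event has been reached
    return stop - start; // elapsed milliseconds between the two events
}
#endif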
//!
//! \class TrtCudaGraph
//! \brief Managed CUDA graph
//!
class TrtCudaGraph
{
public:
explicit TrtCudaGraph() = default;
TrtCudaGraph(const TrtCudaGraph&) = delete;
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
TrtCudaGraph(TrtCudaGraph&&) = delete;
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
~TrtCudaGraph()
{
if (mGraphExec)
{
cudaGraphExecDestroy(mGraphExec);
}
}
void beginCapture(TrtCudaStream& stream)
{
cudaCheck(cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
}
bool launch(TrtCudaStream& stream)
{
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
}
void endCapture(TrtCudaStream& stream)
{
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
cudaCheck(cudaGraphDestroy(mGraph));
}
void endCaptureOnError(TrtCudaStream& stream)
{
// There are two possibilities why stream capture would fail:
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
// (2) TRT reports a failure.
// In case (1), the returning mGraph should be nullptr.
// In case (2), the returning mGraph is not nullptr, but it should not be used.
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
if (ret == cudaErrorStreamCaptureInvalidated)
{
assert(mGraph == nullptr);
}
else
{
assert(ret == cudaSuccess);
assert(mGraph != nullptr);
cudaCheck(cudaGraphDestroy(mGraph));
mGraph = nullptr;
}
// Clean up any CUDA error.
cudaGetLastError();
sample::gLogWarning << "The CUDA graph capture on the stream has failed." << std::endl;
}
private:
cudaGraph_t mGraph{};
cudaGraphExec_t mGraphExec{};
};
//!
//! \class TrtCudaBuffer
//! \brief Managed buffer for host and device
//!
template <typename A, typename D>
class TrtCudaBuffer
{
public:
TrtCudaBuffer() = default;
TrtCudaBuffer(const TrtCudaBuffer&) = delete;
TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;
TrtCudaBuffer(TrtCudaBuffer&& rhs)
{
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs)
{
if (this != &rhs)
{
reset(rhs.mPtr);
rhs.mPtr = nullptr;
}
return *this;
}
~TrtCudaBuffer()
{
reset();
}
TrtCudaBuffer(size_t size)
{
A()(&mPtr, size);
}
void allocate(size_t size)
{
reset();
A()(&mPtr, size);
}
void reset(void* ptr = nullptr)
{
if (mPtr)
{
D()(mPtr);
}
mPtr = ptr;
}
void* get() const
{
return mPtr;
}
private:
void* mPtr{nullptr};
};
struct DeviceAllocator
{
void operator()(void** ptr, size_t size)
{
cudaCheck(cudaMalloc(ptr, size));
}
};
struct DeviceDeallocator
{
void operator()(void* ptr)
{
cudaCheck(cudaFree(ptr));
}
};
struct ManagedAllocator
{
void operator()(void** ptr, size_t size)
{
cudaCheck(cudaMallocManaged(ptr, size));
}
};
struct HostAllocator
{
void operator()(void** ptr, size_t size)
{
cudaCheck(cudaMallocHost(ptr, size));
}
};
struct HostDeallocator
{
void operator()(void* ptr)
{
cudaCheck(cudaFreeHost(ptr));
}
};
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
//!
//! \class MirroredBuffer
//! \brief Coupled host and device buffers
//!
class IMirroredBuffer
{
public:
//!
//! Allocate memory for the mirrored buffer given the size
//! of the allocation.
//!
virtual void allocate(size_t size) = 0;
//!
//! Get the pointer to the device side buffer.
//!
//! \return pointer to device memory or nullptr if uninitialized.
//!
virtual void* getDeviceBuffer() const = 0;
//!
//! Get the pointer to the host side buffer.
//!
//! \return pointer to host memory or nullptr if uninitialized.
//!
virtual void* getHostBuffer() const = 0;
//!
//! Copy the memory from host to device.
//!
virtual void hostToDevice(TrtCudaStream& stream) = 0;
//!
//! Copy the memory from device to host.
//!
virtual void deviceToHost(TrtCudaStream& stream) = 0;
//!
//! Interface to get the size of the memory
//!
//! \return the size of memory allocated.
//!
virtual size_t getSize() const = 0;
//!
//! Virtual destructor declaration
//!
virtual ~IMirroredBuffer() = default;
}; // class IMirroredBuffer
//!
//! Class to have a separate memory buffer for discrete device and host allocations.
//!
class DiscreteMirroredBuffer : public IMirroredBuffer
{
public:
void allocate(size_t size) override
{
mSize = size;
mHostBuffer.allocate(size);
mDeviceBuffer.allocate(size);
}
void* getDeviceBuffer() const override
{
return mDeviceBuffer.get();
}
void* getHostBuffer() const override
{
return mHostBuffer.get();
}
void hostToDevice(TrtCudaStream& stream) override
{
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get()));
}
void deviceToHost(TrtCudaStream& stream) override
{
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get()));
}
size_t getSize() const override
{
return mSize;
}
private:
size_t mSize{0};
TrtHostBuffer mHostBuffer;
TrtDeviceBuffer mDeviceBuffer;
}; // class DiscreteMirroredBuffer
//!
//! Class to have a unified memory buffer for embedded devices.
//!
class UnifiedMirroredBuffer : public IMirroredBuffer
{
public:
void allocate(size_t size) override
{
mSize = size;
mBuffer.allocate(size);
}
void* getDeviceBuffer() const override
{
return mBuffer.get();
}
void* getHostBuffer() const override
{
return mBuffer.get();
}
void hostToDevice(TrtCudaStream& stream) override
{
// Does nothing since we are using unified memory.
}
void deviceToHost(TrtCudaStream& stream) override
{
// Does nothing since we are using unified memory.
}
size_t getSize() const override
{
return mSize;
}
private:
size_t mSize{0};
TrtManagedBuffer mBuffer;
}; // class UnifiedMirroredBuffer
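//!
//! Illustrative host<->device round trip (not part of the original header);
//! the same code works with either mirrored-buffer implementation above.
//!
#if 0
inline void mirroredRoundTripSketch(TrtCudaStream& stream)
{
    DiscreteMirroredBuffer buf;
    buf.allocate(1024);
    std::memset(buf.getHostBuffer(), 0, buf.getSize()); // prepare input on the host
    buf.hostToDevice(stream);                           // async copy to the device
    // ... enqueue work that reads/writes buf.getDeviceBuffer() ...
    buf.deviceToHost(stream);                           // async copy of results back
    stream.synchronize();                               // host data is valid after this point
}
#endif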
//!
//! Class to allocate memory for outputs with data-dependent shapes. The sizes of those are unknown so pre-allocation is
//! not possible.
//!
class OutputAllocator : public nvinfer1::IOutputAllocator
{
public:
OutputAllocator(IMirroredBuffer* buffer)
: mBuffer(buffer)
{
}
void* reallocateOutput(
char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override
{
// Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
// even for empty tensors, so allocate a dummy byte.
size = std::max(size, static_cast<uint64_t>(1));
if (size > mSize)
{
mBuffer->allocate(roundUp(size, alignment));
mSize = size;
}
return mBuffer->getDeviceBuffer();
}
void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override {}
IMirroredBuffer* getBuffer()
{
return mBuffer.get();
}
~OutputAllocator() override {}
private:
std::unique_ptr<IMirroredBuffer> mBuffer;
uint64_t mSize{};
};
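//!
//! Illustrative registration sketch (not part of the original header). The
//! setOutputAllocator() call assumes a TensorRT version with data-dependent
//! output-shape support on nvinfer1::IExecutionContext.
//!
#if 0
inline void bindDynamicOutputSketch(nvinfer1::IExecutionContext& context, char const* tensorName)
{
    auto allocator = std::make_unique<OutputAllocator>(new DiscreteMirroredBuffer);
    context.setOutputAllocator(tensorName, allocator.get());
    // After enqueueV3(), the final shape arrives via notifyShape() and the data
    // lives in allocator->getBuffer()->getDeviceBuffer().
}
#endif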
inline void setCudaDevice(int device, std::ostream& os)
{
cudaCheck(cudaSetDevice(device));
cudaDeviceProp properties;
cudaCheck(cudaGetDeviceProperties(&properties, device));
// clang-format off
os << "=== Device Information ===" << std::endl;
os << "Selected Device: " << properties.name << std::endl;
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
os << "SMs: " << properties.multiProcessorCount << std::endl;
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
os << std::endl;
os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is "
<< "currently running at." << std::endl;
// clang-format on
}
inline int32_t getCudaDriverVersion()
{
int32_t version{-1};
cudaCheck(cudaDriverGetVersion(&version));
return version;
}
inline int32_t getCudaRuntimeVersion()
{
int32_t version{-1};
cudaCheck(cudaRuntimeGetVersion(&version));
return version;
}
} // namespace sample
#endif // TRT_SAMPLE_DEVICE_H

File diff suppressed because it is too large

@ -0,0 +1,314 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENGINES_H
#define TRT_SAMPLE_ENGINES_H
#include <iostream>
#include <vector>
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "NvUffParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample
{
struct Parser
{
std::unique_ptr<nvcaffeparser1::ICaffeParser> caffeParser;
std::unique_ptr<nvuffparser::IUffParser> uffParser;
std::unique_ptr<nvonnxparser::IParser> onnxParser;
operator bool() const
{
return caffeParser || uffParser || onnxParser;
}
};
//!
//! \brief A helper class to hold a serialized engine (std or safe) and only deserialize it when being accessed.
//!
class LazilyDeserializedEngine
{
public:
//!
//! \brief Delete default constructor to make sure isSafe and DLACore are always set.
//!
LazilyDeserializedEngine() = delete;
//!
//! \brief Constructor of LazilyDeserializedEngine.
//!
LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath)
: mIsSafe(isSafe)
, mVersionCompatible(versionCompatible)
, mDLACore(DLACore)
, mTempdir(tempdir)
, mTempfileControls(tempfileControls)
, mLeanDLLPath(leanDLLPath)
{
}
//!
//! \brief Move from another LazilyDeserializedEngine.
//!
    LazilyDeserializedEngine(LazilyDeserializedEngine&& other)
    {
        mIsSafe = other.mIsSafe;
        mVersionCompatible = other.mVersionCompatible;
        mDLACore = other.mDLACore;
        mEngineBlob = std::move(other.mEngineBlob);
        // The runtimes must move together with any engine they deserialized;
        // per the lifetime note below, a runtime has to outlive its engines.
        mParentRuntime = std::move(other.mParentRuntime);
        mRuntime = std::move(other.mRuntime);
        mEngine = std::move(other.mEngine);
        mSafeEngine = std::move(other.mSafeEngine);
        mTempdir = std::move(other.mTempdir);
        mTempfileControls = other.mTempfileControls;
        mLeanDLLPath = std::move(other.mLeanDLLPath);
        mDynamicPlugins = std::move(other.mDynamicPlugins);
    }
//!
//! \brief Delete copy constructor.
//!
LazilyDeserializedEngine(LazilyDeserializedEngine const& other) = delete;
//!
//! \brief Get the pointer to the ICudaEngine. Triggers deserialization if not already done so.
//!
nvinfer1::ICudaEngine* get();
//!
//! \brief Get the pointer to the ICudaEngine and release the ownership.
//!
nvinfer1::ICudaEngine* release();
//!
//! \brief Get the pointer to the safe::ICudaEngine. Triggers deserialization if not already done so.
//!
nvinfer1::safe::ICudaEngine* getSafe();
//!
//! \brief Get the underlying blob storing serialized engine.
//!
std::vector<uint8_t> const& getBlob() const
{
return mEngineBlob;
}
//!
//! \brief Set the underlying blob storing serialized engine.
//!
void setBlob(void* data, size_t size)
{
mEngineBlob.resize(size);
std::memcpy(mEngineBlob.data(), data, size);
mEngine.reset();
mSafeEngine.reset();
}
//!
//! \brief Release the underlying blob without deleting the deserialized engine.
//!
void releaseBlob()
{
mEngineBlob.clear();
}
//!
//! \brief Get if safe mode is enabled.
//!
bool isSafe()
{
return mIsSafe;
}
void setDynamicPlugins(std::vector<std::string> const& dynamicPlugins)
{
mDynamicPlugins = dynamicPlugins;
}
private:
bool mIsSafe{false};
bool mVersionCompatible{false};
int32_t mDLACore{-1};
std::vector<uint8_t> mEngineBlob;
std::string mTempdir{};
nvinfer1::TempfileControlFlags mTempfileControls{getTempfileControlDefaults()};
std::string mLeanDLLPath{};
std::vector<std::string> mDynamicPlugins;
//! \name Owned TensorRT objects
//! Per TensorRT object lifetime requirements as outlined in the developer guide,
//! the runtime must remain live while any engines created by the runtime are live.
//! DO NOT ADJUST the declaration order here: runtime -> (engine|safeEngine).
//! Destruction occurs in reverse declaration order: (engine|safeEngine) -> runtime.
//!@{
//! The runtime used to track parent of mRuntime if one exists.
//! Needed to load mRuntime if lean.so is supplied through file system path.
std::unique_ptr<nvinfer1::IRuntime> mParentRuntime{};
//! The runtime that is used to deserialize the engine.
std::unique_ptr<nvinfer1::IRuntime> mRuntime{};
//! If mIsSafe is false, this points to the deserialized std engine
std::unique_ptr<nvinfer1::ICudaEngine> mEngine{};
//! If mIsSafe is true, this points to the deserialized safe engine
std::unique_ptr<nvinfer1::safe::ICudaEngine> mSafeEngine{};
//!@}
};
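//!
//! Illustrative lazy-deserialization flow (not part of the original header);
//! `plan` stands in for a serialized engine loaded from disk.
//!
#if 0
inline void lazyEngineSketch(std::vector<uint8_t>& plan)
{
    LazilyDeserializedEngine lazy(/*isSafe*/ false, /*versionCompatible*/ false,
        /*DLACore*/ -1, /*tempdir*/ "", getTempfileControlDefaults(), /*leanDLLPath*/ "");
    lazy.setBlob(plan.data(), plan.size());     // only stores the bytes
    nvinfer1::ICudaEngine* engine = lazy.get(); // first access deserializes
    if (engine != nullptr)
    {
        lazy.releaseBlob(); // the blob is no longer needed once deserialized
    }
}
#endif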
struct BuildEnvironment
{
BuildEnvironment() = delete;
BuildEnvironment(BuildEnvironment const& other) = delete;
BuildEnvironment(BuildEnvironment&& other) = delete;
BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "")
: engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath)
{
}
//! \name Owned TensorRT objects
//! Per TensorRT object lifetime requirements as outlined in the developer guide,
//! factory objects must remain live while the objects created by those factories
//! are live (with the exception of builder -> engine).
//! DO NOT ADJUST the declaration order here: builder -> network -> parser.
//! Destruction occurs in reverse declaration order: parser -> network -> builder.
//!@{
//! The builder used to build the engine.
std::unique_ptr<nvinfer1::IBuilder> builder;
//! The network used by the builder.
std::unique_ptr<nvinfer1::INetworkDefinition> network;
//! The parser used to specify the network.
Parser parser;
//! The engine.
LazilyDeserializedEngine engine;
//!@}
};
//!
//! \brief Set up network and config
//!
//! \return boolean Return true if network and config were successfully set
//!
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder,
nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err,
std::vector<std::vector<char>>& sparseWeights);
//!
//! \brief Log refittable layers and weights of a refittable engine
//!
void dumpRefittable(nvinfer1::ICudaEngine& engine);
//!
//! \brief Load a serialized engine
//!
//! \return Pointer to the engine loaded or nullptr if the operation failed
//!
nvinfer1::ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err);
//!
//! \brief Save an engine into a file
//!
//! \return boolean Return true if the engine was successfully saved
//!
bool saveEngine(nvinfer1::ICudaEngine const& engine, std::string const& fileName, std::ostream& err);
//!
//! \brief Create an engine from model or serialized file, and optionally save engine
//!
//! \return Pointer to the engine created or nullptr if the creation failed
//!
bool getEngineBuildEnv(
ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err);
//!
//! \brief Create a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys,
nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network, std::ostream& err);
//!
//! \brief Transfer the model to a serialized network
//!
//! \return Pointer to a host memory for a serialized network
//!
nvinfer1::IHostMemory* modelToSerialized(
const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
//!
//! \brief Serialize network and save it into a file
//!
//! \return boolean Return true if the network was successfully serialized and saved
//!
bool serializeAndSave(
const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading);
//!
//! \brief Set tensor scales from a calibration table
//!
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector<IOFormat> const& inputFormats,
std::vector<IOFormat> const& outputFormats, std::string const& calibrationFile);
//!
//! \brief Check if safe runtime is loaded.
//!
bool hasSafeRuntime();
//!
//! \brief Create a safe runtime object if the dynamic library is loaded.
//!
nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
//!
//! \brief Check if consistency checker is loaded.
//!
bool hasConsistencyChecker();
//!
//! \brief Create a consistency checker object if the dynamic library is loaded.
//!
nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker(
nvinfer1::ILogger& logger, nvinfer1::IHostMemory const* engine) noexcept;
//!
//! \brief Run consistency check on serialized engine.
//!
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
bool loadEngineToBuildEnv(std::string const& engine, bool enableConsistency, BuildEnvironment& env, std::ostream& err);
} // namespace sample
#endif // TRT_SAMPLE_ENGINES_H

@ -0,0 +1,141 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_ENTRYPOINTS_H
#define TRT_SAMPLE_ENTRYPOINTS_H
//! \file sampleEntrypoints.h
//!
//! Declares and conditionally defines entrypoints needed to create base TensorRT objects, depending
//! on whether the given sample uses TRT at link time or dynamically. Since common code is built once
//! and shared across all samples (both link-time and dynamic TRT), it does not define these entrypoints,
//! so each sample must define them individually.
//!
//! Samples that use TRT at link time can define DEFINE_TRT_ENTRYPOINTS before including this header to
//! pick up the definitions here.
#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include "NvUffParser.h"
#include "logger.h"
extern nvinfer1::IBuilder* createBuilder();
extern nvinfer1::IRuntime* createRuntime();
extern nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine);
extern nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network);
extern nvcaffeparser1::ICaffeParser* sampleCreateCaffeParser();
extern void shutdownCaffeParser();
extern nvuffparser::IUffParser* sampleCreateUffParser();
extern void shutdownUffParser();
#if !defined(DEFINE_TRT_ENTRYPOINTS)
#define DEFINE_TRT_ENTRYPOINTS 0
#endif
// Allow opting out of individual entrypoints that are unused by the sample
#if !defined(DEFINE_TRT_BUILDER_ENTRYPOINT)
#define DEFINE_TRT_BUILDER_ENTRYPOINT 1
#endif
#if !defined(DEFINE_TRT_RUNTIME_ENTRYPOINT)
#define DEFINE_TRT_RUNTIME_ENTRYPOINT 1
#endif
#if !defined(DEFINE_TRT_REFITTER_ENTRYPOINT)
#define DEFINE_TRT_REFITTER_ENTRYPOINT 1
#endif
#if !defined(DEFINE_TRT_ONNX_PARSER_ENTRYPOINT)
#define DEFINE_TRT_ONNX_PARSER_ENTRYPOINT 1
#endif
#if !defined(DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT)
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 1
#endif
#if DEFINE_TRT_ENTRYPOINTS
nvinfer1::IBuilder* createBuilder()
{
#if DEFINE_TRT_BUILDER_ENTRYPOINT
return nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger());
#else
return {};
#endif
}
nvinfer1::IRuntime* createRuntime()
{
#if DEFINE_TRT_RUNTIME_ENTRYPOINT
return nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
#else
return {};
#endif
}
nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine)
{
#if DEFINE_TRT_REFITTER_ENTRYPOINT
return nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger());
#else
return {};
#endif
}
nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network)
{
#if DEFINE_TRT_ONNX_PARSER_ENTRYPOINT
return nvonnxparser::createParser(network, sample::gLogger.getTRTLogger());
#else
return {};
#endif
}
nvcaffeparser1::ICaffeParser* sampleCreateCaffeParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
return nvcaffeparser1::createCaffeParser();
#else
return {};
#endif
}
void shutdownCaffeParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
nvcaffeparser1::shutdownProtobufLibrary();
#endif
}
nvuffparser::IUffParser* sampleCreateUffParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
return nvuffparser::createUffParser();
#else
return {};
#endif
}
void shutdownUffParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
nvuffparser::shutdownProtobufLibrary();
#endif
}
#endif // DEFINE_TRT_ENTRYPOINTS
#endif // TRT_SAMPLE_ENTRYPOINTS_H
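// Illustrative opt-in sketch (not part of the original header): a sample that
// uses TensorRT at link time defines DEFINE_TRT_ENTRYPOINTS before including
// this header, and can opt out of entrypoints it does not need.
#if 0
#define DEFINE_TRT_ENTRYPOINTS 1
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 0 // no Caffe/UFF parsers needed
#include "sampleEntrypoints.h"
#endif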

File diff suppressed because it is too large

@ -0,0 +1,264 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_INFERENCE_H
#define TRT_SAMPLE_INFERENCE_H
#include "sampleEngines.h"
#include "sampleReporting.h"
#include "sampleUtils.h"
#include <functional>
#include <iostream>
#include <list>
#include <memory>
#include <string>
#include <vector>
#include "NvInfer.h"
#include "NvInferSafeRuntime.h"
namespace sample
{
struct InferenceEnvironment
{
InferenceEnvironment() = delete;
InferenceEnvironment(InferenceEnvironment const& other) = delete;
InferenceEnvironment(InferenceEnvironment&& other) = delete;
    // Read isSafe() from our own engine member (initialized just before `safe`)
    // rather than from the moved-from bEnv.engine.
    InferenceEnvironment(BuildEnvironment& bEnv) : engine(std::move(bEnv.engine)), safe(engine.isSafe())
    {
    }
LazilyDeserializedEngine engine;
std::unique_ptr<Profiler> profiler;
std::vector<std::unique_ptr<nvinfer1::IExecutionContext>> contexts;
std::vector<std::unique_ptr<Bindings>> bindings;
bool error{false};
bool safe{false};
std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContexts;
template <class ContextType>
inline ContextType* getContext(int32_t streamIdx);
//! Storage for input shape tensors.
//!
//! It's important that the addresses of the data do not change between the calls to
//! setTensorAddress/setInputShape (which tells TensorRT where the input shape tensor is)
//! and enqueueV2/enqueueV3 (when TensorRT might use the input shape tensor).
//!
//! The input shape tensors could alternatively be handled via member bindings,
//! but it simplifies control-flow to store the data here since it's shared across
//! the bindings.
std::list<std::vector<int32_t>> inputShapeTensorValues;
};
template <>
inline nvinfer1::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx)
{
return contexts[streamIdx].get();
}
template <>
inline nvinfer1::safe::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx)
{
return safeContexts[streamIdx].get();
}
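//!
//! Illustrative sketch (not part of the original header) of why the shape
//! tensor values live in a std::list: the address handed to TensorRT must stay
//! valid until enqueueV3(), and std::list never relocates existing elements
//! when new ones are appended.
//!
#if 0
inline void bindShapeTensorSketch(InferenceEnvironment& iEnv,
    nvinfer1::IExecutionContext& context, char const* name, std::vector<int32_t> values)
{
    iEnv.inputShapeTensorValues.emplace_back(std::move(values));
    auto& stable = iEnv.inputShapeTensorValues.back(); // stays put on later appends
    context.setTensorAddress(name, stable.data());
}
#endif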
//!
//! \brief Set up contexts and bindings for inference
//!
bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system);
//!
//! \brief Deserialize the engine and time how long it takes.
//!
bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys);
//!
//! \brief Run inference and collect timing, return false if any error hit during inference
//!
bool runInference(
InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector<InferenceTrace>& trace);
//!
//! \brief Get layer information of the engine.
//!
std::string getLayerInformation(
nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format);
struct Binding
{
bool isInput{false};
std::unique_ptr<IMirroredBuffer> buffer;
std::unique_ptr<OutputAllocator> outputAllocator;
int64_t volume{0};
nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};
void fill(std::string const& fileName);
void fill();
void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv,
std::string const separator = " ") const;
};
struct TensorInfo
{
int32_t bindingIndex{-1};
char const* name{nullptr};
nvinfer1::Dims dims{};
bool isDynamic{};
int32_t comps{-1};
nvinfer1::Dims strides{};
int32_t vectorDimIndex{-1};
bool isInput{};
nvinfer1::DataType dataType{};
int64_t vol{-1};
void updateVolume(int32_t batch)
{
vol = volume(dims, strides, vectorDimIndex, comps, batch);
}
};
class Bindings
{
public:
Bindings() = delete;
explicit Bindings(bool useManaged)
: mUseManaged(useManaged)
{
}
void addBinding(TensorInfo const& tensorInfo, std::string const& fileName = "");
void** getDeviceBuffers();
void transferInputToDevice(TrtCudaStream& stream);
void transferOutputToHost(TrtCudaStream& stream);
void fill(int binding, std::string const& fileName)
{
mBindings[binding].fill(fileName);
}
void fill(int binding)
{
mBindings[binding].fill();
}
template <typename ContextType>
void dumpBindingDimensions(int32_t binding, ContextType const& context, std::ostream& os) const;
template <typename ContextType>
void dumpBindingValues(ContextType const& context, int32_t binding, std::ostream& os,
std::string const& separator = " ", int32_t batch = 1) const;
template <typename ContextType>
void dumpRawBindingToFiles(ContextType const& context, std::ostream& os) const;
template <typename ContextType>
void dumpInputs(ContextType const& context, std::ostream& os) const
{
auto isInput = [](Binding const& b) { return b.isInput; };
dumpBindings(context, isInput, os);
}
template <typename ContextType>
void dumpOutputs(ContextType const& context, std::ostream& os) const
{
auto isOutput = [](Binding const& b) { return !b.isInput; };
dumpBindings(context, isOutput, os);
}
template <typename ContextType>
void dumpBindings(ContextType const& context, std::ostream& os) const
{
auto all = [](Binding const& b) { return true; };
dumpBindings(context, all, os);
}
template <typename ContextType>
void dumpBindings(
ContextType const& context, std::function<bool(Binding const&)> predicate, std::ostream& os) const
{
for (auto const& n : mNames)
{
auto const binding = n.second;
if (predicate(mBindings[binding]))
{
os << n.first << ": (";
dumpBindingDimensions(binding, context, os);
os << ")" << std::endl;
dumpBindingValues(context, binding, os);
os << std::endl;
}
}
}
std::unordered_map<std::string, int> getInputBindings() const
{
auto isInput = [](Binding const& b) { return b.isInput; };
return getBindings(isInput);
}
std::unordered_map<std::string, int> getOutputBindings() const
{
auto isOutput = [](Binding const& b) { return !b.isInput; };
return getBindings(isOutput);
}
std::unordered_map<std::string, int> getBindings() const
{
auto all = [](Binding const& b) { return true; };
return getBindings(all);
}
std::unordered_map<std::string, int> getBindings(std::function<bool(Binding const&)> predicate) const;
bool setTensorAddresses(nvinfer1::IExecutionContext& context) const;
bool setSafeTensorAddresses(nvinfer1::safe::IExecutionContext& context) const;
private:
std::unordered_map<std::string, int32_t> mNames;
std::vector<Binding> mBindings;
std::vector<void*> mDevicePointers;
bool mUseManaged{false};
};
struct TaskInferenceEnvironment
{
TaskInferenceEnvironment(std::string engineFile, InferenceOptions inference, int32_t deviceId = 0,
int32_t DLACore = -1, int32_t bs = batchNotProvided);
InferenceOptions iOptions{};
int32_t device{defaultDevice};
int32_t batch{batchNotProvided};
std::unique_ptr<InferenceEnvironment> iEnv;
std::vector<InferenceTrace> trace;
};
bool runMultiTasksInference(std::vector<std::unique_ptr<TaskInferenceEnvironment>>& tEnvList);
} // namespace sample
#endif // TRT_SAMPLE_INFERENCE_H

File diff suppressed because it is too large

@ -0,0 +1,456 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_OPTIONS_H
#define TRT_SAMPLE_OPTIONS_H
#include <algorithm>
#include <array>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "NvInfer.h"
namespace sample
{
// Build default params
constexpr int32_t maxBatchNotProvided{0};
constexpr int32_t defaultMinTiming{1};
constexpr int32_t defaultAvgTiming{8};
constexpr int32_t defaultMaxAuxStreams{-1};
constexpr int32_t defaultBuilderOptimizationLevel{3};
// System default params
constexpr int32_t defaultDevice{0};
// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
constexpr float defaultPersistentCacheRatio{0};
// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr std::array<float, 3> defaultPercentiles{90, 95, 99};
enum class PrecisionConstraints
{
kNONE,
kOBEY,
kPREFER
};
enum class ModelFormat
{
kANY,
kCAFFE,
kONNX,
kUFF
};
enum class SparsityFlag
{
kDISABLE,
kENABLE,
kFORCE
};
enum class TimingCacheMode
{
kDISABLE,
kLOCAL,
kGLOBAL
};
//!
//! \enum RuntimeMode
//!
//! \brief Used to dictate which TensorRT runtime library to dynamically load.
//!
enum class RuntimeMode
{
//! Maps to libnvinfer.so or nvinfer.dll
kFULL,
//! Maps to libnvinfer_dispatch.so or nvinfer_dispatch.dll
kDISPATCH,
//! Maps to libnvinfer_lean.so or nvinfer_lean.dll
kLEAN,
};
inline std::ostream& operator<<(std::ostream& os, RuntimeMode const mode)
{
switch (mode)
{
case RuntimeMode::kFULL:
{
os << "full";
break;
}
case RuntimeMode::kDISPATCH:
{
os << "dispatch";
break;
}
case RuntimeMode::kLEAN:
{
os << "lean";
break;
}
}
return os;
}
using Arguments = std::unordered_multimap<std::string, std::string>;
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;
using ShapeRange = std::array<std::vector<int32_t>, nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes = std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
using LayerDeviceTypes = std::unordered_map<std::string, nvinfer1::DeviceType>;
class Options
{
public:
virtual ~Options() = default;
virtual void parse(Arguments& arguments) = 0;
};
class BaseModelOptions : public Options
{
public:
ModelFormat format{ModelFormat::kANY};
std::string model;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class UffInput : public Options
{
public:
std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
bool NHWC{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class ModelOptions : public Options
{
public:
BaseModelOptions baseModel;
std::string prototxt;
std::vector<std::string> outputs;
UffInput uffInputs;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
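// Default tempfile controls: allow both on-disk temporary files and in-memory files.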
constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults()
{
using F = nvinfer1::TempfileControlFlag;
return (1U << static_cast<uint32_t>(F::kALLOW_TEMPORARY_FILES))
| (1U << static_cast<uint32_t>(F::kALLOW_IN_MEMORY_FILES));
}
class BuildOptions : public Options
{
public:
int32_t maxBatch{maxBatchNotProvided};
double workspace{-1.0};
double dlaSRAM{-1.0};
double dlaLocalDRAM{-1.0};
double dlaGlobalDRAM{-1.0};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
bool tf32{true};
bool fp16{false};
bool int8{false};
bool fp8{false};
bool directIO{false};
PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
LayerPrecisions layerPrecisions;
LayerOutputTypes layerOutputTypes;
LayerDeviceTypes layerDeviceTypes;
bool safe{false};
bool consistency{false};
bool restricted{false};
bool skipInference{false};
bool save{false};
bool load{false};
bool refittable{false};
bool heuristic{false};
bool versionCompatible{false};
bool excludeLeanRuntime{false};
int32_t builderOptimizationLevel{defaultBuilderOptimizationLevel};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
std::string engine;
std::string calibration;
using ShapeProfile = std::unordered_map<std::string, ShapeRange>;
ShapeProfile shapes;
ShapeProfile shapesCalib;
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
nvinfer1::TacticSources enabledTactics{0};
nvinfer1::TacticSources disabledTactics{0};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
// C++11 does not automatically generate hash function for enum class.
// Use int32_t to support C++11 compilers.
std::unordered_map<int32_t, bool> previewFeatures;
nvinfer1::HardwareCompatibilityLevel hardwareCompatibilityLevel{nvinfer1::HardwareCompatibilityLevel::kNONE};
std::string tempdir{};
nvinfer1::TempfileControlFlags tempfileControls{getTempfileControlDefaults()};
RuntimeMode useRuntime{RuntimeMode::kFULL};
std::string leanDLLPath{};
int32_t maxAuxStreams{defaultMaxAuxStreams};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class SystemOptions : public Options
{
public:
int32_t device{defaultDevice};
int32_t DLACore{-1};
bool fallback{false};
bool ignoreParsedPluginLibs{false};
std::vector<std::string> plugins;
std::vector<std::string> setPluginsToSerialize;
std::vector<std::string> dynamicPlugins;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class InferenceOptions : public Options
{
public:
int32_t batch{batchNotProvided};
int32_t iterations{defaultIterations};
int32_t infStreams{defaultStreams};
float warmup{defaultWarmUp};
float duration{defaultDuration};
float sleep{defaultSleep};
float idle{defaultIdle};
float persistentCacheRatio{defaultPersistentCacheRatio};
bool overlap{true};
bool skipTransfers{false};
bool useManaged{false};
bool spin{false};
bool threads{false};
bool graph{false};
bool rerun{false};
bool timeDeserialize{false};
bool timeRefit{false};
std::unordered_map<std::string, std::string> inputs;
using ShapeProfile = std::unordered_map<std::string, std::vector<int32_t>>;
ShapeProfile shapes;
nvinfer1::ProfilingVerbosity nvtxVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class ReportingOptions : public Options
{
public:
bool verbose{false};
int32_t avgs{defaultAvgRuns};
std::vector<float> percentiles{defaultPercentiles.begin(), defaultPercentiles.end()};
bool refit{false};
bool output{false};
bool dumpRawBindings{false};
bool profile{false};
bool layerInfo{false};
std::string exportTimes;
std::string exportOutput;
std::string exportProfile;
std::string exportLayerInfo;
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class SafeBuilderOptions : public Options
{
public:
std::string serialized{};
std::string onnxModelFile{};
bool help{false};
bool verbose{false};
std::vector<IOFormat> inputFormats;
std::vector<IOFormat> outputFormats;
bool int8{false};
bool fp8{false};
std::string calibFile{};
std::vector<std::string> plugins;
bool consistency{false};
bool standard{false};
TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
std::string timingCacheFile{};
SparsityFlag sparsity{SparsityFlag::kDISABLE};
int32_t minTiming{defaultMinTiming};
int32_t avgTiming{defaultAvgTiming};
void parse(Arguments& arguments) override;
static void printHelp(std::ostream& out);
};
class AllOptions : public Options
{
public:
ModelOptions model;
BuildOptions build;
SystemOptions system;
InferenceOptions inference;
ReportingOptions reporting;
bool helps{false};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
class TaskInferenceOptions : public Options
{
public:
std::string engine;
int32_t device{defaultDevice};
int32_t DLACore{-1};
int32_t batch{batchNotProvided};
bool graph{false};
float persistentCacheRatio{defaultPersistentCacheRatio};
void parse(Arguments& arguments) override;
static void help(std::ostream& out);
};
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
bool parseHelp(Arguments& arguments);
void helpHelp(std::ostream& out);
// Functions to print options
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
std::ostream& operator<<(std::ostream& os, const UffInput& input);
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims)
{
for (int32_t i = 0; i < dims.nbDims; ++i)
{
os << (i ? "x" : "") << dims.d[i];
}
return os;
}
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role)
{
switch (role)
{
case nvinfer1::WeightsRole::kKERNEL:
{
os << "Kernel";
break;
}
case nvinfer1::WeightsRole::kBIAS:
{
os << "Bias";
break;
}
case nvinfer1::WeightsRole::kSHIFT:
{
os << "Shift";
break;
}
case nvinfer1::WeightsRole::kSCALE:
{
os << "Scale";
break;
}
case nvinfer1::WeightsRole::kCONSTANT:
{
os << "Constant";
break;
}
case nvinfer1::WeightsRole::kANY:
{
os << "Any";
break;
}
}
return os;
}
inline std::ostream& operator<<(std::ostream& os, const std::vector<int32_t>& vec)
{
for (int32_t i = 0, e = static_cast<int32_t>(vec.size()); i < e; ++i)
{
os << (i ? "x" : "") << vec[i];
}
return os;
}
} // namespace sample
#endif // TRT_SAMPLE_OPTIONS_H

@ -0,0 +1,579 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <algorithm>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <numeric>
#include <utility>
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
using namespace nvinfer1;
namespace sample
{
namespace
{
//!
//! \brief Find percentile in an ascending sequence of timings
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
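//! For example, with 10 ascending timings and percentile = 90, one timing is excluded
//! and the 9th smallest timing is returned.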
//!
template <typename T>
float findPercentile(float percentile, std::vector<InferenceTime> const& timings, T const& toFloat)
{
int32_t const all = static_cast<int32_t>(timings.size());
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
if (timings.empty())
{
return std::numeric_limits<float>::infinity();
}
if (percentile < 0.F || percentile > 100.F)
{
throw std::runtime_error("percentile is not in [0, 100]!");
}
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
}
//!
//! \brief Find median in a sorted sequence of timings
//!
template <typename T>
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat)
{
if (timings.empty())
{
return std::numeric_limits<float>::infinity();
}
int32_t const m = timings.size() / 2;
if (timings.size() % 2)
{
return toFloat(timings[m]);
}
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
}
//!
//! \brief Find the coefficient of variation (std / mean, reported as a percentage) in a sorted sequence of timings, given the mean
//!
template <typename T>
float findCoeffOfVariance(std::vector<InferenceTime> const& timings, T const& toFloat, float mean)
{
if (timings.empty())
{
return 0;
}
if (mean == 0.F)
{
return std::numeric_limits<float>::infinity();
}
auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) {
float const diff = toFloat(a) - mean;
return acc + diff * diff;
};
float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size();
return std::sqrt(variance) / mean * 100.F;
}
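// Convert an InferenceTrace of absolute timestamps into an InferenceTime of per-phase durations.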
inline InferenceTime traceToTiming(const InferenceTrace& a)
{
return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart),
(a.d2hEnd - a.d2hStart));
}
} // namespace
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os)
{
os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl;
os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl;
}
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os)
{
int32_t count = 0;
InferenceTime sum;
os << std::endl;
os << "=== Trace details ===" << std::endl;
os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
for (auto const& t : timings)
{
sum += t;
if (++count == runsPerAvg)
{
// clang-format off
os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
<< " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg
<< " ms)" << std::endl;
// clang-format on
count = 0;
sum.enq = 0;
sum.h2d = 0;
sum.compute = 0;
sum.d2h = 0;
}
}
}
void printMetricExplanations(std::ostream& os)
{
os << std::endl;
os << "=== Explanations of the performance metrics ===" << std::endl;
os << "Total Host Walltime: the host walltime from when the first query (after warmups) is enqueued to when the "
"last query is completed."
<< std::endl;
os << "GPU Compute Time: the GPU latency to execute the kernels for a query." << std::endl;
os << "Total GPU Compute Time: the summation of the GPU Compute Time of all the queries. If this is significantly "
"shorter than Total Host Walltime, the GPU may be under-utilized because of host-side overheads or data "
"transfers."
<< std::endl;
os << "Throughput: the observed throughput computed by dividing the number of queries by the Total Host Walltime. "
"If this is significantly lower than the reciprocal of GPU Compute Time, the GPU may be under-utilized "
"because of host-side overheads or data transfers."
<< std::endl;
os << "Enqueue Time: the host latency to enqueue a query. If this is longer than GPU Compute Time, the GPU may be "
"under-utilized."
<< std::endl;
os << "H2D Latency: the latency for host-to-device data transfers for input tensors of a single query."
<< std::endl;
os << "D2H Latency: the latency for device-to-host data transfers for output tensors of a single query."
<< std::endl;
os << "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a "
"single query."
<< std::endl;
}
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles)
{
auto const metricComparator
= [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); };
auto const metricAccumulator = [metricGetter](float acc, InferenceTime const& a) { return acc + metricGetter(a); };
std::vector<InferenceTime> newTimings = timings;
std::sort(newTimings.begin(), newTimings.end(), metricComparator);
PerformanceResult result;
result.min = metricGetter(newTimings.front());
result.max = metricGetter(newTimings.back());
result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, metricAccumulator) / newTimings.size();
result.median = findMedian(newTimings, metricGetter);
for (auto percentile : percentiles)
{
result.percentiles.emplace_back(findPercentile(percentile, newTimings, metricGetter));
}
result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
return result;
}
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs, std::vector<float> const& percentiles,
int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
{
float const throughput = batchSize * timings.size() / walltimeMs * 1000;
auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles);
auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentiles);
auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
auto const h2dResult = getPerformanceResult(timings, getH2d, percentiles);
auto const getCompute = [](InferenceTime const& t) { return t.compute; };
auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentiles);
auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles);
auto const toPerfString = [&](const PerformanceResult& r) {
std::stringstream s;
s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, "
<< "median = " << r.median << " ms";
for (int32_t i = 0, n = percentiles.size(); i < n; ++i)
{
s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms";
}
return s.str();
};
osInfo << std::endl;
osInfo << "=== Performance summary ===" << std::endl;
osInfo << "Throughput: " << throughput << " qps" << std::endl;
osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;
// Report warnings if the throughput is bound by other factors than GPU Compute Time.
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
if (enqueueResult.median > kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median)
{
osWarning
<< "* Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized."
<< std::endl;
osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the "
"throughput."
<< std::endl;
}
if (h2dResult.median >= gpuComputeResult.median)
{
osWarning << "* Throughput may be bound by host-to-device transfers for the inputs rather than GPU Compute and "
"the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl;
}
if (d2hResult.median >= gpuComputeResult.median)
{
osWarning << "* Throughput may be bound by device-to-host transfers for the outputs rather than GPU Compute "
"and the GPU may be under-utilized."
<< std::endl;
osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl;
}
// Report warnings if the GPU Compute Time is unstable.
constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD)
{
osWarning << "* GPU compute time is unstable, with coefficient of variance = " << gpuComputeResult.coeffVar
<< "%." << std::endl;
osWarning << " If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the "
<< "stability." << std::endl;
}
// Report warnings if multiple inference streams are used.
if (infStreams > 1)
{
osWarning << "* Multiple inference streams are used. Latencies may not be accurate since inferences may run in "
<< " parallel. Please use \"Throughput\" as the performance metric instead." << std::endl;
}
// Explain what the metrics mean.
osInfo << "Explanations of the performance metrics are printed in the verbose logs." << std::endl;
printMetricExplanations(osVerbose);
osInfo << std::endl;
}
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
{
int32_t batchSize = infOpts.batch;
float const warmupMs = infOpts.warmup;
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; };
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
int32_t const warmups = noWarmup - trace.begin();
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
// when implicit batch used, batchSize = options.inference.batch, which is parsed through --batch
// when explicit batch used, batchSize = options.inference.batch = 0
// treat inference with explicit batch as a single query and report the throughput
batchSize = batchSize ? batchSize : 1;
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo);
std::vector<InferenceTime> timings(trace.size() - warmups);
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
printTiming(timings, reportingOpts.avgs, osInfo);
printEpilog(
timings, benchTime, reportingOpts.percentiles, batchSize, infOpts.infStreams, osInfo, osWarning, osVerbose);
if (!reportingOpts.exportTimes.empty())
{
exportJSONTrace(trace, reportingOpts.exportTimes, warmups);
}
}
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq : time, "end enq" : time, "start h2d" : time, "end h2d" : time, "start compute" : time,
//! "end compute" : time, "start d2h" : time, "end d2h" : time, "h2d" : time, "compute" : time,
//! "d2h" : time, "latency" : time }
//!
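//! Example of a single emitted record (values illustrative):
//! { "startEnqMs" : 0.01, "endEnqMs" : 0.05, "startH2dMs" : 0.05, "endH2dMs" : 0.12,
//!   "startComputeMs" : 0.12, "endComputeMs" : 1.50, "startD2hMs" : 1.50, "endD2hMs" : 1.58,
//!   "h2dMs" : 0.07, "computeMs" : 1.38, "d2hMs" : 0.08, "latencyMs" : 1.53 }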
void exportJSONTrace(std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups)
{
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl;
char const* sep = " ";
for (auto iter = trace.begin() + nbWarmups; iter < trace.end(); ++iter)
{
auto const& t = *iter;
InferenceTime const it(traceToTiming(t));
os << sep << "{ ";
sep = ", ";
// clang-format off
os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
<< "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
<< "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
<< "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
<< "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
<< "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << " }"
<< std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept
{
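// Layer timings arrive in the same order on every iteration. When mIterator reaches the
// end of mLayers, either the first layer's name repeats (a new iteration begins, so rewind
// to the front) or a previously unseen layer is appended (still in the first iteration).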
if (mIterator == mLayers.end())
{
bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
mUpdatesCount += mLayers.empty() || first;
if (first)
{
mIterator = mLayers.begin();
}
else
{
mLayers.emplace_back();
mLayers.back().name = layerName;
mIterator = mLayers.end() - 1;
}
}
mIterator->timeMs.push_back(timeMs);
++mIterator;
}
void Profiler::print(std::ostream& os) const noexcept
{
std::string const nameHdr("Layer");
std::string const timeHdr(" Time (ms)");
std::string const avgHdr(" Avg. Time (ms)");
std::string const medHdr(" Median Time (ms)");
std::string const percentageHdr(" Time %");
float const totalTimeMs = getTotalTime();
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { return a.name.size() < b.name.size(); };
auto const longestName = std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
auto const nameLength = std::max(longestName->name.size() + 1, nameHdr.size());
auto const timeLength = timeHdr.size();
auto const avgLength = avgHdr.size();
auto const medLength = medHdr.size();
auto const percentageLength = percentageHdr.size();
os << std::endl
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << medHdr << percentageHdr << std::endl;
for (auto const& p : mLayers)
{
if (p.timeMs.empty() || getTotalTime(p) == 0.F)
{
// There is no point in printing profiling data for a layer that did not run at all.
continue;
}
// clang-format off
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p)
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p)
<< std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p)
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100
<< std::endl;
}
{
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
<< std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime()
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
// clang-format on
}
os << std::endl;
}
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept
{
std::ofstream os(fileName, std::ofstream::trunc);
os << "[" << std::endl << " { \"count\" : " << mUpdatesCount << " }" << std::endl;
auto const totalTimeMs = getTotalTime();
for (auto const& l : mLayers)
{
// clang-format off
os << ", {" << R"( "name" : ")" << l.name << R"(")"
R"(, "timeMs" : )" << getTotalTime(l)
<< R"(, "averageMs" : )" << getAvgTime(l)
<< R"(, "medianMs" : )" << getMedianTime(l)
<< R"(, "percentage" : )" << getTotalTime(l) / totalTimeMs * 100
<< " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os)
{
os << "Input Tensors:" << std::endl;
bindings.dumpInputs(context, os);
}
template <typename ContextType>
void dumpOutputs(ContextType const& context, Bindings const& bindings, std::ostream& os)
{
os << "Output Tensors:" << std::endl;
bindings.dumpOutputs(context, os);
}
template
void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
template
void dumpOutputs(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
template <typename ContextType>
void dumpRawBindingsToFiles(ContextType const& context, Bindings const& bindings, std::ostream& os)
{
bindings.dumpRawBindingToFiles(context, os);
}
template
void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
template
void dumpRawBindingsToFiles(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
template <typename ContextType>
void exportJSONOutput(
ContextType const& context, Bindings const& bindings, std::string const& fileName, int32_t batch)
{
std::ofstream os(fileName, std::ofstream::trunc);
std::string sep = " ";
auto const output = bindings.getOutputBindings();
os << "[" << std::endl;
for (auto const& binding : output)
{
// clang-format off
os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl;
sep = ", ";
os << " " << sep << R"("dimensions" : ")";
bindings.dumpBindingDimensions(binding.second, context, os);
os << "\"" << std::endl;
os << " " << sep << "\"values\" : [ ";
bindings.dumpBindingValues(context, binding.second, os, sep, batch);
os << " ]" << std::endl << " }" << std::endl;
// clang-format on
}
os << "]" << std::endl;
}
template
void exportJSONOutput(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch);
template void exportJSONOutput(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings,
std::string const& fileName, int32_t batch);
bool printLayerInfo(
ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context)
{
if (reporting.layerInfo)
{
sample::gLogInfo << "Layer Information:" << std::endl;
sample::gLogInfo << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kONELINE)
<< std::flush;
}
if (!reporting.exportLayerInfo.empty())
{
std::ofstream os(reporting.exportLayerInfo, std::ofstream::trunc);
os << getLayerInformation(engine, context, nvinfer1::LayerInformationFormat::kJSON) << std::flush;
}
return true;
}
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv)
{
if (reporting.profile)
{
iEnv.profiler->print(sample::gLogInfo);
}
if (!reporting.exportProfile.empty())
{
iEnv.profiler->exportJSONProfile(reporting.exportProfile);
}
// Print a warning about total per-layer latency when auxiliary streams are used.
if (!iEnv.safe && (reporting.profile || !reporting.exportProfile.empty()))
{
int32_t const nbAuxStreams = iEnv.engine.get()->getNbAuxStreams();
if (nbAuxStreams > 0)
{
sample::gLogWarning << "The engine uses " << nbAuxStreams << " auxiliary streams, so the \"Total\" latency "
<< "may not be accurate because some layers may have run in parallel!" << std::endl;
}
}
}
namespace details
{
template <typename ContextType>
void dump(std::unique_ptr<ContextType> const& context, std::unique_ptr<Bindings> const& binding,
ReportingOptions const& reporting, int32_t batch)
{
if (!context)
{
sample::gLogError << "Empty context! Skip printing outputs." << std::endl;
return;
}
if (reporting.output)
{
dumpOutputs(*context, *binding, sample::gLogInfo);
}
if (reporting.dumpRawBindings)
{
dumpRawBindingsToFiles(*context, *binding, sample::gLogInfo);
}
if (!reporting.exportOutput.empty())
{
exportJSONOutput(*context, *binding, reporting.exportOutput, batch);
}
}
} // namespace details
void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch)
{
auto const& binding = iEnv.bindings.at(0);
if (!binding)
{
sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl;
return;
}
if (iEnv.safe)
{
auto const& context = iEnv.safeContexts.at(0);
details::dump(context, binding, reporting, batch);
}
else
{
auto const& context = iEnv.contexts.at(0);
details::dump(context, binding, reporting, batch);
}
}
} // namespace sample

@ -0,0 +1,302 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H
#include <functional>
#include <iostream>
#include <numeric>
#include "NvInfer.h"
#include "sampleDevice.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
namespace sample
{
class Bindings;
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime
{
InferenceTime(float q, float i, float c, float o)
: enq(q)
, h2d(i)
, compute(c)
, d2h(o)
{
}
InferenceTime() = default;
InferenceTime(InferenceTime const&) = default;
InferenceTime(InferenceTime&&) = default;
InferenceTime& operator=(InferenceTime const&) = default;
InferenceTime& operator=(InferenceTime&&) = default;
~InferenceTime() = default;
float enq{0}; // Enqueue
float h2d{0}; // Host to Device
float compute{0}; // Compute
float d2h{0}; // Device to Host
// ideal latency
float latency() const
{
return h2d + compute + d2h;
}
};
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace
{
InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
: stream(s)
, enqStart(es)
, enqEnd(ee)
, h2dStart(is)
, h2dEnd(ie)
, computeStart(cs)
, computeEnd(ce)
, d2hStart(os)
, d2hEnd(oe)
{
}
InferenceTrace() = default;
InferenceTrace(InferenceTrace const&) = default;
InferenceTrace(InferenceTrace&&) = default;
InferenceTrace& operator=(InferenceTrace const&) = default;
InferenceTrace& operator=(InferenceTrace&&) = default;
~InferenceTrace() = default;
int32_t stream{0};
float enqStart{0};
float enqEnd{0};
float h2dStart{0};
float h2dEnd{0};
float computeStart{0};
float computeEnd{0};
float d2hStart{0};
float d2hEnd{0};
};
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b)
{
return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h);
}
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b)
{
return a = a + b;
}
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult
{
float min{0.F};
float max{0.F};
float mean{0.F};
float median{0.F};
std::vector<float> percentiles;
float coeffVar{0.F}; // coefficient of variation
};
//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);
//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);
//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs, std::vector<float> const& percentiles,
int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
//!
//! \brief Print the explanations of the performance metrics printed in printEpilog() function.
//!
void printMetricExplanations(std::ostream& os);
//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
//!
//! \brief Export a timing trace to JSON file
//!
void exportJSONTrace(
std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups);
//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
//!
//! \brief Print output tensors to stream
//!
template <typename ContextType>
void dumpOutputs(ContextType const& context, Bindings const& bindings, std::ostream& os);
template <typename ContextType>
void dumpRawBindingsToFiles(ContextType const& context, Bindings const& bindings, std::ostream& os);
//!
//! \brief Export output tensors to JSON file
//!
template <typename ContextType>
void exportJSONOutput(
ContextType const& context, Bindings const& bindings, std::string const& fileName, int32_t batch);
//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile
{
std::string name;
std::vector<float> timeMs;
};
//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in the same order
//!
class Profiler : public nvinfer1::IProfiler
{
public:
void reportLayerTime(char const* layerName, float timeMs) noexcept override;
void print(std::ostream& os) const noexcept;
//!
//! \brief Export a profile to JSON file
//!
void exportJSONProfile(std::string const& fileName) const noexcept;
private:
float getTotalTime() const noexcept
{
auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus<float>());
};
return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
}
float getMedianTime() const noexcept
{
if (mLayers.empty())
{
return 0.F;
}
std::vector<float> totalTime;
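// Sum the per-layer times for each run, then take the median of the per-run totals.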
for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
{
auto const layerTime
= [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; };
auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime);
totalTime.push_back(t);
}
return median(totalTime);
}
float getMedianTime(LayerProfile const& p) const noexcept
{
return median(p.timeMs);
}
static float median(std::vector<float> vals)
{
if (vals.empty())
{
return 0.F;
}
std::sort(vals.begin(), vals.end());
if (vals.size() % 2U == 1U)
{
return vals[vals.size() / 2U];
}
return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F;
}
//! return the total runtime of given layer profile
float getTotalTime(LayerProfile const& p) const noexcept
{
auto const& vals = p.timeMs;
return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus<float>());
}
float getAvgTime(LayerProfile const& p) const noexcept
{
return getTotalTime(p) / p.timeMs.size();
}
std::vector<LayerProfile> mLayers;
std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
int32_t mUpdatesCount{0};
};
//!
//! \brief Print layer info to logger or export it to output JSON file.
//!
bool printLayerInfo(
ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);
//! Forward declaration.
struct InferenceEnvironment;
//!
//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
//!
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv);
//!
//! \brief Print binding output values to logger or export them to output JSON file.
//!
void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch);
} // namespace sample
#endif // TRT_SAMPLE_REPORTING_H

@ -0,0 +1,528 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "sampleUtils.h"
#include "half.h"
using namespace nvinfer1;
namespace sample
{
size_t dataTypeSize(nvinfer1::DataType dataType)
{
switch (dataType)
{
case nvinfer1::DataType::kINT32:
case nvinfer1::DataType::kFLOAT: return 4U;
case nvinfer1::DataType::kHALF: return 2U;
case nvinfer1::DataType::kBOOL:
case nvinfer1::DataType::kUINT8:
case nvinfer1::DataType::kINT8:
case nvinfer1::DataType::kFP8: return 1U;
}
return 0;
}
int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch)
{
int32_t maxNbElems = 1;
for (int32_t i = 0; i < dims.nbDims; ++i)
{
// Get effective length of axis.
int32_t d = dims.d[i];
// If any dimension is 0, it is an empty tensor.
if (d == 0)
{
return 0;
}
if (i == vecDim)
{
d = samplesCommon::divUp(d, comps);
}
maxNbElems = std::max(maxNbElems, d * strides.d[i]);
}
return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
nvinfer1::Dims toDims(std::vector<int32_t> const& vec)
{
int32_t limit = static_cast<int32_t>(nvinfer1::Dims::MAX_DIMS);
if (static_cast<int32_t>(vec.size()) > limit)
{
sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl;
}
// Pick first nvinfer1::Dims::MAX_DIMS elements
nvinfer1::Dims dims{std::min(static_cast<int32_t>(vec.size()), limit), {}};
std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
return dims;
}
void loadFromFile(std::string const& fileName, char* dst, size_t size)
{
ASSERT(dst);
std::ifstream file(fileName, std::ios::in | std::ios::binary);
if (file.is_open())
{
file.read(dst, size);
size_t const nbBytesRead = file.gcount();
file.close();
if (nbBytesRead != size)
{
std::ostringstream msg;
msg << "Unexpected file size for input file: " << fileName << ". Note: Expected: " << size
<< " bytes but only read: " << nbBytesRead << " bytes";
throw std::invalid_argument(msg.str());
}
}
else
{
std::ostringstream msg;
msg << "Cannot open file " << fileName << "!";
throw std::invalid_argument(msg.str());
}
}
std::vector<std::string> splitToStringVec(std::string const& s, char separator)
{
std::vector<std::string> splitted;
for (size_t start = 0; start < s.length();)
{
size_t separatorIndex = s.find(separator, start);
if (separatorIndex == std::string::npos)
{
separatorIndex = s.length();
}
splitted.emplace_back(s.substr(start, separatorIndex - start));
start = separatorIndex + 1;
}
return splitted;
}
bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput /*= true*/)
{
bool broadcast = formats.size() == 1;
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
if (!formats.empty() && !validFormatsCount)
{
if (isInput)
{
throw std::invalid_argument(
"The number of inputIOFormats must match network's inputs or be one for broadcasting.");
}
throw std::invalid_argument(
"The number of outputIOFormats must match network's outputs or be one for broadcasting.");
}
return broadcast;
}
void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
{
using TensorToLayer = std::unordered_map<nvinfer1::ITensor*, nvinfer1::ILayer*>;
using LayerToTensor = std::unordered_map<nvinfer1::ILayer*, nvinfer1::ITensor*>;
// 1. Collect layers and tensors information from the network.
TensorToLayer matmulI2L;
TensorToLayer constO2L;
TensorToLayer shuffleI2L;
LayerToTensor shuffleL2O;
auto collectMappingInfo = [&](int32_t const idx)
{
ILayer* l = network.getLayer(idx);
switch (l->getType())
{
case nvinfer1::LayerType::kMATRIX_MULTIPLY:
{
// assume weights on the second input.
matmulI2L.insert({l->getInput(1), l});
break;
}
case nvinfer1::LayerType::kCONSTANT:
{
DataType const dtype = static_cast<nvinfer1::IConstantLayer*>(l)->getWeights().type;
if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF)
{
// Sparsify float only.
constO2L.insert({l->getOutput(0), l});
}
break;
}
case nvinfer1::LayerType::kSHUFFLE:
{
shuffleI2L.insert({l->getInput(0), l});
shuffleL2O.insert({l, l->getOutput(0)});
break;
}
default: break;
}
};
int32_t const nbLayers = network.getNbLayers();
for (int32_t i = 0; i < nbLayers; ++i)
{
collectMappingInfo(i);
}
if (matmulI2L.size() == 0 || constO2L.size() == 0)
{
// No MatrixMultiply or Constant layer found, no weights to sparsify.
return;
}
// Helper for analysis
auto isTranspose
= [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); };
auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; };
auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool
{
for (int32_t i = 0; i < dims.nbDims; ++i)
{
if (dims.d[i] != i || dims.d[i] != -1)
{
return false;
}
}
return true;
};
auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor*
{
while (shuffleI2L.find(t) != shuffleI2L.end())
{
nvinfer1::IShuffleLayer* s = static_cast<nvinfer1::IShuffleLayer*>(shuffleI2L.at(t));
if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions())
|| !isIdenticalReshape(s->getReshapeDimensions()))
{
break;
}
if (isTranspose(s->getFirstTranspose()))
{
needTranspose = !needTranspose;
}
if (isTranspose(s->getSecondTranspose()))
{
needTranspose = !needTranspose;
}
t = shuffleL2O.at(s);
}
return t;
};
// 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose
std::unordered_map<nvinfer1::IConstantLayer*, bool> constantLayerToSparse;
for (auto& o2l : constO2L)
{
// If need to transpose the weights of the Constant layer.
// Need to transpose by default due to semantic difference.
bool needTranspose{true};
ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose);
if (matmulI2L.find(t) == matmulI2L.end())
{
continue;
}
// check MatMul params...
IMatrixMultiplyLayer* mm = static_cast<nvinfer1::IMatrixMultiplyLayer*>(matmulI2L.at(t));
bool const twoInputs = mm->getNbInputs() == 2;
bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions());
bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE
&& mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR;
if (!(twoInputs && all2D && isSimple))
{
continue;
}
if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE)
{
needTranspose = !needTranspose;
}
constantLayerToSparse.insert({static_cast<IConstantLayer*>(o2l.second), needTranspose});
}
// 3. Finally, sparsify the weights
auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose)
{
Dims dims = layer->getOutput(0)->getDimensions();
ASSERT(dims.nbDims == 2);
int32_t const idxN = needTranspose ? 1 : 0;
int32_t const n = dims.d[idxN];
int32_t const k = dims.d[1 - idxN];
sparseWeights.emplace_back();
std::vector<int8_t>& spw = sparseWeights.back();
Weights w = layer->getWeights();
DataType const dtype = w.type;
ASSERT(dtype == nvinfer1::DataType::kFLOAT
|| dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored.
if (needTranspose)
{
if (dtype == nvinfer1::DataType::kFLOAT)
{
spw.resize(w.count * sizeof(float));
transpose2DWeights<float>(spw.data(), w.values, k, n);
}
else if (dtype == nvinfer1::DataType::kHALF)
{
spw.resize(w.count * sizeof(half_float::half));
transpose2DWeights<half_float::half>(spw.data(), w.values, k, n);
}
w.values = spw.data();
std::vector<int8_t> tmpW;
sparsify(w, n, 1, tmpW);
if (dtype == nvinfer1::DataType::kFLOAT)
{
transpose2DWeights<float>(spw.data(), tmpW.data(), n, k);
}
else if (dtype == nvinfer1::DataType::kHALF)
{
transpose2DWeights<half_float::half>(spw.data(), tmpW.data(), n, k);
}
}
else
{
sparsify(w, n, 1, spw);
}
w.values = spw.data();
layer->setWeights(w);
};
for (auto& l : constantLayerToSparse)
{
sparsifyConstantWeights(l.first, l.second);
}
}
template <typename L>
void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
{
auto weights = l.getKernelWeights();
sparsify(weights, k, trs, sparseWeights);
weights.values = sparseWeights.data();
l.setKernelWeights(weights);
}
// Explicit instantiation
template void setSparseWeights<IConvolutionLayer>(
IConvolutionLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
template void setSparseWeights<IFullyConnectedLayer>(
IFullyConnectedLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
{
for (int32_t l = 0; l < network.getNbLayers(); ++l)
{
auto* layer = network.getLayer(l);
auto const t = layer->getType();
if (t == nvinfer1::LayerType::kCONVOLUTION)
{
auto& conv = *static_cast<IConvolutionLayer*>(layer);
auto const& dims = conv.getKernelSizeNd();
ASSERT(dims.nbDims == 2 || dims.nbDims == 3);
auto const k = conv.getNbOutputMaps();
auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies<int32_t>());
sparseWeights.emplace_back();
setSparseWeights(conv, k, trs, sparseWeights.back());
}
else if (t == nvinfer1::LayerType::kFULLY_CONNECTED)
{
auto& fc = *static_cast<nvinfer1::IFullyConnectedLayer*>(layer);
auto const k = fc.getNbOutputChannels();
sparseWeights.emplace_back();
setSparseWeights(fc, k, 1, sparseWeights.back());
}
}
sparsifyMatMulKernelWeights(network, sparseWeights);
}
void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
{
switch (weights.type)
{
case DataType::kFLOAT:
sparsify(static_cast<float const*>(weights.values), weights.count, k, trs, sparseWeights);
break;
case DataType::kHALF:
sparsify(static_cast<half_float::half const*>(weights.values), weights.count, k, trs, sparseWeights);
break;
case DataType::kINT8:
case DataType::kINT32:
case DataType::kUINT8:
case DataType::kBOOL:
case DataType::kFP8: break;
}
}
template <typename T>
void print(std::ostream& os, T v)
{
os << v;
}
void print(std::ostream& os, int8_t v)
{
os << static_cast<int32_t>(v);
}
template <typename T>
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv)
{
auto const vol = volume(dims);
T const* typedBuffer = static_cast<T const*>(buffer);
std::string sep;
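// Map each linear element index to a physical offset: decompose it into per-dimension
// coordinates and apply the strides; the vectorized dimension contributes a whole-vector
// offset plus the position within the vector of spv components.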
for (int64_t v = 0; v < vol; ++v)
{
int64_t curV = v;
int32_t dataOffset = 0;
for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex)
{
int32_t dimVal = curV % dims.d[dimIndex];
if (dimIndex == vectorDim)
{
dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
}
else
{
dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
}
curV /= dims.d[dimIndex];
ASSERT(curV >= 0);
}
os << sep;
sep = separator;
print(os, typedBuffer[dataOffset]);
}
}
// Explicit instantiation
template void dumpBuffer<bool>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<int32_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<int8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<float>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
Dims const& strides, int32_t vectorDim, int32_t spv);
template <typename T>
void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
{
auto const c = count / (k * trs);
sparseWeights.resize(count * sizeof(T));
auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data());
constexpr int32_t window = 4;
constexpr int32_t nonzeros = 2;
int32_t const crs = c * trs;
auto const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; };
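// Keep the first 2 values in every window of 4 consecutive elements along the C axis and
// zero the remaining ones (the 2:4 structured-sparsity pattern).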
for (int64_t ki = 0; ki < k; ++ki)
{
for (int64_t rsi = 0; rsi < trs; ++rsi)
{
int32_t w = 0;
int32_t nz = 0;
for (int64_t ci = 0; ci < c; ++ci)
{
auto const index = getIndex(ki, ci, rsi);
if (nz < nonzeros)
{
sparseValues[index] = values[index];
++nz;
}
else
{
sparseValues[index] = 0;
}
if (++w == window)
{
w = 0;
nz = 0;
}
}
}
}
}
// Explicit instantiation
template void sparsify<float>(
float const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
template void sparsify<half_float::half>(
half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
template <typename T>
void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n)
{
ASSERT(dst != src);
T* tdst = reinterpret_cast<T*>(dst);
T const* tsrc = reinterpret_cast<T const*>(src);
for (int32_t mi = 0; mi < m; ++mi)
{
for (int32_t ni = 0; ni < n; ++ni)
{
int32_t const isrc = mi * n + ni;
int32_t const idst = ni * m + mi;
tdst[idst] = tsrc[isrc];
}
}
}
// Explicit instantiation
template void transpose2DWeights<float>(void* dst, void const* src, int32_t const m, int32_t const n);
template void transpose2DWeights<half_float::half>(void* dst, void const* src, int32_t const m, int32_t const n);
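// The two fillBuffer definitions below are selected via SFINAE on std::is_integral (see the
// declarations in sampleUtils.h): integral element types draw from uniform_int_distribution,
// non-integral ones from uniform_real_distribution.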
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type>
void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
std::uniform_int_distribution<int32_t> distribution(min, max);
auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
std::generate(typedBuffer, typedBuffer + volume, generator);
}
template <typename T, typename std::enable_if<!std::is_integral<T>::value, int32_t>::type>
void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
T* typedBuffer = static_cast<T*>(buffer);
std::default_random_engine engine;
std::uniform_real_distribution<float> distribution(min, max);
auto generator = [&engine, &distribution]() { return static_cast<T>(distribution(engine)); };
std::generate(typedBuffer, typedBuffer + volume, generator);
}
// Explicit instantiation
template void fillBuffer<bool>(void* buffer, int64_t volume, bool min, bool max);
template void fillBuffer<float>(void* buffer, int64_t volume, float min, float max);
template void fillBuffer<int32_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
template void fillBuffer<int8_t>(void* buffer, int64_t volume, int8_t min, int8_t max);
template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max);
template void fillBuffer<uint8_t>(void* buffer, int64_t volume, uint8_t min, uint8_t max);
} // namespace sample

@ -0,0 +1,105 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TRT_SAMPLE_UTILS_H
#define TRT_SAMPLE_UTILS_H
#include <fstream>
#include <iostream>
#include <memory>
#include <numeric>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>
#include <cuda.h>
#include <cuda_fp16.h>
#include "NvInfer.h"
#include "common.h"
#include "logger.h"
#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err) \
{ \
if ((condition) == false) \
{ \
(err) << (msg) << std::endl; \
return retval; \
} \
}
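// Usage sketch (illustrative): log to an error stream and bail out with a return value when
// a condition fails, e.g.
// SMP_RETVAL_IF_FALSE(engine != nullptr, "Engine deserialization failed", false, sample::gLogError);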
namespace sample
{
size_t dataTypeSize(nvinfer1::DataType dataType);
template <typename T>
inline T roundUp(T m, T n)
{
return ((m + n - 1) / n) * n;
}
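// e.g. roundUp(13, 8) == 16; values that are already multiples of n are left unchanged.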
//! comps is the number of components in a vector. Ignored if vecDim < 0.
int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch);
using samplesCommon::volume;
nvinfer1::Dims toDims(std::vector<int32_t> const& vec);
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
void fillBuffer(void* buffer, int64_t volume, T min, T max);
template <typename T, typename std::enable_if<!std::is_integral<T>::value, int32_t>::type = 0>
void fillBuffer(void* buffer, int64_t volume, T min, T max);
template <typename T>
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, nvinfer1::Dims const& dims,
nvinfer1::Dims const& strides, int32_t vectorDim, int32_t spv);
void loadFromFile(std::string const& fileName, char* dst, size_t size);
std::vector<std::string> splitToStringVec(std::string const& option, char separator);
bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput = true);
int32_t getCudaDriverVersion();
int32_t getCudaRuntimeVersion();
void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
// Walk the weight elements and overwrite (at most) 2 out of every 4 elements with 0.
template <typename T>
void sparsify(T const* values, int64_t count, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
template <typename L>
void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers.
// Forward analysis on the API graph to determine which weights to sparsify.
void sparsifyMatMulKernelWeights(
nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
template <typename T>
void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n);
} // namespace sample
#endif // TRT_SAMPLE_UTILS_H

@ -0,0 +1,32 @@
#pragma once
#include "utils.h"
#include "kernel_function.cuh"
class MI_VisionInterface
{
public:
virtual ~MI_VisionInterface() = default; // virtual: instances are deleted through the interface pointer
// Initialize the engine from an ONNX file
virtual bool initEngine(const std::string& _onnxFileName) = 0;
// Check the model's inputs and outputs
virtual bool check() = 0;
// Inference on raw image buffers
virtual bool doTRTInfer(const std::vector<MN_VisionImage::MS_ImageParam>& _bufImg, std::vector<utils::MR_Result>* _detectRes, int* _user) = 0;
// Inference on cv::Mat images
virtual bool doTRTInfer(const std::vector<cv::Mat>& _matImgs, std::vector<utils::MR_Result>* _detectRes, int* _user) = 0;
// Get the last error message
virtual std::string getError() = 0;
// Release data/memory
virtual void freeMemeory() = 0;
virtual bool measureAxis(std::vector<double>& measureRes, const MN_VisionImage::MS_ImageParam& _bufImg) = 0;
};
// Exported interface factory
MI_ALGORITHM_EXPORT MI_VisionInterface* getInterfacePtr(const utils::InitParameter& _params);
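// Usage sketch (illustrative, not part of the file; error handling omitted).
// "params", "matImgs" and "model.onnx" are hypothetical placeholders.
#if 0
MI_VisionInterface* pInfer = getInterfacePtr(params); // params: a filled utils::InitParameter
if (pInfer && pInfer->initEngine("model.onnx") && pInfer->check())
{
    std::vector<utils::MR_Result> results;
    pInfer->doTRTInfer(matImgs, &results, nullptr); // matImgs: std::vector<cv::Mat>
}
#endif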

@ -0,0 +1,66 @@
#pragma once
#include <string>
#include <memory>
typedef unsigned char uchar;
namespace MN_VisionImage {
enum class ME_ImageType
{
E_GRAY = 0,
E_RGB,
E_RGBA
};
struct MS_ImageParam
{
// Default constructor
MS_ImageParam() :
m_width(-1),
m_height(-1),
m_channels(0),
mImgType(MN_VisionImage::ME_ImageType::E_RGB)
{}
// Parameterized constructor
MS_ImageParam(uchar* _buffer, int _nW, int _nH, const ME_ImageType& _imgType)
{
int _nChannels = 0;
if (_imgType == ME_ImageType::E_GRAY)
{
_nChannels = 1;
}
else if (_imgType == ME_ImageType::E_RGBA)
{
_nChannels = 4;
}
else
{
_nChannels = 3;
}
m_width = _nW;
m_height = _nH;
m_channels = _nChannels;
mImgType = _imgType;
int iSize = _nW * _nH * _nChannels; // total byte count of the image buffer
m_data = std::shared_ptr<uchar>(new uchar[iSize], [](uchar* p) {
delete[] p; // array deleter; deleting a null pointer is a no-op, so no check is needed
});
memcpy(m_data.get(), _buffer, iSize);
}
std::shared_ptr<uchar> m_data; // image data
int m_width; // image width in pixels
int m_height; // image height in pixels
int m_channels; // number of channels
ME_ImageType mImgType; // image type
};
}
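// Usage sketch (illustrative, not part of the file): wrapping an interleaved
// RGB buffer; the constructor deep-copies the data into its own shared buffer.
// The buffer and its 640x480 size are hypothetical.
#if 0
unsigned char rgbData[640 * 480 * 3] = {};
MN_VisionImage::MS_ImageParam img(rgbData, 640, 480, MN_VisionImage::ME_ImageType::E_RGB);
#endif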

@ -0,0 +1,35 @@
#pragma once
// tensorrt
#include "argsParser.h"
#include "buffers.h"
#include "common.h"
#include "logger.h"
#include "parserOnnxConfig.h"
#include "NvOnnxParser.h"
#include <NvInfer.h>
// cuda
#include <cuda_runtime.h>
#include <cuda.h>
#include <stdio.h>
#include <thrust/sort.h>
#include <cuda_device_runtime_api.h>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#include <device_atomic_functions.h>
// opencv
#include <opencv2/opencv.hpp>
// windows
#include "Windows.h"
// cpp std
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>
#include <chrono>
#include <memory>
#include <math.h>
#include <filesystem>
#include "MS_Image_Param.h"

@ -0,0 +1,43 @@
#pragma once
#include "common_include.h"
#include "utils.h"
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line);
#define BLOCK_SIZE 8
// note: resize RGB with padding
void resizeDevice(const int& batch_size, float* src, int src_width, int src_height,
float* dst, int dstWidth, int dstHeight,
float paddingValue, utils::AffineMat matrix);
// overload: resize RGB with padding, but src's type is uint8
void resizeDevice(const int& batch_size, unsigned char* src, int src_width, int src_height,
float* dst, int dstWidth, int dstHeight,
float paddingValue, utils::AffineMat matrix);
// overload: resize rgb/gray without padding
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
float* dst, int dstWidth, int dstHeight,
utils::ColorMode mode, utils::AffineMat matrix);
void bgr2rgbDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
float* dst, int dstWidth, int dstHeight);
void normDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
float* dst, int dstWidth, int dstHeight,
utils::InitParameter norm_param);
void hwc2chwDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
float* dst, int dstWidth, int dstHeight);
void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
// nms fast
void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea);
// nms sort
void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea,
int* idx, float* conf);
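// Usage sketch (illustrative, not part of the file): a typical GPU
// preprocessing chain — letterbox resize, BGR->RGB, normalization, HWC->CHW.
// All d_* pointers are hypothetical device buffers; "matrix" is a precomputed
// utils::AffineMat and "param" a filled utils::InitParameter.
#if 0
resizeDevice(batchSize, d_src, srcW, srcH, d_resized, dstW, dstH, 114.f, matrix);
bgr2rgbDevice(batchSize, d_resized, dstW, dstH, d_rgb, dstW, dstH);
normDevice(batchSize, d_rgb, dstW, dstH, d_norm, dstW, dstH, param);
hwc2chwDevice(batchSize, d_norm, dstW, dstH, d_chw, dstW, dstH);
#endif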

@ -0,0 +1,280 @@
#pragma once
#include "common_include.h"
// DLL export/import macro for the interface
#ifndef BUILD_STATIC
# if defined(MI_ALGORITHM_LIB)
# define MI_ALGORITHM_EXPORT __declspec(dllexport)
# else
# define MI_ALGORITHM_EXPORT __declspec(dllimport)
# endif
#else
# define MI_ALGORITHM_EXPORT
#endif
namespace utils
{
namespace dataSets
{
const std::vector<std::string> coco80 = {
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
"hair drier", "toothbrush"
};
const std::vector<std::string> coco91 = {
"person", "bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light",
"fire hydrant","street sign","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe",
"hat","backpack","umbrella","shoe","eye glasses","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat",
"baseball glove","skateboard","surfboard","tennis racket","bottle","plate","wine glass","cup","fork","knife","spoon","bowl","banana","apple",
"sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","mirror","dining table","window",
"desk","toilet","door","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","blender",
"book","clock","vase","scissors","teddy bear","hair drier","toothbrush","hair brush"
};
const std::vector<std::string> voc20 = {
"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable",
"dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"
};
const std::vector<std::string> face2 = { "non-face", "face" };
// flower_data
const std::vector<std::string> flower_labels = { "daisy", "dandelion", "rose", "sunflower", "tulip" };
}
namespace Colors
{
const std::vector<cv::Scalar> color80{
cv::Scalar(128, 77, 207),cv::Scalar(65, 32, 208),cv::Scalar(0, 224, 45),cv::Scalar(3, 141, 219),cv::Scalar(80, 239, 253),cv::Scalar(239, 184, 12),
cv::Scalar(7, 144, 145),cv::Scalar(161, 88, 57),cv::Scalar(0, 166, 46),cv::Scalar(218, 113, 53),cv::Scalar(193, 33, 128),cv::Scalar(190, 94, 113),
cv::Scalar(113, 123, 232),cv::Scalar(69, 205, 80),cv::Scalar(18, 170, 49),cv::Scalar(89, 51, 241),cv::Scalar(153, 191, 154),cv::Scalar(27, 26, 69),
cv::Scalar(20, 186, 194),cv::Scalar(210, 202, 167),cv::Scalar(196, 113, 204),cv::Scalar(9, 81, 88),cv::Scalar(191, 162, 67),cv::Scalar(227, 73, 120),
cv::Scalar(177, 31, 19),cv::Scalar(133, 102, 137),cv::Scalar(146, 72, 97),cv::Scalar(145, 243, 208),cv::Scalar(2, 184, 176),cv::Scalar(219, 220, 93),
cv::Scalar(238, 153, 134),cv::Scalar(197, 169, 160),cv::Scalar(204, 201, 106),cv::Scalar(13, 24, 129),cv::Scalar(40, 38, 4),cv::Scalar(5, 41, 34),
cv::Scalar(46, 94, 129),cv::Scalar(102, 65, 107),cv::Scalar(27, 11, 208),cv::Scalar(191, 240, 183),cv::Scalar(225, 76, 38),cv::Scalar(193, 89, 124),
cv::Scalar(30, 14, 175),cv::Scalar(144, 96, 90),cv::Scalar(181, 186, 86),cv::Scalar(102, 136, 34),cv::Scalar(158, 71, 15),cv::Scalar(183, 81, 247),
cv::Scalar(73, 69, 89),cv::Scalar(123, 73, 232),cv::Scalar(4, 175, 57),cv::Scalar(87, 108, 23),cv::Scalar(105, 204, 142),cv::Scalar(63, 115, 53),
cv::Scalar(105, 153, 126),cv::Scalar(247, 224, 137),cv::Scalar(136, 21, 188),cv::Scalar(122, 129, 78),cv::Scalar(145, 80, 81),cv::Scalar(51, 167, 149),
cv::Scalar(162, 173, 20),cv::Scalar(252, 202, 17),cv::Scalar(10, 40, 3),cv::Scalar(150, 90, 254),cv::Scalar(169, 21, 68),cv::Scalar(157, 148, 180),
cv::Scalar(131, 254, 90),cv::Scalar(7, 221, 102),cv::Scalar(19, 191, 184),cv::Scalar(98, 126, 199),cv::Scalar(210, 61, 56),cv::Scalar(252, 86, 59),
cv::Scalar(102, 195, 55),cv::Scalar(160, 26, 91),cv::Scalar(60, 94, 66),cv::Scalar(204, 169, 193),cv::Scalar(126, 4, 181),cv::Scalar(229, 209, 196),
cv::Scalar(195, 170, 186),cv::Scalar(155, 207, 148)
};
const std::vector<cv::Scalar> color91{
cv::Scalar(148, 99, 164),cv::Scalar(65, 172, 90),cv::Scalar(18, 117, 190),cv::Scalar(173, 208, 229),cv::Scalar(37, 162, 147),cv::Scalar(121, 99, 42),
cv::Scalar(218, 173, 104),cv::Scalar(193, 213, 138),cv::Scalar(142, 168, 45),cv::Scalar(107, 143, 94),cv::Scalar(242, 89, 7),cv::Scalar(87, 218, 248),
cv::Scalar(126, 168, 9),cv::Scalar(86, 152, 105),cv::Scalar(155, 135, 251),cv::Scalar(73, 234, 44),cv::Scalar(177, 37, 42),cv::Scalar(219, 215, 54),
cv::Scalar(124, 207, 143),cv::Scalar(7, 81, 209),cv::Scalar(254, 18, 130),cv::Scalar(71, 54, 73),cv::Scalar(172, 198, 63),cv::Scalar(64, 217, 224),
cv::Scalar(105, 224, 25),cv::Scalar(41, 52, 130),cv::Scalar(220, 27, 193),cv::Scalar(65, 222, 86),cv::Scalar(250, 150, 201),cv::Scalar(201, 150, 105),
cv::Scalar(104, 96, 142),cv::Scalar(111, 230, 54),cv::Scalar(105, 24, 22),cv::Scalar(42, 226, 101),cv::Scalar(67, 26, 144),cv::Scalar(155, 113, 106),
cv::Scalar(152, 196, 216),cv::Scalar(58, 68, 152),cv::Scalar(68, 230, 213),cv::Scalar(169, 143, 129),cv::Scalar(191, 102, 41),cv::Scalar(5, 73, 170),
cv::Scalar(15, 73, 233),cv::Scalar(95, 13, 71),cv::Scalar(25, 92, 218),cv::Scalar(85, 173, 16),cv::Scalar(247, 158, 17),cv::Scalar(36, 28, 8),
cv::Scalar(31, 100, 134),cv::Scalar(131, 71, 45),cv::Scalar(158, 190, 91),cv::Scalar(90, 207, 220),cv::Scalar(125, 77, 228),cv::Scalar(40, 156, 67),
cv::Scalar(35, 250, 69),cv::Scalar(229, 61, 245),cv::Scalar(210, 201, 106),cv::Scalar(184, 35, 131),cv::Scalar(47, 124, 120),cv::Scalar(1, 114, 23),
cv::Scalar(99, 181, 17),cv::Scalar(77, 141, 151),cv::Scalar(79, 33, 95),cv::Scalar(194, 111, 146),cv::Scalar(187, 199, 138),cv::Scalar(129, 215, 40),
cv::Scalar(160, 209, 144),cv::Scalar(139, 121, 58),cv::Scalar(97, 208, 197),cv::Scalar(185, 105, 171),cv::Scalar(160, 96, 136),cv::Scalar(232, 26, 26),
cv::Scalar(34, 165, 109),cv::Scalar(19, 86, 215),cv::Scalar(205, 209, 199),cv::Scalar(131, 91, 25),cv::Scalar(51, 201, 16),cv::Scalar(64, 35, 128),
cv::Scalar(120, 161, 247),cv::Scalar(123, 164, 190),cv::Scalar(15, 191, 40),cv::Scalar(11, 44, 117),cv::Scalar(198, 136, 70),cv::Scalar(14, 224, 240),
cv::Scalar(60, 186, 193),cv::Scalar(253, 190, 129),cv::Scalar(134, 228, 173),cv::Scalar(219, 156, 214),cv::Scalar(137, 67, 254),cv::Scalar(178, 223, 250),
cv::Scalar(219, 199, 139)
};
const std::vector<cv::Scalar> color20{
cv::Scalar(128, 77, 207),cv::Scalar(65, 32, 208),cv::Scalar(0, 224, 45),cv::Scalar(3, 141, 219),cv::Scalar(80, 239, 253),cv::Scalar(239, 184, 12),
cv::Scalar(7, 144, 145),cv::Scalar(161, 88, 57),cv::Scalar(0, 166, 46),cv::Scalar(218, 113, 53),cv::Scalar(193, 33, 128),cv::Scalar(190, 94, 113),
cv::Scalar(113, 123, 232),cv::Scalar(69, 205, 80),cv::Scalar(18, 170, 49),cv::Scalar(89, 51, 241),cv::Scalar(153, 191, 154),cv::Scalar(27, 26, 69),
cv::Scalar(20, 186, 194),cv::Scalar(210, 202, 167),cv::Scalar(196, 113, 204),cv::Scalar(9, 81, 88),cv::Scalar(191, 162, 67),cv::Scalar(227, 73, 120)
};
}
// JC_Xiong-20240424
// Model type/function; the corresponding model object is instantiated from this enum
enum class ME_ModelType
{
E_RESNET34 = 0,
E_RESNET50,
E_YOLOV8,
};
// Detection result for the current product
enum class ME_DetectRes
{
E_DETECT_OK = 0,
E_DETECT_NG,
E_DETECT_NONE,
};
// Image classification result
typedef struct MS_Classification
{
MS_Classification() :mDetectRes(ME_DetectRes::E_DETECT_OK), mConfidence(0.0), mLabel("")
{}
ME_DetectRes mDetectRes;
double mConfidence;
std::string mLabel;
}MS_ClassificationParam;
struct Box
{
float left, top, right, bottom, confidence;
int label;
std::vector<cv::Point2i> land_marks;
Box() = default;
Box(float left, float top, float right, float bottom, float confidence, int label) :
left(left), top(top), right(right), bottom(bottom), confidence(confidence), label(label) {}
Box(float left, float top, float right, float bottom, float confidence, int label, int numLandMarks) :
left(left), top(top), right(right), bottom(bottom), confidence(confidence), label(label)
{
land_marks.reserve(numLandMarks);
}
};
// Object detection result
typedef struct MS_ObjectDetect
{
MS_ObjectDetect() :mDetectRes(ME_DetectRes::E_DETECT_OK), mBoxVec(std::vector<Box>())
{}
ME_DetectRes mDetectRes;
std::vector<Box> mBoxVec;
}MS_ObjectDetectParam;
struct MR_Result
{
MR_Result() : mClassifyDecRes(), mObjectDecRes() // initializer order matches declaration order
{}
MS_ClassificationParam mClassifyDecRes;
std::vector<std::vector<Box>> mObjectDecRes;
};
struct InitParameter
{
InitParameter() : m_modelType(ME_ModelType::E_RESNET34), // explicit default; m_modelType was previously left uninitialized
num_class(5), dynamic_batch(false), batch_size(1), dst_h(0), dst_w(0), scale(255.0f),
stdVec{ 1.0f,1.0f,1.0f }, meanVec{ 0.0f,0.0f,0.0f }, iou_thresh(0.5f), conf_thresh(0.5f), topK(1000),
save_path(""), char_width(11), det_info_render_width(15), font_scale(0.6), is_show(false), is_save(false)
{}
ME_ModelType m_modelType; // model type
int num_class; // number of classes (5 for flower_data)
std::vector<std::string> class_names;
std::vector<std::string> input_output_names;
bool dynamic_batch;
int batch_size; // inference batch size
MN_VisionImage::MS_ImageParam mImage;
int dst_h, dst_w; // height/width of the image fed to the model
float scale;
std::vector<float> stdVec;
std::vector<float> meanVec;
float iou_thresh;
float conf_thresh;
int topK;
std::string save_path;
std::string winname = "TensorRT-Infer";
int char_width;
int det_info_render_width;
double font_scale;
bool is_show;
bool is_save;
};
// legacy
struct CandidateObject
{
float mBboxAndkeyPoints[14]; // bbox:[x y w h] + 5 facial key points:[x1 y1 x2 y2 ...x5 y5]
float mScore;
bool mIsGood;
CandidateObject()
{
std::fill_n(mBboxAndkeyPoints, 14, FLT_MAX);
mScore = FLT_MAX;
mIsGood = true;
}
CandidateObject(float* bboxAndkeyPoints, float score, bool isGood) :
mScore(score),
mIsGood(isGood)
{
memcpy(mBboxAndkeyPoints, bboxAndkeyPoints, 14 * sizeof(float));
}
};
enum class InputStream { IMAGE, VIDEO, CAMERA };
enum class ColorMode { RGB, GRAY };
struct AffineMat
{
float v0, v1, v2;
float v3, v4, v5;
};
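// Presumably the two rows of a 2x3 affine transform used by the resize kernels:
//   x' = v0*x + v1*y + v2,  y' = v3*x + v4*y + v5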
void saveBinaryFile(float* vec, size_t len, const std::string& file);
std::vector<uint8_t> readBinaryFile(const std::string& file);
std::vector<unsigned char> loadModel(const std::string& file);
std::string getSystemTimeStr();
bool setInputStream(const InputStream& source, const std::string& imagePath, const std::string& videoPath, const int& cameraID,
cv::VideoCapture& capture, int& totalBatches, int& delayTime, InitParameter& param);
void setRenderWindow(InitParameter& param);
std::string getTimeStamp();
void show(const std::vector<std::vector<Box>>& objectss,
const std::vector<std::string>& classNames,
const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch);
void save(const std::vector<std::vector<Box>>& objectss,
const std::vector<std::string>& classNames,
const std::string& savePath, std::vector<cv::Mat>& imgsBatch,
const int& batchSize, const int& batchi);
class HostTimer
{
public:
HostTimer();
float getUsedTime(); // when timing CUDA code, call cudaDeviceSynchronize() before this
~HostTimer();
private:
std::chrono::steady_clock::time_point t1;
std::chrono::steady_clock::time_point t2;
};
class DeviceTimer
{
public:
DeviceTimer();
float getUsedTime();
// overload
DeviceTimer(cudaStream_t stream);
float getUsedTime(cudaStream_t stream);
~DeviceTimer();
private:
cudaEvent_t start, end;
};
}
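// Usage sketch (illustrative, not part of the file): timing GPU work with
// DeviceTimer; HostTimer is used the same way for CPU-side code.
#if 0
utils::DeviceTimer timer;
// ... launch kernels ...
float elapsed = timer.getUsedTime(); // presumably milliseconds, via cudaEvent timing
#endif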

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>..\MF_TRTInfer\lib\opencv_lib\include\opencv2;..\MF_TRTInfer\lib\opencv_lib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>..\MF_TRTInfer\lib\opencv_lib\x64\vc15\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>opencv_world453.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>