测试算法库程序
@ -0,0 +1,147 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>17.0</VCProjectVersion>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<ProjectGuid>{3e72c625-2f8b-4fb6-aa05-70a5c3a44bb9}</ProjectGuid>
|
||||
<RootNamespace>TestTRTInterDll</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v143</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>MultiByte</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
<Import Project="cuda11_6_Release_x64.props" />
|
||||
<Import Project="tensorrt_860_release_x64.props" />
|
||||
<Import Project="vs2019-opencv-release-X64.props" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>false</SDLCheck>
|
||||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
<LanguageStandard_C>Default</LanguageStandard_C>
|
||||
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>false</SDLCheck>
|
||||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<AdditionalIncludeDirectories>.\trtinfer_lib\include;.\trtinfer_lib\common;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalOptions>/Zc:__cplusplus %(AdditionalOptions)</AdditionalOptions>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>.\trtinfer_lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>.\trtinfer_lib\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="TestTRTInterDll.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup>
|
||||
<Filter Include="源文件">
|
||||
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
|
||||
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="头文件">
|
||||
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
|
||||
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
|
||||
</Filter>
|
||||
<Filter Include="资源文件">
|
||||
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
|
||||
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="TestTRTInterDll.cpp">
|
||||
<Filter>源文件</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ImportGroup Label="PropertySheets" />
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.6\lib\x64\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
After Width: | Height: | Size: 38 KiB |
After Width: | Height: | Size: 50 KiB |
After Width: | Height: | Size: 52 KiB |
@ -0,0 +1,5 @@
|
||||
daisy
|
||||
dandelion
|
||||
rose
|
||||
sunflower
|
||||
tulip
|
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 97 KiB |
After Width: | Height: | Size: 86 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 256 KiB |
After Width: | Height: | Size: 165 KiB |
@ -0,0 +1,590 @@
|
||||
[2024-04-24 14:10:05.326] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 14:10:18.478] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:10:20.401] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:10:21.501] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:10:23.149] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:10:24.470] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:10:29.415] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:10:29.426] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:10:29.503] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:10:29.516] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:10:29.517] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:10:29.518] <thread 28644> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:10:54.633] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:10:54.634] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:10:54.634] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:10:54.634] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:10:54.635] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:10:54.636] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:11:03.643] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:11:03.643] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:11:03.643] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:11:03.643] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:11:03.644] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:11:03.645] <thread 22540> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:21:13.820] <thread 26732> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 14:21:16.855] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:21:20.801] <thread 26732> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:21:30.715] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:21:47.950] <thread 26732> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:21:56.426] <thread 26732> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:22:20.751] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:22:20.751] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:22:20.752] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:22:20.752] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:22:20.752] <thread 8620> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:23:23.249] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 14:23:27.717] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:23:29.141] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:23:30.553] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:23:37.512] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:23:37.512] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:23:37.513] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:23:37.513] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:23:37.516] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:23:37.517] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:24:11.946] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:24:11.946] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:24:11.947] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:24:11.947] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:24:11.949] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:24:11.950] <thread 4208> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:30:58.451] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 14:31:00.901] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:31:03.057] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:31:04.749] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:31:06.295] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:31:06.296] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:31:06.296] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:31:06.297] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:31:06.298] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:06.298] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:06.299] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:31:06.299] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:31:06.301] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:06.302] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:06.303] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:31:06.305] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:31:29.839] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:31:29.839] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:31:29.839] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:31:29.840] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:29.840] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:31:29.840] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:31:29.840] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:29.841] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:29.841] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:31:29.842] <thread 5748> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:31:29.842] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:31:34.113] <thread 11120> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:34:44.910] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 14:34:46.422] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:34:47.848] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 14:34:49.635] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 14:34:50.939] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:34:50.943] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:34:50.944] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:34:50.943] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:34:50.945] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:34:50.946] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:34:50.946] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:34:50.946] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:34:50.946] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:34:50.947] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:34:50.947] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:34:50.948] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:35:33.809] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:36:17.884] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:36:17.884] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:41,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 14:36:17.885] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 14:36:17.885] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:36:17.886] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:36:17.886] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:36:17.886] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:36:17.886] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:47,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 14:36:17.887] <thread 27336> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 14:36:43.975] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:53,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 14:36:49.419] <thread 26484> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:56,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 15:08:22.000] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet34_0407.onnx
|
||||
|
||||
[2024-04-24 15:08:23.646] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 15:08:32.437] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:30,MF_Resnet34Infer::initEngine]
|
||||
on the init engine, input onnx file : ./imageDatas/resnet50.onnx
|
||||
|
||||
[2024-04-24 15:08:34.620] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MF_Resnet34Infer.cpp:48,MF_Resnet34Infer::initEngine]
|
||||
trt model has existed.
|
||||
|
||||
|
||||
[2024-04-24 15:08:37.449] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 15:08:37.451] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 15:08:37.452] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 15:08:37.451] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 15:08:37.454] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:08:37.455] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:08:37.456] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 15:08:37.455] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 15:08:37.456] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:08:37.457] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:08:37.458] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 15:08:37.459] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 15:08:57.068] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 15:08:58.514] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 15:09:05.783] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:09:05.784] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 15:09:05.784] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:09:05.785] <thread 20500> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
||||
[2024-04-24 15:09:16.908] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:43,MA_TRTInferAlgoBase::check]
|
||||
The engine's info:
|
||||
|
||||
[2024-04-24 15:09:16.909] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 0, input
|
||||
|
||||
[2024-04-24 15:09:16.910] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:09:16.910] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:49,MA_TRTInferAlgoBase::check]
|
||||
idx = 1, output
|
||||
|
||||
[2024-04-24 15:09:16.911] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:55,MA_TRTInferAlgoBase::check]
|
||||
|
||||
|
||||
|
||||
[2024-04-24 15:09:16.912] <thread 21616> [info]
|
||||
[D:\00_SST-Work\SST-Code\MF_TRTInfer\MA_TRTInferAlgoBase.cpp:58,MA_TRTInferAlgoBase::check]
|
||||
The context's info:
|
||||
|
@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ImportGroup Label="PropertySheets" />
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup>
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories>..\MF_TRTInfer\lib\tensorrt_lib\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>..\MF_TRTInfer\lib\tensorrt_lib\lib\*.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>..\MF_TRTInfer\lib\tensorrt_lib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|
@ -0,0 +1,381 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef BATCH_STREAM_H
|
||||
#define BATCH_STREAM_H
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "common.h"
|
||||
#include <algorithm>
|
||||
#include <stdio.h>
|
||||
#include <vector>
|
||||
|
||||
//! \class IBatchStream
//!
//! \brief Abstract interface for streaming batches of host-side data (and
//! matching labels) to an INT8 calibrator, one batch at a time.
class IBatchStream
{
public:
    //! Position the stream so that batch \p firstBatch is the next one served.
    virtual void reset(int firstBatch) = 0;
    //! Advance to the next batch. \return false once the stream is exhausted.
    virtual bool next() = 0;
    //! Move forward by \p skipCount batches without returning their data.
    virtual void skip(int skipCount) = 0;
    //! Host pointer to the current batch's data.
    virtual float* getBatch() = 0;
    //! Host pointer to the current batch's labels.
    virtual float* getLabels() = 0;
    //! Number of batches consumed so far.
    virtual int getBatchesRead() const = 0;
    //! Number of samples per batch.
    virtual int getBatchSize() const = 0;
    //! Dimensions describing one batch (layout is implementation-defined).
    virtual nvinfer1::Dims getDims() const = 0;
};
|
||||
|
||||
//! \class MNISTBatchStream
//!
//! \brief IBatchStream implementation that preloads the raw MNIST idx-format
//! image and label files (big-endian headers, uint8 payload) into host memory
//! and serves fixed-size batches from those buffers.
class MNISTBatchStream : public IBatchStream
{
public:
    //! Loads the entire data and label files up front; the files are located
    //! by searching \p directories for \p dataFile / \p labelsFile.
    MNISTBatchStream(int batchSize, int maxBatches, const std::string& dataFile, const std::string& labelsFile,
        const std::vector<std::string>& directories)
        : mBatchSize{batchSize}
        , mMaxBatches{maxBatches}
        , mDims{3, {1, 28, 28}} //!< We already know the dimensions of MNIST images.
    {
        readDataFile(locateFile(dataFile, directories));
        readLabelsFile(locateFile(labelsFile, directories));
    }

    //! Position the stream so that batch \p firstBatch is served next.
    void reset(int firstBatch) override
    {
        mBatchCount = firstBatch;
    }

    //! Advance to the next batch; returns false once mMaxBatches is reached.
    bool next() override
    {
        if (mBatchCount >= mMaxBatches)
        {
            return false;
        }
        ++mBatchCount;
        return true;
    }

    //! Skip forward by \p skipCount batches (only the counter moves; the data
    //! is already resident in memory).
    void skip(int skipCount) override
    {
        mBatchCount += skipCount;
    }

    //! Pointer into the preloaded image buffer for the current batch.
    //! NOTE(review): indexed by mBatchCount, which next() increments *before*
    //! data is consumed — so the first batch returned after reset(0)+next()
    //! starts at offset mBatchSize, not 0. Confirm callers expect this.
    float* getBatch() override
    {
        return mData.data() + (mBatchCount * mBatchSize * samplesCommon::volume(mDims));
    }

    //! Pointer into the preloaded label buffer for the current batch
    //! (same offset convention as getBatch()).
    float* getLabels() override
    {
        return mLabels.data() + (mBatchCount * mBatchSize);
    }

    //! Number of batches consumed so far.
    int getBatchesRead() const override
    {
        return mBatchCount;
    }

    //! Number of samples per batch.
    int getBatchSize() const override
    {
        return mBatchSize;
    }

    //! Full NCHW dims of one batch: {batchSize, 1, 28, 28}.
    nvinfer1::Dims getDims() const override
    {
        return nvinfer1::Dims{4, {mBatchSize, mDims.d[0], mDims.d[1], mDims.d[2]}};
    }

private:
    //! Parses the MNIST image file: validates the magic number, reads the
    //! big-endian header, then normalizes the uint8 pixels to [0, 1] floats.
    void readDataFile(const std::string& dataFilePath)
    {
        std::ifstream file{dataFilePath.c_str(), std::ios::binary};

        int magicNumber, numImages, imageH, imageW;
        file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
        // All values in the MNIST files are big endian.
        magicNumber = samplesCommon::swapEndianness(magicNumber);
        ASSERT(magicNumber == 2051 && "Magic Number does not match the expected value for an MNIST image set");

        // Read number of images and dimensions
        file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
        file.read(reinterpret_cast<char*>(&imageH), sizeof(imageH));
        file.read(reinterpret_cast<char*>(&imageW), sizeof(imageW));

        numImages = samplesCommon::swapEndianness(numImages);
        imageH = samplesCommon::swapEndianness(imageH);
        imageW = samplesCommon::swapEndianness(imageW);

        // The MNIST data is made up of unsigned bytes, so we need to cast to float and normalize.
        int numElements = numImages * imageH * imageW;
        std::vector<uint8_t> rawData(numElements);
        file.read(reinterpret_cast<char*>(rawData.data()), numElements * sizeof(uint8_t));
        mData.resize(numElements);
        std::transform(
            rawData.begin(), rawData.end(), mData.begin(), [](uint8_t val) { return static_cast<float>(val) / 255.f; });
    }

    //! Parses the MNIST label file: validates the magic number, then converts
    //! each uint8 label to a float.
    void readLabelsFile(const std::string& labelsFilePath)
    {
        std::ifstream file{labelsFilePath.c_str(), std::ios::binary};
        int magicNumber, numImages;
        file.read(reinterpret_cast<char*>(&magicNumber), sizeof(magicNumber));
        // All values in the MNIST files are big endian.
        magicNumber = samplesCommon::swapEndianness(magicNumber);
        ASSERT(magicNumber == 2049 && "Magic Number does not match the expected value for an MNIST labels file");

        file.read(reinterpret_cast<char*>(&numImages), sizeof(numImages));
        numImages = samplesCommon::swapEndianness(numImages);

        std::vector<uint8_t> rawLabels(numImages);
        file.read(reinterpret_cast<char*>(rawLabels.data()), numImages * sizeof(uint8_t));
        mLabels.resize(numImages);
        std::transform(
            rawLabels.begin(), rawLabels.end(), mLabels.begin(), [](uint8_t val) { return static_cast<float>(val); });
    }

    int mBatchSize{0};     //!< Samples served per batch.
    int mBatchCount{0};    //!< The batch that will be read on the next invocation of next()
    int mMaxBatches{0};    //!< Upper bound on batches served by next().
    nvinfer1::Dims mDims{};          //!< CHW dims of a single image ({1, 28, 28}).
    std::vector<float> mData{};      //!< All images, normalized to [0, 1].
    std::vector<float> mLabels{};    //!< All labels, as floats.
};
|
||||
|
||||
//! \class BatchStream
//!
//! \brief IBatchStream implementation that reads calibration batches from a
//! sequence of binary batch files (prefix + index + suffix), or — when a list
//! file is supplied — assembles batches from individual PPM images named in
//! that list.
class BatchStream : public IBatchStream
{
public:
    //! Opens the first batch file ("<prefix>0<suffix>") to read its 4-int
    //! header (N, C, H, W) and pre-sizes all host buffers accordingly.
    BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::string const& suffix,
        std::vector<std::string> const& directories)
        : mBatchSize(batchSize)
        , mMaxBatches(maxBatches)
        , mPrefix(prefix)
        , mSuffix(suffix)
        , mDataDir(directories)
    {
        std::ifstream file(locateFile(mPrefix + std::string("0") + mSuffix, mDataDir).c_str(), std::ios::binary);
        ASSERT(file.good());
        int d[4];
        file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
        mDims.nbDims = 4;  // The number of dimensions.
        mDims.d[0] = d[0]; // Batch Size
        mDims.d[1] = d[1]; // Channels
        mDims.d[2] = d[2]; // Height
        mDims.d[3] = d[3]; // Width
        ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0);

        mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
        mBatch.resize(mBatchSize * mImageSize, 0);
        mLabels.resize(mBatchSize, 0);
        mFileBatch.resize(mDims.d[0] * mImageSize, 0);
        mFileLabels.resize(mDims.d[0], 0);
        reset(0);
    }

    //! Convenience overload that defaults the batch-file suffix to ".batch".
    BatchStream(int batchSize, int maxBatches, std::string const& prefix, std::vector<std::string> const& directories)
        : BatchStream(batchSize, maxBatches, prefix, ".batch", directories)
    {
    }

    //! Constructs a stream in list-file mode: batches are built from the PPM
    //! image names read out of \p listFile; \p dims supplies N, C, H, W.
    BatchStream(int batchSize, int maxBatches, nvinfer1::Dims const& dims, std::string const& listFile,
        std::vector<std::string> const& directories)
        : mBatchSize(batchSize)
        , mMaxBatches(maxBatches)
        , mDims(dims)
        , mListFile(listFile)
        , mDataDir(directories)
    {
        mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3];
        mBatch.resize(mBatchSize * mImageSize, 0);
        mLabels.resize(mBatchSize, 0);
        mFileBatch.resize(mDims.d[0] * mImageSize, 0);
        mFileLabels.resize(mDims.d[0], 0);
        reset(0);
    }

    // Resets data members
    //! Rewinds to the start, then uses skip() to land on batch \p firstBatch.
    void reset(int firstBatch) override
    {
        mBatchCount = 0;
        mFileCount = 0;
        // Marks the file buffer as fully consumed so next() refills it first.
        mFileBatchPos = mDims.d[0];
        skip(firstBatch);
    }

    // Advance to next batch and return true, or return false if there is no batch left.
    //! Assembles one logical batch of mBatchSize samples, refilling the file
    //! buffer via update() whenever it runs dry.
    bool next() override
    {
        if (mBatchCount == mMaxBatches)
        {
            return false;
        }

        for (int csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize)
        {
            ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]);
            if (mFileBatchPos == mDims.d[0] && !update())
            {
                return false;
            }

            // copy the smaller of: elements left to fulfill the request, or elements left in the file buffer.
            csize = std::min(mBatchSize - batchPos, mDims.d[0] - mFileBatchPos);
            std::copy_n(
                getFileBatch() + mFileBatchPos * mImageSize, csize * mImageSize, getBatch() + batchPos * mImageSize);
            std::copy_n(getFileLabels() + mFileBatchPos, csize, getLabels() + batchPos);
        }
        mBatchCount++;
        return true;
    }

    // Skips the batches
    //! Fast path: when whole files map cleanly onto batches, just bump the
    //! file counter. Otherwise replay next() and restore the batch counter.
    void skip(int skipCount) override
    {
        if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0])
        {
            mFileCount += skipCount * mBatchSize / mDims.d[0];
            return;
        }

        int x = mBatchCount;
        for (int i = 0; i < skipCount; i++)
        {
            next();
        }
        mBatchCount = x; // skip() must not count as batches read.
    }

    //! Host pointer to the currently assembled batch.
    float* getBatch() override
    {
        return mBatch.data();
    }

    //! Host pointer to the labels of the currently assembled batch.
    float* getLabels() override
    {
        return mLabels.data();
    }

    //! Number of batches consumed so far.
    int getBatchesRead() const override
    {
        return mBatchCount;
    }

    //! Number of samples per batch.
    int getBatchSize() const override
    {
        return mBatchSize;
    }

    //! NCHW dims as read from the batch-file header (or as supplied).
    nvinfer1::Dims getDims() const override
    {
        return mDims;
    }

private:
    //! Raw pointer to the per-file staging buffer for image data.
    float* getFileBatch()
    {
        return mFileBatch.data();
    }

    //! Raw pointer to the per-file staging buffer for labels.
    float* getFileLabels()
    {
        return mFileLabels.data();
    }

    //! Refills the file staging buffers with the next file's contents.
    //! Binary mode reads one "<prefix><mFileCount><suffix>" file; list mode
    //! reads mBatchSize PPM images named in the list file and normalizes them.
    //! \return false when no further file could be opened.
    bool update()
    {
        if (mListFile.empty())
        {
            std::string inputFileName = locateFile(mPrefix + std::to_string(mFileCount++) + mSuffix, mDataDir);
            std::ifstream file(inputFileName.c_str(), std::ios::binary);
            if (!file)
            {
                return false;
            }
            int d[4];
            file.read(reinterpret_cast<char*>(d), 4 * sizeof(int32_t));
            // Every batch file must agree with the dims of file 0.
            ASSERT(mDims.d[0] == d[0] && mDims.d[1] == d[1] && mDims.d[2] == d[2] && mDims.d[3] == d[3]);
            file.read(reinterpret_cast<char*>(getFileBatch()), sizeof(float) * mDims.d[0] * mImageSize);
            file.read(reinterpret_cast<char*>(getFileLabels()), sizeof(float) * mDims.d[0]);
        }
        else
        {
            std::vector<std::string> fNames;
            std::ifstream file(locateFile(mListFile, mDataDir), std::ios::binary);
            if (!file)
            {
                return false;
            }

            sample::gLogInfo << "Batch #" << mFileCount << std::endl;
            // NOTE(review): assumes every line of the list file is exactly
            // 7 bytes (name + newline) — confirm against the data set.
            file.seekg(((mBatchCount * mBatchSize)) * 7);

            for (int i = 1; i <= mBatchSize; i++)
            {
                std::string sName;
                std::getline(file, sName);
                sName = sName + ".ppm";
                sample::gLogInfo << "Calibrating with file " << sName << std::endl;
                fNames.emplace_back(sName);
            }

            mFileCount++;

            // Hard-coded 3x300x300 PPM inputs — presumably an SSD-300-style
            // calibration set; verify against the consuming sample.
            const int imageC = 3;
            const int imageH = 300;
            const int imageW = 300;
            std::vector<samplesCommon::PPM<imageC, imageH, imageW>> ppms(fNames.size());
            for (uint32_t i = 0; i < fNames.size(); ++i)
            {
                readPPMFile(locateFile(fNames[i], mDataDir), ppms[i]);
            }

            std::vector<float> data(samplesCommon::volume(mDims));
            // Maps uint8 [0, 255] to [-1, 1].
            const float scale = 2.0 / 255.0;
            const float bias = 1.0;
            long int volChl = mDims.d[2] * mDims.d[3];

            // Normalize input data
            // Also converts interleaved HWC pixel data into planar CHW.
            for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i)
            {
                for (int c = 0; c < mDims.d[1]; ++c)
                {
                    for (int j = 0; j < volChl; ++j)
                    {
                        data[i * volImg + c * volChl + j] = scale * float(ppms[i].buffer[j * mDims.d[1] + c]) - bias;
                    }
                }
            }

            std::copy_n(data.data(), mDims.d[0] * mImageSize, getFileBatch());
        }

        mFileBatchPos = 0;
        return true;
    }

    int mBatchSize{0};     //!< Samples served per logical batch.
    int mMaxBatches{0};    //!< Upper bound on batches served by next().
    int mBatchCount{0};    //!< Batches consumed so far.
    int mFileCount{0};     //!< Index of the next file to read.
    int mFileBatchPos{0};  //!< Read position (in samples) within the file buffer.
    int mImageSize{0};     //!< C*H*W elements per image.
    std::vector<float> mBatch;         //!< Data for the batch
    std::vector<float> mLabels;        //!< Labels for the batch
    std::vector<float> mFileBatch;     //!< List of image files
    std::vector<float> mFileLabels;    //!< List of label files
    std::string mPrefix;               //!< Batch file name prefix
    std::string mSuffix;               //!< Batch file name suffix
    nvinfer1::Dims mDims;              //!< Input dimensions
    std::string mListFile;             //!< File name of the list of image names
    std::vector<std::string> mDataDir; //!< Directories where the files can be found
};
|
||||
|
||||
#endif
|
@ -0,0 +1,136 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef ENTROPY_CALIBRATOR_H
|
||||
#define ENTROPY_CALIBRATOR_H
|
||||
|
||||
#include "BatchStream.h"
|
||||
#include "NvInfer.h"
|
||||
|
||||
//! \class EntropyCalibratorImpl
|
||||
//!
|
||||
//! \brief Implements common functionality for Entropy calibrators.
|
||||
//!
|
||||
//! \class EntropyCalibratorImpl
//!
//! \brief Implements common functionality for Entropy calibrators.
//!
//! Owns one device buffer sized for a single calibration batch and copies
//! batches from a TBatchStream into it on demand. The calibration cache is a
//! file named "CalibrationTable" + networkName in the working directory.
template <typename TBatchStream>
class EntropyCalibratorImpl
{
public:
    //! Allocates the device batch buffer and rewinds the stream to \p firstBatch.
    EntropyCalibratorImpl(TBatchStream const& stream, int firstBatch, std::string const& networkName,
        const char* inputBlobName, bool readCache = true)
        : mStream{stream}
        , mCalibrationTableName("CalibrationTable" + networkName)
        , mInputBlobName(inputBlobName)
        , mReadCache(readCache)
    {
        nvinfer1::Dims dims = mStream.getDims();
        mInputCount = samplesCommon::volume(dims);
        CHECK(cudaMalloc(&mDeviceInput, mInputCount * sizeof(float)));
        mStream.reset(firstBatch);
    }

    //! Releases the device batch buffer.
    virtual ~EntropyCalibratorImpl()
    {
        CHECK(cudaFree(mDeviceInput));
    }

    //! \return the number of samples per calibration batch.
    int getBatchSize() const noexcept
    {
        return mStream.getBatchSize();
    }

    //! Copies the next host batch to the device and publishes it as binding 0.
    //! Only a single input tensor is supported; its name must match
    //! mInputBlobName. \return false when the stream is exhausted.
    bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept
    {
        if (!mStream.next())
        {
            return false;
        }
        CHECK(cudaMemcpy(mDeviceInput, mStream.getBatch(), mInputCount * sizeof(float), cudaMemcpyHostToDevice));
        ASSERT(!strcmp(names[0], mInputBlobName));
        bindings[0] = mDeviceInput;
        return true;
    }

    //! Loads the calibration cache file into memory (when caching is enabled
    //! and the file exists). The returned pointer remains valid until the
    //! next call, which clears mCalibrationCache.
    const void* readCalibrationCache(size_t& length) noexcept
    {
        mCalibrationCache.clear();
        std::ifstream input(mCalibrationTableName, std::ios::binary);
        input >> std::noskipws; // keep whitespace bytes — the cache is binary
        if (mReadCache && input.good())
        {
            std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
                std::back_inserter(mCalibrationCache));
        }
        length = mCalibrationCache.size();
        return length ? mCalibrationCache.data() : nullptr;
    }

    //! Persists \p length bytes of calibration cache to the table file.
    void writeCalibrationCache(const void* cache, size_t length) noexcept
    {
        std::ofstream output(mCalibrationTableName, std::ios::binary);
        output.write(reinterpret_cast<const char*>(cache), length);
    }

private:
    TBatchStream mStream;                //!< Source of calibration batches.
    size_t mInputCount;                  //!< Elements in one batch (volume of stream dims).
    std::string mCalibrationTableName;   //!< Path of the calibration cache file.
    const char* mInputBlobName;          //!< Expected name of the single input tensor.
    bool mReadCache{true};               //!< Whether readCalibrationCache may use the file.
    void* mDeviceInput{nullptr};         //!< Device buffer holding the current batch.
    std::vector<char> mCalibrationCache; //!< Backing storage for the loaded cache.
};
|
||||
|
||||
//! \class Int8EntropyCalibrator2
|
||||
//!
|
||||
//! \brief Implements Entropy calibrator 2.
|
||||
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
|
||||
//!
|
||||
template <typename TBatchStream>
|
||||
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2
|
||||
{
|
||||
public:
|
||||
Int8EntropyCalibrator2(TBatchStream const& stream, int32_t firstBatch, const char* networkName,
|
||||
const char* inputBlobName, bool readCache = true)
|
||||
: mImpl(stream, firstBatch, networkName, inputBlobName, readCache)
|
||||
{
|
||||
}
|
||||
|
||||
int getBatchSize() const noexcept override
|
||||
{
|
||||
return mImpl.getBatchSize();
|
||||
}
|
||||
|
||||
bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override
|
||||
{
|
||||
return mImpl.getBatch(bindings, names, nbBindings);
|
||||
}
|
||||
|
||||
const void* readCalibrationCache(size_t& length) noexcept override
|
||||
{
|
||||
return mImpl.readCalibrationCache(length);
|
||||
}
|
||||
|
||||
void writeCalibrationCache(const void* cache, size_t length) noexcept override
|
||||
{
|
||||
mImpl.writeCalibrationCache(cache, length);
|
||||
}
|
||||
|
||||
private:
|
||||
EntropyCalibratorImpl<TBatchStream> mImpl;
|
||||
};
|
||||
|
||||
#endif // ENTROPY_CALIBRATOR_H
|
@ -0,0 +1,138 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef ERROR_RECORDER_H
|
||||
#define ERROR_RECORDER_H
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "logger.h"
|
||||
#include <atomic>
|
||||
#include <cstdint>
|
||||
#include <exception>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
using nvinfer1::IErrorRecorder;
|
||||
using nvinfer1::ErrorCode;
|
||||
|
||||
//!
|
||||
//! A simple implementation of the IErrorRecorder interface for
|
||||
//! use by samples. This interface also can be used as a reference
|
||||
//! implementation.
|
||||
//! The sample Error recorder is based on a vector that pairs the error
|
||||
//! code and the error string into a single element. It also uses
|
||||
//! standard mutex's and atomics in order to make sure that the code
|
||||
//! works in a multi-threaded environment.
|
||||
//!
|
||||
//! A simple implementation of the IErrorRecorder interface for use by samples.
//! This interface also can be used as a reference implementation.
//! The sample Error recorder is based on a vector that pairs the error
//! code and the error string into a single element. It also uses
//! standard mutex's and atomics in order to make sure that the code
//! works in a multi-threaded environment.
class SampleErrorRecorder : public IErrorRecorder
{
    using errorPair = std::pair<ErrorCode, std::string>;
    using errorStack = std::vector<errorPair>;

public:
    SampleErrorRecorder() = default;

    ~SampleErrorRecorder() noexcept override {}

    //! Number of errors currently recorded.
    //! NOTE(review): reads mErrorStack without holding mStackLock — racy if
    //! called concurrently with reportError()/clear(); confirm caller context.
    int32_t getNbErrors() const noexcept final
    {
        return mErrorStack.size();
    }

    //! Error code of entry \p errorIdx, or kINVALID_ARGUMENT when out of range.
    ErrorCode getErrorCode(int32_t errorIdx) const noexcept final
    {
        return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first;
    };

    //! Description string of entry \p errorIdx. The returned pointer aliases
    //! internal storage and is invalidated by clear() or further reports.
    IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final
    {
        return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str();
    }

    // This class can never overflow since we have dynamic resize via std::vector usage.
    bool hasOverflowed() const noexcept final
    {
        return false;
    }

    // Empty the errorStack.
    void clear() noexcept final
    {
        try
        {
            // grab a lock so that there is no addition while clearing.
            std::lock_guard<std::mutex> guard(mStackLock);
            mErrorStack.clear();
        }
        catch (const std::exception& e)
        {
            // Swallow — this method is noexcept and must not propagate.
            sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
        }
    };

    //! Simple helper function that
    //! reports whether any error has been recorded (same locking caveat as
    //! getNbErrors()).
    bool empty() const noexcept
    {
        return mErrorStack.empty();
    }

    //! Records one error (code + description) and logs it.
    //! \return true — all errors are treated as fatal to execution.
    bool reportError(ErrorCode val, IErrorRecorder::ErrorDesc desc) noexcept final
    {
        try
        {
            std::lock_guard<std::mutex> guard(mStackLock);
            sample::gLogError << "Error[" << static_cast<int32_t>(val) << "]: " << desc << std::endl;
            mErrorStack.push_back(errorPair(val, desc));
        }
        catch (const std::exception& e)
        {
            // Swallow — this method is noexcept and must not propagate.
            sample::gLogFatal << "Internal Error: " << e.what() << std::endl;
        }
        // All errors are considered fatal.
        return true;
    }

    // Atomically increment or decrement the ref counter.
    IErrorRecorder::RefCount incRefCount() noexcept final
    {
        return ++mRefCount;
    }
    IErrorRecorder::RefCount decRefCount() noexcept final
    {
        return --mRefCount;
    }

private:
    // Simple helper functions.
    const errorPair& operator[](size_t index) const noexcept
    {
        return mErrorStack[index];
    }

    bool invalidIndexCheck(int32_t index) const noexcept
    {
        // By converting signed to unsigned, we only need a single check since
        // negative numbers turn into large positive greater than the size.
        size_t sIndex = index;
        return sIndex >= mErrorStack.size();
    }
    // Mutex to hold when locking mErrorStack.
    std::mutex mStackLock;

    // Reference count of the class. Destruction of the class when mRefCount
    // is not zero causes undefined behavior.
    std::atomic<int32_t> mRefCount{0};

    // The error stack that holds the errors recorded by TensorRT.
    errorStack mErrorStack;
}; // class SampleErrorRecorder
|
||||
#endif // ERROR_RECORDER_H
|
@ -0,0 +1,164 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef TENSORRT_ARGS_PARSER_H
|
||||
#define TENSORRT_ARGS_PARSER_H
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include "getOptWin.h"
|
||||
#else
|
||||
#include <getopt.h>
|
||||
#endif
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace samplesCommon
|
||||
{
|
||||
|
||||
//!
|
||||
//! \brief The SampleParams structure groups the basic parameters required by
|
||||
//! all sample networks.
|
||||
//!
|
||||
//!
//! \brief The SampleParams structure groups the basic parameters required by
//!        all sample networks.
//!
struct SampleParams
{
    int32_t batchSize{1};              //!< Number of inputs in a batch
    int32_t dlaCore{-1};               //!< Specify the DLA core to run network on.
    bool int8{false};                  //!< Allow running the network in Int8 mode.
    bool fp16{false};                  //!< Allow running the network in FP16 mode.
    std::vector<std::string> dataDirs; //!< Directory paths where sample data files are stored
    std::vector<std::string> inputTensorNames;  //!< Names of the network's input tensors.
    std::vector<std::string> outputTensorNames; //!< Names of the network's output tensors.
};
|
||||
|
||||
//!
|
||||
//! \brief The CaffeSampleParams structure groups the additional parameters required by
|
||||
//! networks that use caffe
|
||||
//!
|
||||
//!
//! \brief The CaffeSampleParams structure groups the additional parameters required by
//!         networks that use caffe
//!
struct CaffeSampleParams : public SampleParams
{
    std::string prototxtFileName; //!< Filename of prototxt design file of a network
    std::string weightsFileName;  //!< Filename of trained weights file of a network
    std::string meanFileName;     //!< Filename of mean file of a network
};
|
||||
|
||||
//!
|
||||
//! \brief The OnnxSampleParams structure groups the additional parameters required by
|
||||
//! networks that use ONNX
|
||||
//!
|
||||
//!
//! \brief The OnnxSampleParams structure groups the additional parameters required by
//!         networks that use ONNX
//!
struct OnnxSampleParams : public SampleParams
{
    std::string onnxFileName; //!< Filename of ONNX file of a network
};
|
||||
|
||||
//!
|
||||
//! \brief The UffSampleParams structure groups the additional parameters required by
|
||||
//! networks that use Uff
|
||||
//!
|
||||
//!
//! \brief The UffSampleParams structure groups the additional parameters required by
//!         networks that use Uff
//!
struct UffSampleParams : public SampleParams
{
    std::string uffFileName; //!< Filename of uff file of a network
};
|
||||
|
||||
//!
|
||||
//! /brief Struct to maintain command-line arguments.
|
||||
//!
|
||||
//!
//! \brief Struct to maintain command-line arguments.
//!
struct Args
{
    bool runInInt8{false};             //!< Run the network in Int8 mode (--int8).
    bool runInFp16{false};             //!< Run the network in FP16 mode (--fp16).
    bool help{false};                  //!< Print usage and exit (--help).
    int32_t useDLACore{-1};            //!< DLA core to use; -1 means GPU (--useDLACore).
    int32_t batch{1};                  //!< Batch size (--batch).
    std::vector<std::string> dataDirs; //!< Data directories, one per --datadir.
    std::string saveEngine;            //!< Path to save the built engine (--saveEngine).
    std::string loadEngine;            //!< Path to load a prebuilt engine (--loadEngine).
    bool useILoop{false};              //!< Use ILoop-based network construction (--useILoop).
};
|
||||
|
||||
//!
|
||||
//! \brief Populates the Args struct with the provided command-line parameters.
|
||||
//!
|
||||
//! \throw invalid_argument if any of the arguments are not valid
|
||||
//!
|
||||
//! \return boolean If return value is true, execution can continue, otherwise program should exit
|
||||
//!
|
||||
//!
//! \brief Populates the Args struct with the provided command-line parameters.
//!
//! \throw invalid_argument if any of the arguments are not valid
//!
//! \return boolean If return value is true, execution can continue, otherwise program should exit
//!
inline bool parseArgs(Args& args, int32_t argc, char* argv[])
{
    while (1)
    {
        int32_t arg;
        static struct option long_options[] = {{"help", no_argument, 0, 'h'}, {"datadir", required_argument, 0, 'd'},
            {"int8", no_argument, 0, 'i'}, {"fp16", no_argument, 0, 'f'}, {"useILoop", no_argument, 0, 'l'},
            {"saveEngine", required_argument, 0, 's'}, {"loadEngine", required_argument, 0, 'o'},
            {"useDLACore", required_argument, 0, 'u'}, {"batch", required_argument, 0, 'b'}, {nullptr, 0, nullptr, 0}};
        int32_t option_index = 0;
        // FIX: the short-option string now mirrors long_options, with ':'
        // after each option that takes an argument. The previous "hd:iu"
        // omitted -f/-l/-s/-o/-b and left -u without ':', so the short forms
        // of argument-taking options never populated optarg and were
        // silently ignored by the `if (optarg)` guards below.
        arg = getopt_long(argc, argv, "hd:ifls:o:u:b:", long_options, &option_index);
        if (arg == -1)
        {
            break; // no more options
        }

        switch (arg)
        {
        case 'h': args.help = true; return true;
        case 'd':
            if (optarg)
            {
                args.dataDirs.push_back(optarg);
            }
            else
            {
                std::cerr << "ERROR: --datadir requires option argument" << std::endl;
                return false;
            }
            break;
        case 's':
            if (optarg)
            {
                args.saveEngine = optarg;
            }
            break;
        case 'o':
            if (optarg)
            {
                args.loadEngine = optarg;
            }
            break;
        case 'i': args.runInInt8 = true; break;
        case 'f': args.runInFp16 = true; break;
        case 'l': args.useILoop = true; break;
        case 'u':
            if (optarg)
            {
                // std::stoi throws invalid_argument on non-numeric input.
                args.useDLACore = std::stoi(optarg);
            }
            break;
        case 'b':
            if (optarg)
            {
                // std::stoi throws invalid_argument on non-numeric input.
                args.batch = std::stoi(optarg);
            }
            break;
        default: return false; // unknown option or missing required argument
        }
    }
    return true;
}
|
||||
|
||||
} // namespace samplesCommon
|
||||
|
||||
#endif // TENSORRT_ARGS_PARSER_H
|
@ -0,0 +1,421 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef TENSORRT_BUFFERS_H
|
||||
#define TENSORRT_BUFFERS_H
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "common.h"
|
||||
#include "half.h"
|
||||
#include <cassert>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <iostream>
|
||||
#include <iterator>
|
||||
#include <memory>
|
||||
#include <new>
|
||||
#include <numeric>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace samplesCommon
|
||||
{
|
||||
|
||||
//!
|
||||
//! \brief The GenericBuffer class is a templated class for buffers.
|
||||
//!
|
||||
//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation,
|
||||
//! deallocation, querying of buffers on both the device and the host.
|
||||
//! It can handle data of arbitrary types because it stores byte buffers.
|
||||
//! The template parameters AllocFunc and FreeFunc are used for the
|
||||
//! allocation and deallocation of the buffer.
|
||||
//! AllocFunc must be a functor that takes in (void** ptr, size_t size)
|
||||
//! and returns bool. ptr is a pointer to where the allocated buffer address should be stored.
|
||||
//! size is the amount of memory in bytes to allocate.
|
||||
//! The boolean indicates whether or not the memory allocation was successful.
|
||||
//! FreeFunc must be a functor that takes in (void* ptr) and returns void.
|
||||
//! ptr is the allocated buffer address. It must work with nullptr input.
|
||||
//!
|
||||
//!
//! \brief The GenericBuffer class is a templated class for buffers.
//!
//! \details This templated RAII (Resource Acquisition Is Initialization) class handles the allocation,
//!          deallocation, querying of buffers on both the device and the host.
//!          It can handle data of arbitrary types because it stores byte buffers.
//!          The template parameters AllocFunc and FreeFunc are used for the
//!          allocation and deallocation of the buffer.
//!          AllocFunc must be a functor that takes in (void** ptr, size_t size)
//!          and returns bool. ptr is a pointer to where the allocated buffer address should be stored.
//!          size is the amount of memory in bytes to allocate.
//!          The boolean indicates whether or not the memory allocation was successful.
//!          FreeFunc must be a functor that takes in (void* ptr) and returns void.
//!          ptr is the allocated buffer address. It must work with nullptr input.
//!
template <typename AllocFunc, typename FreeFunc>
class GenericBuffer
{
public:
    //!
    //! \brief Construct an empty buffer.
    //!
    GenericBuffer(nvinfer1::DataType type = nvinfer1::DataType::kFLOAT)
        : mSize(0)
        , mCapacity(0)
        , mType(type)
        , mBuffer(nullptr)
    {
    }

    //!
    //! \brief Construct a buffer with the specified allocation size in bytes.
    //!
    //! \throw std::bad_alloc when AllocFunc reports failure.
    //!
    GenericBuffer(size_t size, nvinfer1::DataType type)
        : mSize(size)
        , mCapacity(size)
        , mType(type)
    {
        if (!allocFn(&mBuffer, this->nbBytes()))
        {
            throw std::bad_alloc();
        }
    }

    //! Move constructor: steals buf's allocation and leaves buf empty.
    GenericBuffer(GenericBuffer&& buf)
        : mSize(buf.mSize)
        , mCapacity(buf.mCapacity)
        , mType(buf.mType)
        , mBuffer(buf.mBuffer)
    {
        buf.mSize = 0;
        buf.mCapacity = 0;
        buf.mType = nvinfer1::DataType::kFLOAT;
        buf.mBuffer = nullptr;
    }

    //! Move assignment: frees the current allocation, then steals buf's.
    //! NOTE(review): unlike the move constructor, buf.mType is not reset here;
    //! harmless (buf.mBuffer is nulled) but inconsistent — confirm intent.
    GenericBuffer& operator=(GenericBuffer&& buf)
    {
        if (this != &buf)
        {
            freeFn(mBuffer);
            mSize = buf.mSize;
            mCapacity = buf.mCapacity;
            mType = buf.mType;
            mBuffer = buf.mBuffer;
            // Reset buf.
            buf.mSize = 0;
            buf.mCapacity = 0;
            buf.mBuffer = nullptr;
        }
        return *this;
    }

    //!
    //! \brief Returns pointer to underlying array.
    //!
    void* data()
    {
        return mBuffer;
    }

    //!
    //! \brief Returns pointer to underlying array.
    //!
    const void* data() const
    {
        return mBuffer;
    }

    //!
    //! \brief Returns the size (in number of elements) of the buffer.
    //!
    size_t size() const
    {
        return mSize;
    }

    //!
    //! \brief Returns the size (in bytes) of the buffer.
    //!
    size_t nbBytes() const
    {
        return this->size() * samplesCommon::getElementSize(mType);
    }

    //!
    //! \brief Resizes the buffer. This is a no-op if the new size is smaller than or equal to the current capacity.
    //!
    //! NOTE: when a reallocation happens, the existing contents are NOT
    //! preserved (the old buffer is freed before the new one is allocated).
    //!
    void resize(size_t newSize)
    {
        // mSize is updated first so nbBytes() below reflects the new size.
        mSize = newSize;
        if (mCapacity < newSize)
        {
            freeFn(mBuffer);
            if (!allocFn(&mBuffer, this->nbBytes()))
            {
                throw std::bad_alloc{};
            }
            mCapacity = newSize;
        }
    }

    //!
    //! \brief Overload of resize that accepts Dims
    //!
    void resize(const nvinfer1::Dims& dims)
    {
        return this->resize(samplesCommon::volume(dims));
    }

    //! Releases the allocation via FreeFunc (safe on nullptr by contract).
    ~GenericBuffer()
    {
        freeFn(mBuffer);
    }

private:
    size_t mSize{0}, mCapacity{0}; //!< Element count in use / allocated.
    nvinfer1::DataType mType;      //!< Element type, used to compute byte sizes.
    void* mBuffer;                 //!< Raw allocation owned by this object.
    AllocFunc allocFn;             //!< Allocation policy functor.
    FreeFunc freeFn;               //!< Deallocation policy functor.
};
|
||||
|
||||
class DeviceAllocator
|
||||
{
|
||||
public:
|
||||
bool operator()(void** ptr, size_t size) const
|
||||
{
|
||||
return cudaMalloc(ptr, size) == cudaSuccess;
|
||||
}
|
||||
};
|
||||
|
||||
class DeviceFree
|
||||
{
|
||||
public:
|
||||
void operator()(void* ptr) const
|
||||
{
|
||||
cudaFree(ptr);
|
||||
}
|
||||
};
|
||||
|
||||
//! Allocation functor for CPU (host) memory; pairs with HostFree.
class HostAllocator
{
public:
    //! Allocate \p size bytes of host memory into *ptr.
    //!
    //! \return true on success. Fix: malloc(0) may legally return nullptr,
    //! which the previous code reported as an allocation failure; a zero-byte
    //! request now always succeeds.
    bool operator()(void** ptr, size_t size) const
    {
        *ptr = malloc(size);
        return *ptr != nullptr || size == 0;
    }
};
|
||||
|
||||
//! Deallocation functor for CPU (host) memory; pairs with HostAllocator.
class HostFree
{
public:
    //! Release host memory previously obtained via malloc.
    //! free(nullptr) is a no-op, so a null pointer is safe here.
    void operator()(void* ptr) const
    {
        if (ptr != nullptr)
        {
            free(ptr);
        }
    }
};
|
||||
|
||||
using DeviceBuffer = GenericBuffer<DeviceAllocator, DeviceFree>;
|
||||
using HostBuffer = GenericBuffer<HostAllocator, HostFree>;
|
||||
|
||||
//!
|
||||
//! \brief The ManagedBuffer class groups together a pair of corresponding device and host buffers.
|
||||
//!
|
||||
class ManagedBuffer
{
public:
    DeviceBuffer deviceBuffer; // GPU-side buffer for one engine binding
    HostBuffer hostBuffer;     // matching CPU-side staging buffer (same type/size)
};
|
||||
|
||||
//!
|
||||
//! \brief The BufferManager class handles host and device buffer allocation and deallocation.
|
||||
//!
|
||||
//! \details This RAII class handles host and device buffer allocation and deallocation,
|
||||
//! memcpy between host and device buffers to aid with inference,
|
||||
//! and debugging dumps to validate inference. The BufferManager class is meant to be
|
||||
//! used to simplify buffer management and any interactions between buffers and the engine.
|
||||
//!
|
||||
class BufferManager
{
public:
    //! Sentinel returned by size() when the tensor name is not a binding.
    static const size_t kINVALID_SIZE_VALUE = ~size_t(0);

    //!
    //! \brief Create a BufferManager for handling buffer interactions with engine.
    //!
    //! Allocates one host buffer and one device buffer per engine binding,
    //! sized from the binding dimensions (taken from \p context when given).
    //!
    //! NOTE(review): uses the legacy binding-index API (getNbBindings,
    //! getBindingDimensions, ...) — confirm this matches the TensorRT
    //! version the project targets.
    //!
    BufferManager(std::shared_ptr<nvinfer1::ICudaEngine> engine, const int batchSize = 0,
        const nvinfer1::IExecutionContext* context = nullptr)
        : mEngine(engine)
        , mBatchSize(batchSize)
    {
        // Full Dims implies no batch size.
        assert(engine->hasImplicitBatchDimension() || mBatchSize == 0);
        // Create host and device buffers
        for (int i = 0; i < mEngine->getNbBindings(); i++)
        {
            // Prefer the context's dimensions: for dynamic shapes they are the
            // concrete, resolved values.
            auto dims = context ? context->getBindingDimensions(i) : mEngine->getBindingDimensions(i);
            size_t vol = context || !mBatchSize ? 1 : static_cast<size_t>(mBatchSize);
            nvinfer1::DataType type = mEngine->getBindingDataType(i);
            int vecDim = mEngine->getBindingVectorizedDim(i);
            if (-1 != vecDim) // i.e., 0 != lgScalarsPerVector
            {
                // Vectorized formats: round the vectorized dimension up to a
                // whole number of vectors, then scale the volume back up.
                int scalarsPerVec = mEngine->getBindingComponentsPerElement(i);
                dims.d[vecDim] = divUp(dims.d[vecDim], scalarsPerVec);
                vol *= scalarsPerVec;
            }
            vol *= samplesCommon::volume(dims);
            std::unique_ptr<ManagedBuffer> manBuf{new ManagedBuffer()};
            manBuf->deviceBuffer = DeviceBuffer(vol, type);
            manBuf->hostBuffer = HostBuffer(vol, type);
            // The raw device pointer doubles as the engine execution binding.
            mDeviceBindings.emplace_back(manBuf->deviceBuffer.data());
            mManagedBuffers.emplace_back(std::move(manBuf));
        }
    }

    //!
    //! \brief Returns a vector of device buffers that you can use directly as
    //!        bindings for the execute and enqueue methods of IExecutionContext.
    //!
    std::vector<void*>& getDeviceBindings()
    {
        return mDeviceBindings;
    }

    //!
    //! \brief Returns a vector of device buffers.
    //!
    const std::vector<void*>& getDeviceBindings() const
    {
        return mDeviceBindings;
    }

    //!
    //! \brief Returns the device buffer corresponding to tensorName.
    //!        Returns nullptr if no such tensor can be found.
    //!
    void* getDeviceBuffer(const std::string& tensorName) const
    {
        return getBuffer(false, tensorName);
    }

    //!
    //! \brief Returns the host buffer corresponding to tensorName.
    //!        Returns nullptr if no such tensor can be found.
    //!
    void* getHostBuffer(const std::string& tensorName) const
    {
        return getBuffer(true, tensorName);
    }

    //!
    //! \brief Returns the size (in bytes) of the host and device buffers that correspond to tensorName.
    //!        Returns kINVALID_SIZE_VALUE if no such tensor can be found.
    //!
    size_t size(const std::string& tensorName) const
    {
        int index = mEngine->getBindingIndex(tensorName.c_str());
        if (index == -1)
            return kINVALID_SIZE_VALUE;
        return mManagedBuffers[index]->hostBuffer.nbBytes();
    }

    //!
    //! \brief Templated print function that dumps buffers of arbitrary type to std::ostream.
    //!        rowCount parameter controls how many elements are on each line.
    //!        A rowCount of 1 means that there is only 1 element on each line.
    //!
    //! \p bufSize must be a multiple of sizeof(T) (asserted below).
    //!
    template <typename T>
    void print(std::ostream& os, void* buf, size_t bufSize, size_t rowCount)
    {
        assert(rowCount != 0);
        assert(bufSize % sizeof(T) == 0);
        T* typedBuf = static_cast<T*>(buf);
        size_t numItems = bufSize / sizeof(T);
        for (int i = 0; i < static_cast<int>(numItems); i++)
        {
            // Handle rowCount == 1 case: one element per line, no trailing
            // newline after the final element.
            if (rowCount == 1 && i != static_cast<int>(numItems) - 1)
                os << typedBuf[i] << std::endl;
            else if (rowCount == 1)
                os << typedBuf[i];
            // Handle rowCount > 1 case: space-separated row, newline at the
            // end of each full row.
            else if (i % rowCount == 0)
                os << typedBuf[i];
            else if (i % rowCount == rowCount - 1)
                os << " " << typedBuf[i] << std::endl;
            else
                os << " " << typedBuf[i];
        }
    }

    //!
    //! \brief Copy the contents of input host buffers to input device buffers synchronously.
    //!
    void copyInputToDevice()
    {
        memcpyBuffers(true, false, false);
    }

    //!
    //! \brief Copy the contents of output device buffers to output host buffers synchronously.
    //!
    void copyOutputToHost()
    {
        memcpyBuffers(false, true, false);
    }

    //!
    //! \brief Copy the contents of input host buffers to input device buffers asynchronously.
    //!        The copies are issued on \p stream and not synchronized here.
    //!
    void copyInputToDeviceAsync(const cudaStream_t& stream = 0)
    {
        memcpyBuffers(true, false, true, stream);
    }

    //!
    //! \brief Copy the contents of output device buffers to output host buffers asynchronously.
    //!        The copies are issued on \p stream and not synchronized here.
    //!
    void copyOutputToHostAsync(const cudaStream_t& stream = 0)
    {
        memcpyBuffers(false, true, true, stream);
    }

    ~BufferManager() = default;

private:
    //! Look up a binding by tensor name and return the requested raw pointer
    //! (host or device); nullptr when the name is not a binding.
    void* getBuffer(const bool isHost, const std::string& tensorName) const
    {
        int index = mEngine->getBindingIndex(tensorName.c_str());
        if (index == -1)
            return nullptr;
        return (isHost ? mManagedBuffers[index]->hostBuffer.data() : mManagedBuffers[index]->deviceBuffer.data());
    }

    //! Copy every input (copyInput == true) or every output binding between
    //! its host and device buffer, in the direction given by deviceToHost.
    void memcpyBuffers(const bool copyInput, const bool deviceToHost, const bool async, const cudaStream_t& stream = 0)
    {
        for (int i = 0; i < mEngine->getNbBindings(); i++)
        {
            void* dstPtr
                = deviceToHost ? mManagedBuffers[i]->hostBuffer.data() : mManagedBuffers[i]->deviceBuffer.data();
            const void* srcPtr
                = deviceToHost ? mManagedBuffers[i]->deviceBuffer.data() : mManagedBuffers[i]->hostBuffer.data();
            const size_t byteSize = mManagedBuffers[i]->hostBuffer.nbBytes();
            const cudaMemcpyKind memcpyType = deviceToHost ? cudaMemcpyDeviceToHost : cudaMemcpyHostToDevice;
            if ((copyInput && mEngine->bindingIsInput(i)) || (!copyInput && !mEngine->bindingIsInput(i)))
            {
                if (async)
                    CHECK(cudaMemcpyAsync(dstPtr, srcPtr, byteSize, memcpyType, stream));
                else
                    CHECK(cudaMemcpy(dstPtr, srcPtr, byteSize, memcpyType));
            }
        }
    }

    std::shared_ptr<nvinfer1::ICudaEngine> mEngine;              //!< The pointer to the engine
    int mBatchSize;                                              //!< The batch size for legacy networks, 0 otherwise.
    std::vector<std::unique_ptr<ManagedBuffer>> mManagedBuffers; //!< The vector of pointers to managed buffers
    std::vector<void*> mDeviceBindings;                          //!< The vector of device buffers needed for engine execution
};
|
||||
|
||||
} // namespace samplesCommon
|
||||
|
||||
#endif // TENSORRT_BUFFERS_H
|
@ -0,0 +1,124 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Script to dump TensorFlow weights in TRT v1 and v2 dump format.
|
||||
# The V1 format is for TensorRT 4.0. The V2 format is for TensorRT 4.0 and later.
|
||||
|
||||
import sys
|
||||
import struct
|
||||
import argparse
|
||||
|
||||
try:
|
||||
import tensorflow as tf
|
||||
from tensorflow.python import pywrap_tensorflow
|
||||
except ImportError as err:
|
||||
sys.stderr.write("""Error: Failed to import module ({})""".format(err))
|
||||
sys.exit()
|
||||
|
||||
def _str2bool(s):
    # Fix: argparse's type=bool treats ANY non-empty string (including the
    # literal "False") as True. Parse common textual booleans explicitly so
    # "--wtsv1 False" behaves as the user expects, while "--wtsv1 True"
    # keeps working as before.
    return str(s).strip().lower() in ("1", "true", "yes", "y", "t")


parser = argparse.ArgumentParser(description="TensorFlow Weight Dumper")

parser.add_argument(
    "-m",
    "--model",
    required=True,
    help="The checkpoint file basename, example basename(model.ckpt-766908.data-00000-of-00001) -> model.ckpt-766908",
)
parser.add_argument("-o", "--output", required=True, help="The weight file to dump all the weights to.")
parser.add_argument(
    "-1", "--wtsv1", required=False, default=False, type=_str2bool, help="Dump the weights in the wts v1."
)

opt = parser.parse_args()

# Announce which of the two TensorRT weight-file formats will be written.
if opt.wtsv1:
    print("Outputting the trained weights in TensorRT's wts v1 format. This format is documented as:")
    print("Line 0: <number of buffers in the file>")
    print("Line 1-Num: [buffer name] [buffer type] [buffer size] <hex values>")
else:
    print("Outputting the trained weights in TensorRT's wts v2 format. This format is documented as:")
    print("Line 0: <number of buffers in the file>")
    print("Line 1-Num: [buffer name] [buffer type] [(buffer shape{e.g. (1, 2, 3)}] <buffer shaped size bytes of data>")

inputbase = opt.model    # checkpoint basename to read
outputbase = opt.output  # output file basename (extension added later)
|
||||
|
||||
|
||||
def float_to_hex(f):
    """Return the hex string of the IEEE-754 float32 bit pattern of ``f``."""
    packed = struct.pack("<f", f)
    (bits,) = struct.unpack("<I", packed)
    return hex(bits)
|
||||
|
||||
|
||||
def getTRTType(tensor):
    """Map a tensor's TensorFlow dtype to the TensorRT wts type code.

    Exits the program for any dtype other than float32/float16/int8/int32.
    """
    dtype = tf.as_dtype(tensor.dtype)
    for tf_type, trt_code in ((tf.float32, 0), (tf.float16, 1), (tf.int8, 2), (tf.int32, 3)):
        if dtype == tf_type:
            return trt_code
    print("Tensor data type of %s is not supported in TensorRT" % (tensor.dtype))
    sys.exit()
|
||||
|
||||
|
||||
try:
    # Open output file; the extension encodes the format version.
    if opt.wtsv1:
        outputFileName = outputbase + ".wts"
    else:
        outputFileName = outputbase + ".wts2"
    # NOTE(review): the file is opened in text mode, but the v2 branch below
    # writes flat_tensor.tobytes() — on Python 3 writing bytes to a text-mode
    # file raises TypeError. Verify whether this script targets Python 2 or
    # whether "wb" plus encoded header writes is intended.
    outputFile = open(outputFileName, "w")

    # read vars from checkpoint
    reader = pywrap_tensorflow.NewCheckpointReader(inputbase)
    var_to_shape_map = reader.get_variable_to_shape_map()

    # Record count of weights
    count = 0
    for key in sorted(var_to_shape_map):
        count += 1
    outputFile.write("%s\n" % (count))

    # Dump the weights in either v1 or v2 format
    for key in sorted(var_to_shape_map):
        tensor = reader.get_tensor(key)
        # '/' is not allowed in buffer names in the wts format.
        file_key = key.replace("/", "_")
        typeOfElem = getTRTType(tensor)
        # v2 records the shape; v1 records the flat element count.
        val = tensor.shape
        if opt.wtsv1:
            val = tensor.size
        print("%s %s %s " % (file_key, typeOfElem, val))
        flat_tensor = tensor.flatten()
        # NOTE(review): the console print above emits typeOfElem, but the file
        # line hardcodes "0" as the type field — confirm whether the file
        # should also carry typeOfElem.
        outputFile.write("%s 0 %s " % (file_key, val))
        if opt.wtsv1:
            # v1: space-separated hex of each float's bit pattern, '0x' stripped.
            for weight in flat_tensor:
                hexval = float_to_hex(float(weight))
                outputFile.write("%s " % (hexval[2:]))
        else:
            # v2: raw little-endian bytes of the flattened tensor.
            outputFile.write(flat_tensor.tobytes())
        outputFile.write("\n")
    outputFile.close()

except Exception as e:  # pylint: disable=broad-except
    # Best-effort diagnostics for the two most common checkpoint problems.
    print(str(e))
    if "corrupted compressed block contents" in str(e):
        print("It's likely that your checkpoint file has been compressed " "with SNAPPY.")
    if "Data loss" in str(e) and (any([e in inputbase for e in [".index", ".meta", ".data"]])):
        proposed_file = ".".join(inputbase.split(".")[0:-1])
        v2_file_error_template = """
It's likely that this is a V2 checkpoint and you need to provide the filename
*prefix*. Try removing the '.' and extension. Try:
inspect checkpoint --file_name = {}"""
        print(v2_file_error_template.format(proposed_file))
|
@ -0,0 +1,248 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "getOptions.h"
|
||||
#include "logger.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cctype>
|
||||
#include <cstring>
|
||||
#include <set>
|
||||
|
||||
namespace nvinfer1
|
||||
{
|
||||
namespace utility
|
||||
{
|
||||
|
||||
//! Matching for TRTOptions is defined as follows:
|
||||
//!
|
||||
//! If A and B both have longName set, A matches B if and only if A.longName ==
|
||||
//! B.longName and (A.shortName == B.shortName if both have short name set).
|
||||
//!
|
||||
//! If A only has shortName set and B only has longName set, then A does not
|
||||
//! match B. It is assumed that when 2 TRTOptions are compared, one of them is
|
||||
//! the definition of a TRTOption in the input to getOptions. As such, if the
|
||||
//! definition only has shortName set, it will never be equal to a TRTOption
|
||||
//! that does not have shortName set (and same for longName).
|
||||
//!
|
||||
//! If A and B both have shortName set but B does not have longName set, A
|
||||
//! matches B if and only if A.shortName == B.shortName.
|
||||
//!
|
||||
//! If A has neither long or short name set, A matches B if and only if B has
|
||||
//! neither long or short name set.
|
||||
bool matches(const TRTOption& a, const TRTOption& b)
|
||||
{
|
||||
if (!a.longName.empty() && !b.longName.empty())
|
||||
{
|
||||
if (a.shortName && b.shortName)
|
||||
{
|
||||
return (a.longName == b.longName) && (a.shortName == b.shortName);
|
||||
}
|
||||
return a.longName == b.longName;
|
||||
}
|
||||
|
||||
// If only one of them is not set, this will return false anyway.
|
||||
return a.shortName == b.shortName;
|
||||
}
|
||||
|
||||
//! getTRTOptionIndex returns the index of a TRTOption in a vector of
|
||||
//! TRTOptions, -1 if not found.
|
||||
int getTRTOptionIndex(const std::vector<TRTOption>& options, const TRTOption& opt)
|
||||
{
|
||||
for (size_t i = 0; i < options.size(); ++i)
|
||||
{
|
||||
if (matches(opt, options[i]))
|
||||
{
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
//! validateTRTOption will return a string containing an error message if options
|
||||
//! contain non-numeric characters, or if there are duplicate option names found.
|
||||
//! Otherwise, returns the empty string.
|
||||
std::string validateTRTOption(
|
||||
const std::set<char>& seenShortNames, const std::set<std::string>& seenLongNames, const TRTOption& opt)
|
||||
{
|
||||
if (opt.shortName != 0)
|
||||
{
|
||||
if (!std::isalnum(opt.shortName))
|
||||
{
|
||||
return "Short name '" + std::to_string(opt.shortName) + "' is non-alphanumeric";
|
||||
}
|
||||
|
||||
if (seenShortNames.find(opt.shortName) != seenShortNames.end())
|
||||
{
|
||||
return "Short name '" + std::to_string(opt.shortName) + "' is a duplicate";
|
||||
}
|
||||
}
|
||||
|
||||
if (!opt.longName.empty())
|
||||
{
|
||||
for (const char& c : opt.longName)
|
||||
{
|
||||
if (!std::isalnum(c) && c != '-' && c != '_')
|
||||
{
|
||||
return "Long name '" + opt.longName + "' contains characters that are not '-', '_', or alphanumeric";
|
||||
}
|
||||
}
|
||||
|
||||
if (seenLongNames.find(opt.longName) != seenLongNames.end())
|
||||
{
|
||||
return "Long name '" + opt.longName + "' is a duplicate";
|
||||
}
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
//! validateTRTOptions will return a string containing an error message if any
|
||||
//! options contain non-numeric characters, or if there are duplicate option
|
||||
//! names found. Otherwise, returns the empty string.
|
||||
std::string validateTRTOptions(const std::vector<TRTOption>& options)
|
||||
{
|
||||
std::set<char> seenShortNames;
|
||||
std::set<std::string> seenLongNames;
|
||||
for (size_t i = 0; i < options.size(); ++i)
|
||||
{
|
||||
const std::string errMsg = validateTRTOption(seenShortNames, seenLongNames, options[i]);
|
||||
if (!errMsg.empty())
|
||||
{
|
||||
return "Error '" + errMsg + "' at TRTOption " + std::to_string(i);
|
||||
}
|
||||
|
||||
seenShortNames.insert(options[i].shortName);
|
||||
seenLongNames.insert(options[i].longName);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
//! parseArgs parses an argument list and returns a TRTParsedArgs with the
|
||||
//! fields set accordingly. Assumes that options is validated.
|
||||
//! ErrMsg will be set if:
|
||||
//! - an argument is null
|
||||
//! - an argument is empty
|
||||
//! - an argument does not have option (i.e. "-" and "--")
|
||||
//! - a short argument has more than 1 character
|
||||
//! - the last argument in the list requires a value
|
||||
TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector<TRTOption>& options)
{
    TRTParsedArgs parsedArgs;
    // One {count, values} slot per declared option, in declaration order.
    parsedArgs.values.resize(options.size());

    for (int i = 1; i < argc; ++i) // index of current command-line argument
    {
        if (argv[i] == nullptr)
        {
            return TRTParsedArgs{"Null argument at index " + std::to_string(i)};
        }

        const std::string argStr(argv[i]);
        if (argStr.empty())
        {
            return TRTParsedArgs{"Empty argument at index " + std::to_string(i)};
        }

        // No starting hyphen means it is a positional argument
        if (argStr[0] != '-')
        {
            parsedArgs.positionalArgs.push_back(argStr);
            continue;
        }

        if (argStr == "-" || argStr == "--")
        {
            return TRTParsedArgs{"Argument does not specify an option at index " + std::to_string(i)};
        }

        // If only 1 hyphen, char after is the flag.
        TRTOption opt{' ', "", false, ""};
        std::string value;
        if (argStr[1] != '-')
        {
            // Must only have 1 char after the hyphen
            if (argStr.size() > 2)
            {
                return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)};
            }
            opt.shortName = argStr[1];
        }
        else
        {
            opt.longName = argStr.substr(2);

            // We need to support --foo=bar syntax, so look for '='
            const size_t eqIndex = opt.longName.find('=');
            if (eqIndex < opt.longName.size())
            {
                value = opt.longName.substr(eqIndex + 1);
                opt.longName = opt.longName.substr(0, eqIndex);
            }
        }

        const int idx = getTRTOptionIndex(options, opt);
        if (idx < 0)
        {
            // Unknown options are silently ignored.
            continue;
        }

        if (options[idx].valueRequired)
        {
            // Value already supplied via --name=value syntax.
            // NOTE(review): "--name=" (empty value) falls through and consumes
            // the NEXT argument as the value — confirm this is intended.
            if (!value.empty())
            {
                parsedArgs.values[idx].second.push_back(value);
                parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
                continue;
            }

            if (i + 1 >= argc)
            {
                return TRTParsedArgs{"Last argument requires value, but none given"};
            }

            // The next argument is taken as the value even if it looks like an
            // option; warn the user in that case.
            const std::string nextArg(argv[i + 1]);
            if (nextArg.size() >= 1 && nextArg[0] == '-')
            {
                sample::gLogWarning << "Warning: Using '" << nextArg << "' as a value for '" << argStr
                                    << "', Should this be its own flag?" << std::endl;
            }

            parsedArgs.values[idx].second.push_back(nextArg);
            i += 1; // Next argument already consumed

            parsedArgs.values[idx].first = parsedArgs.values[idx].second.size();
        }
        else
        {
            // Flag without a value: just count the occurrence.
            parsedArgs.values[idx].first += 1;
        }
    }
    return parsedArgs;
}
|
||||
|
||||
//! Validate the option definitions, then parse argv against them. Any
//! validation error is reported via TRTParsedArgs::errMsg without parsing.
TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options)
{
    const std::string validationError = validateTRTOptions(options);
    return validationError.empty() ? parseArgs(argc, argv, options) : TRTParsedArgs{validationError};
}
|
||||
} // namespace utility
|
||||
} // namespace nvinfer1
|
@ -0,0 +1,128 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_GET_OPTIONS_H
|
||||
#define TRT_GET_OPTIONS_H
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace nvinfer1
|
||||
{
|
||||
namespace utility
|
||||
{
|
||||
|
||||
//! TRTOption defines a command line option. At least 1 of shortName and longName
|
||||
//! must be defined.
|
||||
//! If bool initialization is undefined behavior on your system, valueRequired
|
||||
//! must also be explicitly defined.
|
||||
//! helpText is optional.
|
||||
struct TRTOption
{
    char shortName; //!< Option name in short (single hyphen) form (i.e. -a, -b); 0 when unused
    std::string longName; //!< Option name in long (double hyphen) form (i.e. --foo, --bar); empty when unused
    bool valueRequired; //!< True if a value is needed for an option (i.e. -N 4, --foo bar)
    std::string helpText; //!< Text to show when printing out the command usage
};
|
||||
|
||||
//! TRTParsedArgs is returned by getOptions after it has parsed a command line
|
||||
//! argument list (argv).
|
||||
//!
|
||||
//! errMsg is a string containing an error message if any errors occurred. If it
|
||||
//! is empty, no errors occurred.
|
||||
//!
|
||||
//! values stores a vector of pairs for each option (ordered by order in the
|
||||
//! input). Each pair contains an int (the number of occurrences) and a vector
|
||||
//! of strings (a list of values). The user should know which of these to use,
|
||||
//! and which options required values. For non-value options, only occurrences is
|
||||
//! populated. For value-required options, occurrences == # of values. Values do
|
||||
//! not need to be unique.
|
||||
//!
|
||||
//! positionalArgs stores additional arguments that are passed in without an
|
||||
//! option (these must not start with a hyphen).
|
||||
struct TRTParsedArgs
{
    std::string errMsg; //!< Non-empty when parsing or validation failed; describes the error
    std::vector<std::pair<int, std::vector<std::string>>> values; //!< Per option (input order): {occurrence count, collected values}
    std::vector<std::string> positionalArgs; //!< Arguments given without a leading hyphen
};
|
||||
|
||||
//! Parse the input arguments passed to main() and extract options as well as
|
||||
//! positional arguments.
|
||||
//!
|
||||
//! Options are supposed to be passed to main() with a preceding hyphen '-'.
|
||||
//!
|
||||
//! If there is a single preceding hyphen, there should be exactly 1 character
|
||||
//! after the hyphen, which is interpreted as the option.
|
||||
//!
|
||||
//! If there are 2 preceding hyphens, the entire argument (without the hyphens)
|
||||
//! is interpreted as the option.
|
||||
//!
|
||||
//! If the option requires a value, the next argument is used as the value.
|
||||
//!
|
||||
//! Positional arguments must not start with a hyphen.
|
||||
//!
|
||||
//! If an argument requires a value, the next argument is interpreted as the
|
||||
//! value, even if it is the form of a valid option (i.e. --foo --bar will store
|
||||
//! "--bar" as a value for option "foo" if "foo" requires a value).
|
||||
//! We also support --name=value syntax. In this case, 'value' would be used as
|
||||
//! the value, NOT the next argument.
|
||||
//!
|
||||
//! For options:
|
||||
//! { { 'a', "", false },
|
||||
//! { 'b', "", false },
|
||||
//! { 0, "cee", false },
|
||||
//! { 'd', "", true },
|
||||
//! { 'e', "", true },
|
||||
//! { 'f', "foo", true } }
|
||||
//!
|
||||
//! ./main hello world -a -a --cee -d 12 -f 34
|
||||
//! and
|
||||
//! ./main hello world -a -a --cee -d 12 --foo 34
|
||||
//!
|
||||
//! will result in:
|
||||
//!
|
||||
//! TRTParsedArgs {
|
||||
//! errMsg: "",
|
||||
//! values: { { 2, {} },
|
||||
//! { 0, {} },
|
||||
//! { 1, {} },
|
||||
//! { 1, {"12"} },
|
||||
//! { 0, {} },
|
||||
//! { 1, {"34"} } }
|
||||
//! positionalArgs: {"hello", "world"},
|
||||
//! }
|
||||
//!
|
||||
//! Non-POSIX behavior:
|
||||
//! - Does not support "-abcde" as a shorthand for "-a -b -c -d -e". Each
|
||||
//! option must have its own hyphen prefix.
|
||||
//! - Does not support -e12 as a shorthand for "-e 12". Values MUST be
|
||||
//! whitespace-separated from the option it is for.
|
||||
//!
|
||||
//! @param[in] argc The number of arguments passed to main (including the
|
||||
//! file name, which is disregarded)
|
||||
//! @param[in] argv The arguments passed to main (including the file name,
|
||||
//! which is disregarded)
|
||||
//! @param[in] options List of TRTOptions to parse
|
||||
//! @return TRTParsedArgs. See TRTParsedArgs documentation for descriptions of
|
||||
//! the fields.
|
||||
TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector<TRTOption>& options);
|
||||
} // namespace utility
|
||||
} // namespace nvinfer1
|
||||
|
||||
#endif // TRT_GET_OPTIONS_H
|
@ -0,0 +1,568 @@
|
||||
/* $OpenBSD: getopt_long.c,v 1.23 2007/10/31 12:34:57 chl Exp $ */
|
||||
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
|
||||
|
||||
/*
|
||||
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||
*
|
||||
* Permission to use, copy, modify, and distribute this software for any
|
||||
* purpose with or without fee is hereby granted, provided that the above
|
||||
* copyright notice and this permission notice appear in all copies.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
*
|
||||
* Sponsored in part by the Defense Advanced Research Projects
|
||||
* Agency (DARPA) and Air Force Research Laboratory, Air Force
|
||||
* Materiel Command, USAF, under agreement number F39502-99-1-0512.
|
||||
*/
|
||||
/*-
|
||||
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* This code is derived from software contributed to The NetBSD Foundation
|
||||
* by Dieter Baron and Thomas Klausner.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "getoptWin.h"
|
||||
#include <errno.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <windows.h>
|
||||
|
||||
#define REPLACE_GETOPT /* use this getopt as the system getopt(3) */
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
int opterr = 1; /* if error message should be printed */
|
||||
int optind = 1; /* index into parent argv vector */
|
||||
int optopt = '?'; /* character checked for validity */
|
||||
#undef optreset /* see getopt.h */
|
||||
#define optreset __mingw_optreset
|
||||
int optreset; /* reset getopt */
|
||||
char* optarg; /* argument associated with option */
|
||||
#endif
|
||||
|
||||
#define PRINT_ERROR ((opterr) && (*options != ':'))
|
||||
|
||||
#define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
|
||||
#define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
|
||||
#define FLAG_LONGONLY 0x04 /* operate as getopt_long_only */
|
||||
|
||||
/* return values */
|
||||
#define BADCH (int) '?'
|
||||
#define BADARG ((*options == ':') ? (int) ':' : (int) '?')
|
||||
#define INORDER (int) 1
|
||||
|
||||
#ifndef __CYGWIN__
|
||||
#define __progname __argv[0]
|
||||
#else
|
||||
extern char __declspec(dllimport) * __progname;
|
||||
#endif
|
||||
|
||||
#ifdef __CYGWIN__
|
||||
static char EMSG[] = "";
|
||||
#else
|
||||
#define EMSG ""
|
||||
#endif
|
||||
|
||||
static int getopt_internal(int, char* const*, char const*, const struct option*, int*, int);
|
||||
static int parse_long_options(char* const*, char const*, const struct option*, int*, int);
|
||||
static int gcd(int, int);
|
||||
static void permute_args(int, int, int, char* const*);
|
||||
|
||||
static char* place = EMSG; /* option letter processing */
|
||||
|
||||
/* XXX: set optreset to 1 rather than these two */
|
||||
static int nonopt_start = -1; /* first non option argument (for permute) */
|
||||
static int nonopt_end = -1; /* first option after non options (for permute) */
|
||||
|
||||
/* Error messages */
|
||||
static char const recargchar[] = "option requires an argument -- %c";
|
||||
static char const recargstring[] = "option requires an argument -- %s";
|
||||
static char const ambig[] = "ambiguous option -- %.*s";
|
||||
static char const noarg[] = "option doesn't take an argument -- %.*s";
|
||||
static char const illoptchar[] = "unknown option -- %c";
|
||||
static char const illoptstring[] = "unknown option -- %s";
|
||||
|
||||
static void _vwarnx(char const* fmt, va_list ap)
|
||||
{
|
||||
(void) fprintf(stderr, "%s: ", __progname);
|
||||
if (fmt != NULL)
|
||||
(void) vfprintf(stderr, fmt, ap);
|
||||
(void) fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
/* Variadic front end for _vwarnx(): printf-style diagnostic on stderr. */
static void warnx(char const* fmt, ...)
{
    va_list args;

    va_start(args, fmt);
    _vwarnx(fmt, args);
    va_end(args);
}
|
||||
|
||||
/*
|
||||
* Compute the greatest common divisor of a and b.
|
||||
*/
|
||||
/*
 * Greatest common divisor of a and b, by the iterative Euclidean
 * algorithm.  b must be non-zero (guaranteed by the single caller,
 * permute_args(), which only runs when both blocks are non-empty).
 */
static int gcd(int a, int b)
{
    int remainder;

    remainder = a % b;
    while (remainder != 0)
    {
        a = b;
        b = remainder;
        remainder = a % b;
    }
    return b;
}
|
||||
|
||||
/*
|
||||
* Exchange the block from nonopt_start to nonopt_end with the block
|
||||
* from nonopt_end to opt_end (keeping the same order of arguments
|
||||
* in each block).
|
||||
*/
|
||||
/*
 * Rotate argv in place so that the option block [panonopt_end, opt_end)
 * moves in front of the non-option block [panonopt_start, panonopt_end),
 * preserving the relative order inside each block.  Done with gcd-many
 * independent swap cycles, so no temporary array is needed.
 */
static void permute_args(int panonopt_start, int panonopt_end, int opt_end, char* const* nargv)
{
    int nnonopts;            /* length of the non-option block */
    int nopts;               /* length of the option block */
    int ncycle;              /* number of independent rotation cycles */
    int cyclelen;            /* swaps performed per cycle */
    int cyc, step, cstart, pos;
    char* tmp;

    nnonopts = panonopt_end - panonopt_start;
    nopts = opt_end - panonopt_end;
    ncycle = gcd(nnonopts, nopts);
    cyclelen = (opt_end - panonopt_start) / ncycle;

    for (cyc = 0; cyc < ncycle; cyc++)
    {
        cstart = panonopt_end + cyc;
        pos = cstart;
        for (step = 0; step < cyclelen; step++)
        {
            /* walk to the slot whose element belongs at cstart */
            pos = (pos >= panonopt_end) ? pos - nnonopts : pos + nopts;
            tmp = nargv[pos];
            /* LINTED const cast: argv storage is writable in practice */
            ((char**) nargv)[pos] = nargv[cstart];
            ((char**) nargv)[cstart] = tmp;
        }
    }
}
|
||||
|
||||
/*
|
||||
* parse_long_options --
|
||||
* Parse long options in argc/argv argument vector.
|
||||
* Returns -1 if short_too is set and the option does not match long_options.
|
||||
*/
|
||||
/*
 * Match `place' (the text after "--" / "-", global) against long_options.
 * Accepts exact matches and unambiguous abbreviations, handles both
 * "--opt=arg" and "--opt arg" forms, and updates the optind/optarg/optopt
 * globals.  Returns the option's val (or 0 when a flag pointer is set),
 * BADCH/BADARG on error, or -1 when short_too is set and no long option
 * matched (caller then retries it as a short option).
 */
static int parse_long_options(
    char* const* nargv, char const* options, const struct option* long_options, int* idx, int short_too)
{
    char *current_argv, *has_equal;
    size_t current_argv_len;
    int i, ambiguous, match;

/* two abbreviation candidates are not "ambiguous" if they would behave identically */
#define IDENTICAL_INTERPRETATION(_x, _y) \
    (long_options[(_x)].has_arg == long_options[(_y)].has_arg && long_options[(_x)].flag == long_options[(_y)].flag \
        && long_options[(_x)].val == long_options[(_y)].val)

    current_argv = place;
    match = -1;
    ambiguous = 0;

    /* consume this argv entry; some error paths below undo this with --optind */
    optind++;

    if ((has_equal = strchr(current_argv, '=')) != NULL)
    {
        /* argument found (--option=arg); compare only the name part */
        current_argv_len = has_equal - current_argv;
        has_equal++;
    }
    else
        current_argv_len = strlen(current_argv);

    for (i = 0; long_options[i].name; i++)
    {
        /* find matching long option */
        if (strncmp(current_argv, long_options[i].name, current_argv_len))
            continue;

        if (strlen(long_options[i].name) == current_argv_len)
        {
            /* exact match trumps any earlier partial match */
            match = i;
            ambiguous = 0;
            break;
        }
        /*
         * If this is a known short option, don't allow
         * a partial match of a single character.
         */
        if (short_too && current_argv_len == 1)
            continue;

        if (match == -1) /* first partial match */
            match = i;
        else if (!IDENTICAL_INTERPRETATION(i, match))
            ambiguous = 1;
    }
    if (ambiguous)
    {
        /* ambiguous abbreviation */
        if (PRINT_ERROR)
            warnx(ambig, (int) current_argv_len, current_argv);
        optopt = 0;
        return (BADCH);
    }
    if (match != -1)
    { /* option found */
        if (long_options[match].has_arg == no_argument && has_equal)
        {
            /* "--opt=arg" given for an option that takes no argument */
            if (PRINT_ERROR)
                warnx(noarg, (int) current_argv_len, current_argv);
            /*
             * XXX: GNU sets optopt to val regardless of flag
             */
            if (long_options[match].flag == NULL)
                optopt = long_options[match].val;
            else
                optopt = 0;
            return (BADARG);
        }
        if (long_options[match].has_arg == required_argument || long_options[match].has_arg == optional_argument)
        {
            if (has_equal)
                optarg = has_equal;
            else if (long_options[match].has_arg == required_argument)
            {
                /*
                 * optional argument doesn't use next nargv
                 */
                optarg = nargv[optind++];
            }
        }
        if ((long_options[match].has_arg == required_argument) && (optarg == NULL))
        {
            /*
             * Missing argument; leading ':' indicates no error
             * should be generated.
             */
            if (PRINT_ERROR)
                warnx(recargstring, current_argv);
            /*
             * XXX: GNU sets optopt to val regardless of flag
             */
            if (long_options[match].flag == NULL)
                optopt = long_options[match].val;
            else
                optopt = 0;
            /* undo the nargv[optind++] read of the (missing) argument */
            --optind;
            return (BADARG);
        }
    }
    else
    { /* unknown option */
        if (short_too)
        {
            /* let the caller retry this token as a short option */
            --optind;
            return (-1);
        }
        if (PRINT_ERROR)
            warnx(illoptstring, current_argv);
        optopt = 0;
        return (BADCH);
    }
    if (idx)
        *idx = match;
    if (long_options[match].flag)
    {
        /* flag-style option: store val and report 0 to the caller */
        *long_options[match].flag = long_options[match].val;
        return (0);
    }
    else
        return (long_options[match].val);
#undef IDENTICAL_INTERPRETATION
}
|
||||
|
||||
/*
|
||||
* getopt_internal --
|
||||
* Parse argc/argv argument vector. Called by user level routines.
|
||||
*/
|
||||
/*
 * Core parser shared by getopt(), getopt_long() and getopt_long_only().
 * Scans nargv according to the short-option string `options' and the
 * optional long_options table; `flags' selects FLAG_PERMUTE /
 * FLAG_ALLARGS / FLAG_LONGONLY behavior.  Communicates with callers via
 * the optind/optarg/optopt/optreset globals and the file-scope `place',
 * nonopt_start and nonopt_end scan state.
 */
static int getopt_internal(
    int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx, int flags)
{
    char const* oli; /* option letter list index */
    int optchar, short_too;
    static int posixly_correct = -1; /* cached POSIXLY_CORRECT lookup */

    if (options == NULL)
        return (-1);

    /*
     * XXX Some GNU programs (like cvs) set optind to 0 instead of
     * XXX using optreset. Work around this braindamage.
     */
    if (optind == 0)
        optind = optreset = 1;

    /*
     * Disable GNU extensions if POSIXLY_CORRECT is set or options
     * string begins with a '+'.
     *
     * CV, 2009-12-14: Check POSIXLY_CORRECT anew if optind == 0 or
     * optreset != 0 for GNU compatibility.
     */
    if (posixly_correct == -1 || optreset != 0)
        posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
    if (*options == '-')
        flags |= FLAG_ALLARGS;
    else if (posixly_correct || *options == '+')
        flags &= ~FLAG_PERMUTE;
    if (*options == '+' || *options == '-')
        options++;

    optarg = NULL;
    if (optreset)
        nonopt_start = nonopt_end = -1;
start:
    if (optreset || !*place)
    { /* update scanning pointer */
        optreset = 0;
        if (optind >= nargc)
        { /* end of argument vector */
            place = EMSG;
            if (nonopt_end != -1)
            {
                /* do permutation, if we have to */
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                optind -= nonopt_end - nonopt_start;
            }
            else if (nonopt_start != -1)
            {
                /*
                 * If we skipped non-options, set optind
                 * to the first of them.
                 */
                optind = nonopt_start;
            }
            nonopt_start = nonopt_end = -1;
            return (-1);
        }
        /* a lone "-" counts as an option only when '-' is in the options string */
        if (*(place = nargv[optind]) != '-' || (place[1] == '\0' && strchr(options, '-') == NULL))
        {
            place = EMSG; /* found non-option */
            if (flags & FLAG_ALLARGS)
            {
                /*
                 * GNU extension:
                 * return non-option as argument to option 1
                 */
                optarg = nargv[optind++];
                return (INORDER);
            }
            if (!(flags & FLAG_PERMUTE))
            {
                /*
                 * If no permutation wanted, stop parsing
                 * at first non-option.
                 */
                return (-1);
            }
            /* do permutation */
            if (nonopt_start == -1)
                nonopt_start = optind;
            else if (nonopt_end != -1)
            {
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                nonopt_start = optind - (nonopt_end - nonopt_start);
                nonopt_end = -1;
            }
            optind++;
            /* process next argument */
            goto start;
        }
        if (nonopt_start != -1 && nonopt_end == -1)
            nonopt_end = optind;

        /*
         * If we have "-" do nothing, if "--" we are done.
         */
        if (place[1] != '\0' && *++place == '-' && place[1] == '\0')
        {
            optind++;
            place = EMSG;
            /*
             * We found an option (--), so if we skipped
             * non-options, we have to permute.
             */
            if (nonopt_end != -1)
            {
                permute_args(nonopt_start, nonopt_end, optind, nargv);
                optind -= nonopt_end - nonopt_start;
            }
            nonopt_start = nonopt_end = -1;
            return (-1);
        }
    }

    /*
     * Check long options if:
     * 1) we were passed some
     * 2) the arg is not just "-"
     * 3) either the arg starts with -- we are getopt_long_only()
     */
    if (long_options != NULL && place != nargv[optind] && (*place == '-' || (flags & FLAG_LONGONLY)))
    {
        short_too = 0;
        if (*place == '-')
            place++; /* --foo long option */
        else if (*place != ':' && strchr(options, *place) != NULL)
            short_too = 1; /* could be short option too */

        optchar = parse_long_options(nargv, options, long_options, idx, short_too);
        if (optchar != -1)
        {
            place = EMSG;
            return (optchar);
        }
        /* optchar == -1: fall through and retry as a short option */
    }

    if ((optchar = (int) *place++) == (int) ':' || (optchar == (int) '-' && *place != '\0')
        || (oli = strchr(options, optchar)) == NULL)
    {
        /*
         * If the user specified "-" and '-' isn't listed in
         * options, return -1 (non-option) as per POSIX.
         * Otherwise, it is an unknown option character (or ':').
         */
        if (optchar == (int) '-' && *place == '\0')
            return (-1);
        if (!*place)
            ++optind;
        if (PRINT_ERROR)
            warnx(illoptchar, optchar);
        optopt = optchar;
        return (BADCH);
    }
    if (long_options != NULL && optchar == 'W' && oli[1] == ';')
    {
        /* -W long-option (GNU "W;" extension) */
        if (*place) /* no space */
            /* NOTHING */;
        else if (++optind >= nargc)
        { /* no arg */
            place = EMSG;
            if (PRINT_ERROR)
                warnx(recargchar, optchar);
            optopt = optchar;
            return (BADARG);
        }
        else /* white space */
            place = nargv[optind];
        optchar = parse_long_options(nargv, options, long_options, idx, 0);
        place = EMSG;
        return (optchar);
    }
    if (*++oli != ':')
    { /* doesn't take argument */
        if (!*place)
            ++optind;
    }
    else
    { /* takes (optional) argument */
        optarg = NULL;
        if (*place) /* no white space */
            optarg = place;
        else if (oli[1] != ':')
        { /* arg not optional ("x:" not "x::") */
            if (++optind >= nargc)
            { /* no arg */
                place = EMSG;
                if (PRINT_ERROR)
                    warnx(recargchar, optchar);
                optopt = optchar;
                return (BADARG);
            }
            else
                optarg = nargv[optind];
        }
        place = EMSG;
        ++optind;
    }
    /* dump back option letter */
    return (optchar);
}
|
||||
|
||||
#ifdef REPLACE_GETOPT
|
||||
/*
|
||||
* getopt --
|
||||
* Parse argc/argv argument vector.
|
||||
*
|
||||
* [eventually this will replace the BSD getopt]
|
||||
*/
|
||||
int getopt(int nargc, char* const* nargv, char const* options)
{
    /*
     * Short-option parsing only.  FLAG_PERMUTE is deliberately omitted:
     * the BSD getopt(3), unlike GNU, never reorders argv, and keeping
     * this entry point simple matters for privileged programs that
     * parse options before dropping privileges.
     */
    return getopt_internal(nargc, nargv, options, NULL, NULL, 0);
}
|
||||
#endif /* REPLACE_GETOPT */
|
||||
|
||||
/*
|
||||
* getopt_long --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int getopt_long(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
|
||||
{
|
||||
|
||||
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE));
|
||||
}
|
||||
|
||||
/*
|
||||
* getopt_long_only --
|
||||
* Parse argc/argv argument vector.
|
||||
*/
|
||||
int getopt_long_only(int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx)
|
||||
{
|
||||
|
||||
return (getopt_internal(nargc, nargv, options, long_options, idx, FLAG_PERMUTE | FLAG_LONGONLY));
|
||||
}
|
@ -0,0 +1,124 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef __GETOPT_H__
|
||||
/**
|
||||
* DISCLAIMER
|
||||
* This file has no copyright assigned and is placed in the Public Domain.
|
||||
* This file is a part of the w64 mingw-runtime package.
|
||||
*
|
||||
* The w64 mingw-runtime package and its code is distributed in the hope that it
|
||||
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
|
||||
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
|
||||
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
|
||||
#define __GETOPT_H__
|
||||
|
||||
/* All the headers include this file. */
|
||||
#include <crtdefs.h>
|
||||
|
||||
#if defined(WINGETOPT_SHARED_LIB)
|
||||
#if defined(BUILDING_WINGETOPT_DLL)
|
||||
#define WINGETOPT_API __declspec(dllexport)
|
||||
#else
|
||||
#define WINGETOPT_API __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define WINGETOPT_API
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
WINGETOPT_API extern int optind; /* index of first non-option in argv */
|
||||
WINGETOPT_API extern int optopt; /* single option character, as parsed */
|
||||
WINGETOPT_API extern int opterr; /* flag to enable built-in diagnostics... */
|
||||
/* (user may set to zero, to suppress) */
|
||||
|
||||
WINGETOPT_API extern char* optarg; /* pointer to argument of current option */
|
||||
|
||||
extern int getopt(int nargc, char* const* nargv, char const* options);
|
||||
|
||||
#ifdef _BSD_SOURCE
|
||||
/*
|
||||
* BSD adds the non-standard `optreset' feature, for reinitialisation
|
||||
* of `getopt' parsing. We support this feature, for applications which
|
||||
* proclaim their BSD heritage, before including this header; however,
|
||||
* to maintain portability, developers are advised to avoid it.
|
||||
*/
|
||||
#define optreset __mingw_optreset
|
||||
extern int optreset;
|
||||
#endif
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* POSIX requires the `getopt' API to be specified in `unistd.h';
|
||||
* thus, `unistd.h' includes this header. However, we do not want
|
||||
* to expose the `getopt_long' or `getopt_long_only' APIs, when
|
||||
* included in this manner. Thus, close the standard __GETOPT_H__
|
||||
* declarations block, and open an additional __GETOPT_LONG_H__
|
||||
* specific block, only when *not* __UNISTD_H_SOURCED__, in which
|
||||
* to declare the extended API.
|
||||
*/
|
||||
#endif /* !defined(__GETOPT_H__) */
|
||||
|
||||
#if !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__)
|
||||
#define __GETOPT_LONG_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
|
||||
struct option /* specification for a long form option... */
{
    char const* name; /* option name, without leading hyphens */
    int has_arg;      /* does it take an argument? (no_/required_/optional_argument) */
    int* flag;        /* where to save its status, or NULL to have val returned instead */
    int val;          /* its associated status value */
};
|
||||
|
||||
enum /* permitted values for its `has_arg' field... */
{
    no_argument = 0,   /* option never takes an argument */
    required_argument, /* option always requires an argument */
    optional_argument  /* option may take an argument */
};
|
||||
|
||||
extern int getopt_long(
|
||||
int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
|
||||
extern int getopt_long_only(
|
||||
int nargc, char* const* nargv, char const* options, const struct option* long_options, int* idx);
|
||||
/*
|
||||
* Previous MinGW implementation had...
|
||||
*/
|
||||
#ifndef HAVE_DECL_GETOPT
|
||||
/*
|
||||
* ...for the long form API only; keep this for compatibility.
|
||||
*/
|
||||
#define HAVE_DECL_GETOPT 1
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* !defined(__UNISTD_H_SOURCED__) && !defined(__GETOPT_LONG_H__) */
|
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "logger.h"
|
||||
#include "ErrorRecorder.h"
|
||||
#include "logging.h"
|
||||
using namespace nvinfer1;
|
||||
SampleErrorRecorder gRecorder;
|
||||
namespace sample
{
// Global logger shared by the samples; default reportable severity is kINFO.
Logger gLogger{ Logger::Severity::kINFO };
// Per-severity stream consumers, all wired to gLogger via the LOG_* macros.
LogStreamConsumer gLogVerbose{ LOG_VERBOSE(gLogger) };
LogStreamConsumer gLogInfo{ LOG_INFO(gLogger) };
LogStreamConsumer gLogWarning{ LOG_WARN(gLogger) };
LogStreamConsumer gLogError{ LOG_ERROR(gLogger) };
LogStreamConsumer gLogFatal{ LOG_FATAL(gLogger) };

// Propagate a new reportable severity to the logger and every stream consumer above.
void setReportableSeverity(Logger::Severity severity)
{
    gLogger.setReportableSeverity(severity);
    gLogVerbose.setReportableSeverity(severity);
    gLogInfo.setReportableSeverity(severity);
    gLogWarning.setReportableSeverity(severity);
    gLogError.setReportableSeverity(severity);
    gLogFatal.setReportableSeverity(severity);
}
} // namespace sample
|
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef LOGGER_H
|
||||
#define LOGGER_H
|
||||
|
||||
#include "logging.h"
|
||||
|
||||
// Forward declaration; gRecorder is defined in the accompanying implementation file.
class SampleErrorRecorder;
extern SampleErrorRecorder gRecorder;
namespace sample
{
// Global logger and per-severity stream consumers (defined in the logger .cpp).
extern Logger gLogger;
extern LogStreamConsumer gLogVerbose;
extern LogStreamConsumer gLogInfo;
extern LogStreamConsumer gLogWarning;
extern LogStreamConsumer gLogError;
extern LogStreamConsumer gLogFatal;

// Set the reportable severity on gLogger and all stream consumers above.
void setReportableSeverity(Logger::Severity severity);
} // namespace sample
|
||||
|
||||
#endif // LOGGER_H
|
@ -0,0 +1,579 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TENSORRT_LOGGING_H
|
||||
#define TENSORRT_LOGGING_H
|
||||
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "sampleOptions.h"
|
||||
#include <cassert>
|
||||
#include <ctime>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
using Severity = nvinfer1::ILogger::Severity;
|
||||
|
||||
//!
//! \brief stringbuf that, on sync/flush, emits the buffered text to a target
//! ostream with a timestamp and severity prefix prepended (when logging is enabled).
//!
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
    //! \param stream    destination stream (held by reference; must outlive this buffer)
    //! \param prefix    severity tag (e.g. "[I] ") prepended to each flushed message
    //! \param shouldLog when false, flushed content is discarded rather than emitted
    LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mOutput(stream)
        , mPrefix(prefix)
        , mShouldLog(shouldLog)
    {
    }

    // "Move" constructor copies the configuration; buffered characters in
    // `other` are not transferred.
    LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other) noexcept
        : mOutput(other.mOutput)
        , mPrefix(other.mPrefix)
        , mShouldLog(other.mShouldLog)
    {
    }
    LogStreamConsumerBuffer(const LogStreamConsumerBuffer& other) = delete;
    LogStreamConsumerBuffer() = delete;
    LogStreamConsumerBuffer& operator=(const LogStreamConsumerBuffer&) = delete;
    LogStreamConsumerBuffer& operator=(LogStreamConsumerBuffer&&) = delete;

    ~LogStreamConsumerBuffer() override
    {
        // std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
        // std::streambuf::pptr() gives a pointer to the current position of the output sequence
        // if the pointer to the beginning is not equal to the pointer to the current position,
        // call putOutput() to log the output to the stream
        if (pbase() != pptr())
        {
            putOutput();
        }
    }

    //!
    //! synchronizes the stream buffer and returns 0 on success
    //! synchronizing the stream buffer consists of inserting the buffer contents into the stream,
    //! resetting the buffer and flushing the stream
    //!
    int32_t sync() override
    {
        putOutput();
        return 0;
    }

    //! Emit the buffered contents (timestamp + prefix + text) to mOutput if
    //! logging is enabled, then clear the buffer and flush the stream.
    void putOutput()
    {
        if (mShouldLog)
        {
            // prepend timestamp, formatted as [MM/DD/YYYY-HH:MM:SS]
            // NOTE(review): std::localtime returns a shared static buffer and is
            // not thread-safe; consider localtime_s/localtime_r — confirm usage context.
            std::time_t timestamp = std::time(nullptr);
            tm* tm_local = std::localtime(&timestamp);
            mOutput << "[";
            mOutput << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
            mOutput << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
            mOutput << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
            // std::stringbuf::str() gets the string contents of the buffer
            // insert the buffer contents pre-appended by the appropriate prefix into the stream
            mOutput << mPrefix << str();
        }
        // set the buffer to empty
        str("");
        // flush the stream
        mOutput.flush();
    }

    //! Enable or disable emission; buffered text is still cleared on flush either way.
    void setShouldLog(bool shouldLog)
    {
        mShouldLog = shouldLog;
    }

private:
    std::ostream& mOutput; // destination stream (not owned)
    std::string mPrefix;   // severity tag prepended to each message
    bool mShouldLog{};     // gate for actually emitting output
}; // class LogStreamConsumerBuffer
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumerBase
|
||||
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
|
||||
//!
|
||||
class LogStreamConsumerBase
{
public:
    //! Constructs the buffer member before std::ostream is initialized in the
    //! derived LogStreamConsumer (see the class comment above this block).
    LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
        : mBuffer(stream, prefix, shouldLog)
    {
    }

protected:
    std::mutex mLogMutex;            // serializes insertions from multiple threads
    LogStreamConsumerBuffer mBuffer; // the streambuf handed to std::ostream
}; // class LogStreamConsumerBase
|
||||
|
||||
//!
|
||||
//! \class LogStreamConsumer
|
||||
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
|
||||
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
|
||||
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
|
||||
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
|
||||
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
|
||||
//! Please do not change the order of the parent classes.
|
||||
//!
|
||||
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
    //!
    //! \brief Creates a LogStreamConsumer which logs messages with level severity.
    //! Reportable severity determines if the messages are severe enough to be logged.
    //!
    LogStreamConsumer(nvinfer1::ILogger::Severity reportableSeverity, nvinfer1::ILogger::Severity severity)
        : LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(severity <= reportableSeverity)
        , mSeverity(severity)
    {
    }

    // Rebuilds the buffer from the source's severity/shouldLog configuration;
    // any text buffered in `other` is not transferred.
    LogStreamConsumer(LogStreamConsumer&& other) noexcept
        : LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
        , std::ostream(&mBuffer) // links the stream buffer with the stream
        , mShouldLog(other.mShouldLog)
        , mSeverity(other.mSeverity)
    {
    }
    LogStreamConsumer(const LogStreamConsumer& other) = delete;
    LogStreamConsumer() = delete;
    ~LogStreamConsumer() override = default;
    LogStreamConsumer& operator=(const LogStreamConsumer&) = delete;
    LogStreamConsumer& operator=(LogStreamConsumer&&) = delete;

    //! Re-evaluates whether this consumer's severity clears the new threshold
    //! and propagates the decision to the underlying buffer.
    void setReportableSeverity(Severity reportableSeverity)
    {
        mShouldLog = mSeverity <= reportableSeverity;
        mBuffer.setShouldLog(mShouldLog);
    }

    //! Mutex used by the operator<< overloads to serialize concurrent insertions.
    std::mutex& getMutex()
    {
        return mLogMutex;
    }

    //! True when this consumer's severity clears the reportable threshold.
    bool getShouldLog() const
    {
        return mShouldLog;
    }

private:
    //! kINFO and less-severe levels go to stdout; warnings and errors to stderr.
    static std::ostream& severityOstream(Severity severity)
    {
        return severity >= Severity::kINFO ? std::cout : std::cerr;
    }

    //! Maps a severity to the short tag prepended to each message.
    static std::string severityPrefix(Severity severity)
    {
        switch (severity)
        {
        case Severity::kINTERNAL_ERROR: return "[F] ";
        case Severity::kERROR: return "[E] ";
        case Severity::kWARNING: return "[W] ";
        case Severity::kINFO: return "[I] ";
        case Severity::kVERBOSE: return "[V] ";
        default: assert(0); return "";
        }
    }

    bool mShouldLog;    // cached result of mSeverity <= reportable threshold
    Severity mSeverity; // severity level this consumer represents
}; // class LogStreamConsumer
|
||||
|
||||
template <typename T>
|
||||
LogStreamConsumer& operator<<(LogStreamConsumer& logger, const T& obj)
|
||||
{
|
||||
if (logger.getShouldLog())
|
||||
{
|
||||
std::lock_guard<std::mutex> guard(logger.getMutex());
|
||||
auto& os = static_cast<std::ostream&>(logger);
|
||||
os << obj;
|
||||
}
|
||||
return logger;
|
||||
}
|
||||
|
||||
//!
|
||||
//! Special handling std::endl
|
||||
//!
|
||||
//!
//! Overload for stream manipulators (e.g. std::endl), which the generic
//! template above cannot deduce; same gating and locking behavior.
//!
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, std::ostream& (*f)(std::ostream&) )
{
    if (!logger.getShouldLog())
    {
        return logger;
    }
    std::lock_guard<std::mutex> guard(logger.getMutex());
    static_cast<std::ostream&>(logger) << f;
    return logger;
}
|
||||
|
||||
//! Stream an nvinfer1::Dims as an 'x'-separated list (e.g. "3x224x224"),
//! with the same severity gating and locking as the generic overload.
inline LogStreamConsumer& operator<<(LogStreamConsumer& logger, const nvinfer1::Dims& dims)
{
    if (!logger.getShouldLog())
    {
        return logger;
    }
    std::lock_guard<std::mutex> guard(logger.getMutex());
    auto& os = static_cast<std::ostream&>(logger);
    for (int32_t d = 0; d < dims.nbDims; ++d)
    {
        if (d > 0)
        {
            os << "x";
        }
        os << dims.d[d];
    }
    return logger;
}
|
||||
|
||||
//!
|
||||
//! \class Logger
|
||||
//!
|
||||
//! \brief Class which manages logging of TensorRT tools and samples
|
||||
//!
|
||||
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
|
||||
//! and supports logging two types of messages:
|
||||
//!
|
||||
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
|
||||
//! - Test pass/fail messages
|
||||
//!
|
||||
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
|
||||
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
|
||||
//!
|
||||
//! In the future, this class could be extended to support dumping test results to a file in some standard format
|
||||
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
|
||||
//!
|
||||
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
|
||||
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
|
||||
//! library and messages coming from the sample.
|
||||
//!
|
||||
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
|
||||
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
|
||||
//! object.
|
||||
//!
|
||||
class Logger : public nvinfer1::ILogger
|
||||
{
|
||||
public:
|
||||
explicit Logger(Severity severity = Severity::kWARNING)
|
||||
: mReportableSeverity(severity)
|
||||
{
|
||||
}
|
||||
|
||||
//!
|
||||
//! \enum TestResult
|
||||
//! \brief Represents the state of a given test
|
||||
//!
|
||||
enum class TestResult
|
||||
{
|
||||
kRUNNING, //!< The test is running
|
||||
kPASSED, //!< The test passed
|
||||
kFAILED, //!< The test failed
|
||||
kWAIVED //!< The test was waived
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
|
||||
//! \return The nvinfer1::ILogger associated with this Logger
|
||||
//!
|
||||
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
|
||||
//! we can eliminate the inheritance of Logger from ILogger
|
||||
//!
|
||||
nvinfer1::ILogger& getTRTLogger() noexcept
|
||||
{
|
||||
return *this;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
|
||||
//!
|
||||
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
|
||||
//! inheritance from nvinfer1::ILogger
|
||||
//!
|
||||
void log(Severity severity, const char* msg) noexcept override
|
||||
{
|
||||
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Method for controlling the verbosity of logging output
|
||||
//!
|
||||
//! \param severity The logger will only emit messages that have severity of this level or higher.
|
||||
//!
|
||||
void setReportableSeverity(Severity severity) noexcept
|
||||
{
|
||||
mReportableSeverity = severity;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Opaque handle that holds logging information for a particular test
|
||||
//!
|
||||
//! This object is an opaque handle to information used by the Logger to print test results.
|
||||
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
|
||||
//! with Logger::reportTest{Start,End}().
|
||||
//!
|
||||
class TestAtom
|
||||
{
|
||||
public:
|
||||
TestAtom(TestAtom&&) = default;
|
||||
|
||||
private:
|
||||
friend class Logger;
|
||||
|
||||
TestAtom(bool started, const std::string& name, const std::string& cmdline)
|
||||
: mStarted(started)
|
||||
, mName(name)
|
||||
, mCmdline(cmdline)
|
||||
{
|
||||
}
|
||||
|
||||
bool mStarted;
|
||||
std::string mName;
|
||||
std::string mCmdline;
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Define a test for logging
|
||||
//!
|
||||
//! \param[in] name The name of the test. This should be a string starting with
|
||||
//! "TensorRT" and containing dot-separated strings containing
|
||||
//! the characters [A-Za-z0-9_].
|
||||
//! For example, "TensorRT.sample_googlenet"
|
||||
//! \param[in] cmdline The command line used to reproduce the test
|
||||
//
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
//!
|
||||
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
|
||||
{
|
||||
return TestAtom(false, name, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
|
||||
//! as input
|
||||
//!
|
||||
//! \param[in] name The name of the test
|
||||
//! \param[in] argc The number of command-line arguments
|
||||
//! \param[in] argv The array of command-line arguments (given as C strings)
|
||||
//!
|
||||
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
|
||||
//!
|
||||
static TestAtom defineTest(const std::string& name, int32_t argc, char const* const* argv)
|
||||
{
|
||||
// Append TensorRT version as info
|
||||
const std::string vname = name + " [TensorRT v" + std::to_string(NV_TENSORRT_VERSION) + "]";
|
||||
auto cmdline = genCmdlineString(argc, argv);
|
||||
return defineTest(vname, cmdline);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has started.
|
||||
//!
|
||||
//! \pre reportTestStart() has not been called yet for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has started
|
||||
//!
|
||||
static void reportTestStart(TestAtom& testAtom)
|
||||
{
|
||||
reportTestResult(testAtom, TestResult::kRUNNING);
|
||||
assert(!testAtom.mStarted);
|
||||
testAtom.mStarted = true;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Report that a test has ended.
|
||||
//!
|
||||
//! \pre reportTestStart() has been called for the given testAtom
|
||||
//!
|
||||
//! \param[in] testAtom The handle to the test that has ended
|
||||
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
|
||||
//! TestResult::kFAILED, TestResult::kWAIVED
|
||||
//!
|
||||
static void reportTestEnd(TestAtom const& testAtom, TestResult result)
|
||||
{
|
||||
assert(result != TestResult::kRUNNING);
|
||||
assert(testAtom.mStarted);
|
||||
reportTestResult(testAtom, result);
|
||||
}
|
||||
|
||||
static int32_t reportPass(TestAtom const& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kPASSED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t reportFail(TestAtom const& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kFAILED);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
static int32_t reportWaive(TestAtom const& testAtom)
|
||||
{
|
||||
reportTestEnd(testAtom, TestResult::kWAIVED);
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
static int32_t reportTest(TestAtom const& testAtom, bool pass)
|
||||
{
|
||||
return pass ? reportPass(testAtom) : reportFail(testAtom);
|
||||
}
|
||||
|
||||
Severity getReportableSeverity() const
|
||||
{
|
||||
return mReportableSeverity;
|
||||
}
|
||||
|
||||
private:
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a log message with the given severity
|
||||
//!
|
||||
static const char* severityPrefix(Severity severity)
|
||||
{
|
||||
switch (severity)
|
||||
{
|
||||
case Severity::kINTERNAL_ERROR: return "[F] ";
|
||||
case Severity::kERROR: return "[E] ";
|
||||
case Severity::kWARNING: return "[W] ";
|
||||
case Severity::kINFO: return "[I] ";
|
||||
case Severity::kVERBOSE: return "[V] ";
|
||||
default: assert(0); return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate string for prefixing a test result message with the given result
|
||||
//!
|
||||
static const char* testResultString(TestResult result)
|
||||
{
|
||||
switch (result)
|
||||
{
|
||||
case TestResult::kRUNNING: return "RUNNING";
|
||||
case TestResult::kPASSED: return "PASSED";
|
||||
case TestResult::kFAILED: return "FAILED";
|
||||
case TestResult::kWAIVED: return "WAIVED";
|
||||
default: assert(0); return "";
|
||||
}
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
|
||||
//!
|
||||
static std::ostream& severityOstream(Severity severity)
|
||||
{
|
||||
return severity >= Severity::kINFO ? std::cout : std::cerr;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief method that implements logging test results
|
||||
//!
|
||||
static void reportTestResult(TestAtom const& testAtom, TestResult result)
|
||||
{
|
||||
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
|
||||
<< testAtom.mCmdline << std::endl;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief generate a command line string from the given (argc, argv) values
|
||||
//!
|
||||
static std::string genCmdlineString(int32_t argc, char const* const* argv)
|
||||
{
|
||||
std::stringstream ss;
|
||||
for (int32_t i = 0; i < argc; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
{
|
||||
ss << " ";
|
||||
}
|
||||
ss << argv[i];
|
||||
}
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
Severity mReportableSeverity;
|
||||
}; // class Logger
|
||||
|
||||
namespace
|
||||
{
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
//! Factory: builds a consumer whose messages are gated against the logger's
//! reportable severity at kVERBOSE level.
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_INFO(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
//! Factory: builds a consumer whose messages are gated against the logger's
//! reportable severity at kINFO level.
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_WARN(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
//! Factory: builds a consumer whose messages are gated against the logger's
//! reportable severity at kWARNING level.
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_ERROR(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
//! Factory: builds a consumer whose messages are gated against the logger's
//! reportable severity at kERROR level.
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
|
||||
|
||||
//!
|
||||
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
|
||||
//! ("fatal" severity)
|
||||
//!
|
||||
//! Example usage:
|
||||
//!
|
||||
//! LOG_FATAL(logger) << "hello world" << std::endl;
|
||||
//!
|
||||
//! Factory: builds a consumer whose messages are gated against the logger's
//! reportable severity at kINTERNAL_ERROR ("fatal") level.
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
    return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
|
||||
} // anonymous namespace
|
||||
} // namespace sample
|
||||
#endif // TENSORRT_LOGGING_H
|
@ -0,0 +1,152 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PARSER_ONNX_CONFIG_H
|
||||
#define PARSER_ONNX_CONFIG_H
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "NvOnnxConfig.h"
|
||||
#include "NvOnnxParser.h"
|
||||
|
||||
#define ONNX_DEBUG 1
|
||||
|
||||
/**
|
||||
* \class ParserOnnxConfig
|
||||
* \brief Configuration Manager Class Concrete Implementation
|
||||
*
|
||||
* \note:
|
||||
*
|
||||
*/
|
||||
|
||||
class ParserOnnxConfig : public nvonnxparser::IOnnxConfig
|
||||
{
|
||||
|
||||
protected:
|
||||
std::string mModelFilename{};
|
||||
std::string mTextFilename{};
|
||||
std::string mFullTextFilename{};
|
||||
nvinfer1::DataType mModelDtype;
|
||||
nvonnxparser::IOnnxConfig::Verbosity mVerbosity;
|
||||
bool mPrintLayercInfo;
|
||||
|
||||
public:
|
||||
ParserOnnxConfig()
|
||||
: mModelDtype(nvinfer1::DataType::kFLOAT)
|
||||
, mVerbosity(static_cast<int>(nvinfer1::ILogger::Severity::kWARNING))
|
||||
, mPrintLayercInfo(false)
|
||||
{
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug())
|
||||
{
|
||||
std::cout << " ParserOnnxConfig::ctor(): " << this << "\t" << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
protected:
|
||||
~ParserOnnxConfig() override
|
||||
{
|
||||
#ifdef ONNX_DEBUG
|
||||
if (isDebug())
|
||||
{
|
||||
std::cout << "ParserOnnxConfig::dtor(): " << this << std::endl;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
void setModelDtype(const nvinfer1::DataType modelDtype) noexcept override
|
||||
{
|
||||
mModelDtype = modelDtype;
|
||||
}
|
||||
|
||||
nvinfer1::DataType getModelDtype() const noexcept override
|
||||
{
|
||||
return mModelDtype;
|
||||
}
|
||||
|
||||
const char* getModelFileName() const noexcept override
|
||||
{
|
||||
return mModelFilename.c_str();
|
||||
}
|
||||
void setModelFileName(const char* onnxFilename) noexcept override
|
||||
{
|
||||
mModelFilename = std::string(onnxFilename);
|
||||
}
|
||||
nvonnxparser::IOnnxConfig::Verbosity getVerbosityLevel() const noexcept override
|
||||
{
|
||||
return mVerbosity;
|
||||
}
|
||||
void addVerbosity() noexcept override
|
||||
{
|
||||
++mVerbosity;
|
||||
}
|
||||
void reduceVerbosity() noexcept override
|
||||
{
|
||||
--mVerbosity;
|
||||
}
|
||||
void setVerbosityLevel(nvonnxparser::IOnnxConfig::Verbosity verbosity) noexcept override
|
||||
{
|
||||
mVerbosity = verbosity;
|
||||
}
|
||||
|
||||
const char* getTextFileName() const noexcept override
|
||||
{
|
||||
return mTextFilename.c_str();
|
||||
}
|
||||
void setTextFileName(const char* textFilename) noexcept override
|
||||
{
|
||||
mTextFilename = std::string(textFilename);
|
||||
}
|
||||
const char* getFullTextFileName() const noexcept override
|
||||
{
|
||||
return mFullTextFilename.c_str();
|
||||
}
|
||||
void setFullTextFileName(const char* fullTextFilename) noexcept override
|
||||
{
|
||||
mFullTextFilename = std::string(fullTextFilename);
|
||||
}
|
||||
bool getPrintLayerInfo() const noexcept override
|
||||
{
|
||||
return mPrintLayercInfo;
|
||||
}
|
||||
void setPrintLayerInfo(bool src) noexcept override
|
||||
{
|
||||
mPrintLayercInfo = src;
|
||||
} //!< get the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()
|
||||
|
||||
virtual bool isDebug() const noexcept
|
||||
{
|
||||
#if ONNX_DEBUG
|
||||
return (std::getenv("ONNX_DEBUG") ? true : false);
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void destroy() noexcept override
|
||||
{
|
||||
delete this;
|
||||
}
|
||||
|
||||
}; // class ParserOnnxConfig
|
||||
|
||||
#endif
|
@ -0,0 +1,224 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TENSORRT_SAFE_COMMON_H
|
||||
#define TENSORRT_SAFE_COMMON_H
|
||||
|
||||
#include "NvInferRuntimeCommon.h"
#include "cuda_runtime.h"

#include <cstdlib>
#include <iostream>
#include <memory>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
|
||||
|
||||
// For safeLoadLibrary
|
||||
#ifdef _MSC_VER
|
||||
// Needed so that the max/min definitions in windows.h do not conflict with std::max/min.
|
||||
#define NOMINMAX
|
||||
#include <windows.h>
|
||||
#undef NOMINMAX
|
||||
#else
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
//! Evaluates a status expression (typically a cudaError_t) and aborts the
//! process on any nonzero result, printing the raw numeric code to stderr.
//! NOTE(review): the code is printed as a number, not via cudaGetErrorString,
//! presumably because the macro is also used with non-CUDA statuses — confirm.
#undef CHECK
#define CHECK(status)                                         \
    do                                                        \
    {                                                         \
        auto ret = (status);                                  \
        if (ret != 0)                                         \
        {                                                     \
            std::cerr << "Cuda failure: " << ret << std::endl; \
            abort();                                          \
        }                                                     \
    } while (0)
|
||||
|
||||
//! Release-build-safe assertion: unlike assert(), this check is always
//! compiled in; it prints the failed condition text and aborts.
#undef SAFE_ASSERT
#define SAFE_ASSERT(condition)                                             \
    do                                                                     \
    {                                                                      \
        if (!(condition))                                                  \
        {                                                                  \
            std::cerr << "Assertion failure: " << #condition << std::endl; \
            abort();                                                       \
        }                                                                  \
    } while (0)
|
||||
|
||||
namespace samplesCommon
|
||||
{
|
||||
//! Wraps a raw pointer in a shared_ptr, throwing std::runtime_error when the
//! pointer is null (i.e. the factory that produced it failed).
//! \param obj raw pointer to take ownership of; must not be null
//! \return shared_ptr owning \p obj
template <typename T>
inline std::shared_ptr<T> infer_object(T* obj)
{
    if (obj == nullptr)
    {
        throw std::runtime_error("Failed to create object");
    }
    return std::shared_ptr<T>{obj};
}
|
||||
|
||||
//! Size in bytes of one element of the given TensorRT data type, or 0 for
//! any type not listed below.
inline uint32_t elementSize(nvinfer1::DataType t)
{
    switch (t)
    {
    case nvinfer1::DataType::kINT32:
    case nvinfer1::DataType::kFLOAT: return 4;
    case nvinfer1::DataType::kHALF: return 2;
    case nvinfer1::DataType::kINT8: return 1;
    case nvinfer1::DataType::kUINT8: return 1;
    case nvinfer1::DataType::kBOOL: return 1;
    case nvinfer1::DataType::kFP8: return 1;
    }
    // Unhandled enumerators (e.g. types added in newer TensorRT versions).
    return 0;
}
|
||||
|
||||
//! Integer ceiling division: smallest q such that q * n >= x.
//! \param x dividend (non-negative)
//! \param n divisor (positive)
template <typename A, typename B>
inline A divUp(A x, B n)
{
    A quotient = (x + n - 1) / n;
    return quotient;
}
|
||||
|
||||
//! Product of all extents in \p d, accumulated in int64_t; returns 1 for a
//! zero-rank Dims (empty product).
inline int64_t volume(nvinfer1::Dims const& d)
{
    return std::accumulate(d.d, d.d + d.nbDims, int64_t{1}, std::multiplies<int64_t>{});
}
|
||||
|
||||
//! Returns m rounded up to the nearest multiple of n.
//! \param m value to round (intended non-negative)
//! \param n rounding granularity (positive)
template <typename T>
inline T roundUp(T m, T n)
{
    T q = (m + n - 1) / n;
    return q * n;
}
|
||||
|
||||
//! comps is the number of components in a vector. Ignored if vecDim < 0.
//! Computes the element count of a (possibly vectorized) tensor: the
//! vectorized dimension is first rounded up to a multiple of comps, then the
//! product of all dims is scaled by max(batch, 1).
inline int64_t volume(nvinfer1::Dims dims, int32_t vecDim, int32_t comps, int32_t batch)
{
    if (vecDim >= 0)
    {
        // dims is taken by value, so this padding does not affect the caller.
        dims.d[vecDim] = roundUp(dims.d[vecDim], comps);
    }
    return samplesCommon::volume(dims) * std::max(batch, 1);
}
|
||||
|
||||
//!
|
||||
//! \class TrtCudaGraphSafe
|
||||
//! \brief Managed CUDA graph
|
||||
//!
|
||||
class TrtCudaGraphSafe
{
public:
    explicit TrtCudaGraphSafe() = default;

    // Non-copyable and non-movable: the destructor owns mGraphExec.
    TrtCudaGraphSafe(const TrtCudaGraphSafe&) = delete;

    TrtCudaGraphSafe& operator=(const TrtCudaGraphSafe&) = delete;

    TrtCudaGraphSafe(TrtCudaGraphSafe&&) = delete;

    TrtCudaGraphSafe& operator=(TrtCudaGraphSafe&&) = delete;

    //! Releases the instantiated executable graph, if endCapture() succeeded.
    ~TrtCudaGraphSafe()
    {
        if (mGraphExec)
        {
            cudaGraphExecDestroy(mGraphExec);
        }
    }

    //! Starts capturing work submitted to \p stream into a CUDA graph.
    //! Must be paired with endCapture() or endCaptureOnError().
    void beginCapture(cudaStream_t& stream)
    {
        // cudaStreamCaptureModeGlobal is the only allowed mode in SAFE CUDA
        CHECK(cudaStreamBeginCapture(stream, cudaStreamCaptureModeGlobal));
    }

    //! Launches the instantiated graph on \p stream.
    //! \return true on cudaSuccess; requires a prior successful endCapture().
    bool launch(cudaStream_t& stream)
    {
        return cudaGraphLaunch(mGraphExec, stream) == cudaSuccess;
    }

    //! Ends capture, instantiates the executable graph, then frees the
    //! template graph (only mGraphExec is needed for launch()).
    void endCapture(cudaStream_t& stream)
    {
        CHECK(cudaStreamEndCapture(stream, &mGraph));
        CHECK(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
        CHECK(cudaGraphDestroy(mGraph));
    }

    //! Cleanup path when something failed during capture; logs and swallows
    //! the error so the caller can fall back to non-graph execution.
    void endCaptureOnError(cudaStream_t& stream)
    {
        // There are two possibilities why stream capture would fail:
        // (1) stream is in cudaErrorStreamCaptureInvalidated state.
        // (2) TRT reports a failure.
        // In case (1), the returning mGraph should be nullptr.
        // In case (2), the returning mGraph is not nullptr, but it should not be used.
        const auto ret = cudaStreamEndCapture(stream, &mGraph);
        if (ret == cudaErrorStreamCaptureInvalidated)
        {
            SAFE_ASSERT(mGraph == nullptr);
        }
        else
        {
            SAFE_ASSERT(ret == cudaSuccess);
            SAFE_ASSERT(mGraph != nullptr);
            CHECK(cudaGraphDestroy(mGraph));
            mGraph = nullptr;
        }
        // Clean up any CUDA error.
        cudaGetLastError();
        sample::gLogError << "The CUDA graph capture on the stream has failed." << std::endl;
    }

private:
    cudaGraph_t mGraph{};         //!< template graph; valid only during capture teardown
    cudaGraphExec_t mGraphExec{}; //!< instantiated graph used by launch()
};
|
||||
|
||||
//! Loads a shared library (plugin) by path, logging an error on failure.
//! NOTE(review): the returned OS handle is neither stored nor released —
//! presumably the library is meant to stay loaded for the process lifetime.
inline void safeLoadLibrary(const std::string& path)
{
#ifdef _MSC_VER
    void* handle = LoadLibrary(path.c_str());
#else
    // RTLD_LAZY: resolve symbols on first use rather than at load time.
    int32_t flags{RTLD_LAZY};
    void* handle = dlopen(path.c_str(), flags);
#endif
    if (handle == nullptr)
    {
#ifdef _MSC_VER
        sample::gLogError << "Could not load plugin library: " << path << std::endl;
#else
        sample::gLogError << "Could not load plugin library: " << path << ", due to: " << dlerror() << std::endl;
#endif
    }
}
|
||||
|
||||
//! Splits \p str on \p delimiter and returns the pieces in order.
//! Delimiters are consumed; an empty input yields a single empty element
//! and a trailing delimiter yields a trailing empty element (getline
//! semantics, preserved intentionally).
//! \param str input text to split (taken by const reference; previously an
//!            unnecessary by-value copy)
//! \param delimiter separator character, ',' by default
//! \return vector of substrings, in input order
inline std::vector<std::string> safeSplitString(const std::string& str, char delimiter = ',')
{
    std::vector<std::string> splitVect;
    std::stringstream ss(str);
    std::string substr;

    while (ss.good())
    {
        getline(ss, substr, delimiter);
        splitVect.emplace_back(std::move(substr));
    }
    return splitVect;
}
|
||||
|
||||
} // namespace samplesCommon
|
||||
|
||||
#endif // TENSORRT_SAFE_COMMON_H
|
@ -0,0 +1,338 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef SampleConfig_H
|
||||
#define SampleConfig_H
|
||||
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "NvOnnxConfig.h"
|
||||
//! Concrete IOnnxConfig holding all file names, tolerances, and knobs used by
//! the ONNX samples. Created on the heap; released via destroy() (the dtor is
//! protected, matching the interface's ownership model).
class SampleConfig : public nvonnxparser::IOnnxConfig
{
public:
    //! Expected encoding of the input image file.
    enum class InputDataFormat : int
    {
        kASCII = 0,
        kPPM = 1
    };

private:
    std::string mModelFilename;
    std::string mEngineFilename;
    std::string mTextFilename;
    std::string mFullTextFilename;
    std::string mImageFilename;
    std::string mReferenceFilename;
    std::string mOutputFilename;
    std::string mCalibrationFilename;
    std::string mTimingCacheFilename;
    int64_t mLabel{-1};
    int64_t mMaxBatchSize{32};
    int64_t mCalibBatchSize{0};
    int64_t mMaxNCalibBatch{0};
    int64_t mFirstCalibBatch{0};
    int64_t mUseDLACore{-1};
    nvinfer1::DataType mModelDtype{nvinfer1::DataType::kFLOAT};
    bool mTF32{true};
    Verbosity mVerbosity{static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)};
    bool mPrintLayercInfo{false};
    bool mDebugBuilder{false};
    InputDataFormat mInputDataFormat{InputDataFormat::kASCII};
    uint64_t mTopK{0};
    float mFailurePercentage{-1.0f};
    float mTolerance{0.0f};
    float mAbsTolerance{1e-5f};

public:
    SampleConfig()
    {
#ifdef ONNX_DEBUG
        if (isDebug())
        {
            std::cout << " SampleConfig::ctor(): " << this << "\t" << std::endl;
        }
#endif
    }

protected:
    ~SampleConfig() override
    {
#ifdef ONNX_DEBUG
        if (isDebug())
        {
            std::cout << "SampleConfig::dtor(): " << this << std::endl;
        }
#endif
    }

public:
    void setModelDtype(const nvinfer1::DataType mdt) noexcept override
    {
        mModelDtype = mdt;
    }

    nvinfer1::DataType getModelDtype() const noexcept override
    {
        return mModelDtype;
    }

    bool getTF32() const noexcept
    {
        return mTF32;
    }

    void setTF32(bool enabled) noexcept
    {
        mTF32 = enabled;
    }

    const char* getModelFileName() const noexcept override
    {
        return mModelFilename.c_str();
    }

    void setModelFileName(const char* onnxFilename) noexcept override
    {
        mModelFilename = std::string(onnxFilename);
    }
    Verbosity getVerbosityLevel() const noexcept override
    {
        return mVerbosity;
    }
    void addVerbosity() noexcept override
    {
        ++mVerbosity;
    }
    void reduceVerbosity() noexcept override
    {
        --mVerbosity;
    }
    void setVerbosityLevel(Verbosity v) noexcept override
    {
        mVerbosity = v;
    }
    const char* getEngineFileName() const noexcept
    {
        return mEngineFilename.c_str();
    }
    void setEngineFileName(const char* engineFilename) noexcept
    {
        mEngineFilename = std::string(engineFilename);
    }
    const char* getTextFileName() const noexcept override
    {
        return mTextFilename.c_str();
    }
    void setTextFileName(const char* textFilename) noexcept override
    {
        mTextFilename = std::string(textFilename);
    }
    const char* getFullTextFileName() const noexcept override
    {
        return mFullTextFilename.c_str();
    }
    void setFullTextFileName(const char* fullTextFilename) noexcept override
    {
        mFullTextFilename = std::string(fullTextFilename);
    }
    void setLabel(int64_t label) noexcept
    {
        mLabel = label;
    } //!< set the Label

    int64_t getLabel() const noexcept
    {
        return mLabel;
    } //!< get the Label

    bool getPrintLayerInfo() const noexcept override
    {
        return mPrintLayercInfo;
    }

    void setPrintLayerInfo(bool b) noexcept override
    {
        mPrintLayercInfo = b;
    } //!< set the boolean variable corresponding to the Layer Info, see getPrintLayerInfo()

    void setMaxBatchSize(int64_t maxBatchSize) noexcept
    {
        mMaxBatchSize = maxBatchSize;
    } //!< set the Max Batch Size
    int64_t getMaxBatchSize() const noexcept
    {
        return mMaxBatchSize;
    } //!< get the Max Batch Size

    void setCalibBatchSize(int64_t CalibBatchSize) noexcept
    {
        mCalibBatchSize = CalibBatchSize;
    } //!< set the calibration batch size
    int64_t getCalibBatchSize() const noexcept
    {
        return mCalibBatchSize;
    } //!< get calibration batch size

    void setMaxNCalibBatch(int64_t MaxNCalibBatch) noexcept
    {
        mMaxNCalibBatch = MaxNCalibBatch;
    } //!< set Max Number of Calibration Batches
    int64_t getMaxNCalibBatch() const noexcept
    {
        return mMaxNCalibBatch;
    } //!< get the Max Number of Calibration Batches

    void setFirstCalibBatch(int64_t FirstCalibBatch) noexcept
    {
        mFirstCalibBatch = FirstCalibBatch;
    } //!< set the first calibration batch
    int64_t getFirstCalibBatch() const noexcept
    {
        return mFirstCalibBatch;
    } //!< get the first calibration batch

    void setUseDLACore(int64_t UseDLACore) noexcept
    {
        mUseDLACore = UseDLACore;
    } //!< set the DLA core to use
    int64_t getUseDLACore() const noexcept
    {
        return mUseDLACore;
    } //!< get the DLA core to use

    void setDebugBuilder() noexcept
    {
        mDebugBuilder = true;
    } //!< enable the Debug info, while building the engine.
    bool getDebugBuilder() const noexcept
    {
        return mDebugBuilder;
    } //!< get the boolean variable, corresponding to the debug builder

    const char* getImageFileName() const noexcept //!< get the Image file name (PPM or ASCII)
    {
        return mImageFilename.c_str();
    }
    void setImageFileName(const char* imageFilename) noexcept //!< set the Image file name
    {
        mImageFilename = std::string(imageFilename);
    }
    const char* getReferenceFileName() const noexcept //!< get the reference file name
    {
        return mReferenceFilename.c_str();
    }
    void setReferenceFileName(const char* referenceFilename) noexcept //!< set reference file name
    {
        mReferenceFilename = std::string(referenceFilename);
    }

    void setInputDataFormat(InputDataFormat idt) noexcept
    {
        mInputDataFormat = idt;
    } //!< specifies expected data format of the image file (PPM or ASCII)
    InputDataFormat getInputDataFormat() const noexcept
    {
        return mInputDataFormat;
    } //!< returns the expected data format of the image file.

    const char* getOutputFileName() const noexcept //!< get the file name used to save the results
    {
        return mOutputFilename.c_str();
    }
    void setOutputFileName(const char* outputFilename) noexcept //!< set the output file name
    {
        mOutputFilename = std::string(outputFilename);
    }

    const char* getCalibrationFileName() const noexcept
    {
        return mCalibrationFilename.c_str();
    } //!< get the file containing the list of image files for int8 calibration
    void setCalibrationFileName(const char* calibrationFilename) noexcept //!< set the int 8 calibration list file name
    {
        mCalibrationFilename = std::string(calibrationFilename);
    }

    uint64_t getTopK() const noexcept
    {
        return mTopK;
    }
    void setTopK(uint64_t topK) noexcept
    {
        mTopK = topK;
    } //!< If this options is specified, return the K top probabilities.

    float getFailurePercentage() const noexcept
    {
        return mFailurePercentage;
    }

    void setFailurePercentage(float f) noexcept
    {
        mFailurePercentage = f;
    }

    float getAbsoluteTolerance() const noexcept
    {
        return mAbsTolerance;
    }

    void setAbsoluteTolerance(float a) noexcept
    {
        mAbsTolerance = a;
    }

    float getTolerance() const noexcept
    {
        return mTolerance;
    }

    void setTolerance(float t) noexcept
    {
        mTolerance = t;
    }

    const char* getTimingCacheFilename() const noexcept
    {
        return mTimingCacheFilename.c_str();
    }

    void setTimingCacheFileName(const char* timingCacheFilename) noexcept
    {
        mTimingCacheFilename = std::string(timingCacheFilename);
    }

    //! True when the ONNX_DEBUG environment variable is set (debug tracing).
    bool isDebug() const noexcept
    {
#if ONNX_DEBUG
        return (std::getenv("ONNX_DEBUG") ? true : false);
#else
        return false;
#endif
    }

    //! Interface-mandated disposal; the destructor is protected.
    void destroy() noexcept override
    {
        delete this;
    }

}; // class SampleConfig
|
||||
|
||||
#endif
|
@ -0,0 +1,554 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_DEVICE_H
|
||||
#define TRT_SAMPLE_DEVICE_H
|
||||
|
||||
#include <cassert>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
#include "sampleUtils.h"
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
//! Aborts the process with a diagnostic on \p err if a CUDA runtime call failed.
inline void cudaCheck(cudaError_t ret, std::ostream& err = std::cerr)
{
    if (ret == cudaSuccess)
    {
        return;
    }
    err << "Cuda failure: " << cudaGetErrorString(ret) << std::endl;
    abort();
}
|
||||
|
||||
class TrtCudaEvent;
|
||||
|
||||
namespace
{

//! Host function intended for cudaLaunchHostFunc: blocks the calling thread
//! for the number of milliseconds pointed to by \p sleep (a float*).
void cudaSleep(void* sleep)
{
    float const ms = *static_cast<float*>(sleep);
    std::this_thread::sleep_for(std::chrono::duration<float, std::milli>(ms));
}

} // namespace
|
||||
|
||||
//!
|
||||
//! \class TrtCudaStream
|
||||
//! \brief Managed CUDA stream
|
||||
//!
|
||||
class TrtCudaStream
|
||||
{
|
||||
public:
|
||||
TrtCudaStream()
|
||||
{
|
||||
cudaCheck(cudaStreamCreate(&mStream));
|
||||
}
|
||||
|
||||
TrtCudaStream(const TrtCudaStream&) = delete;
|
||||
|
||||
TrtCudaStream& operator=(const TrtCudaStream&) = delete;
|
||||
|
||||
TrtCudaStream(TrtCudaStream&&) = delete;
|
||||
|
||||
TrtCudaStream& operator=(TrtCudaStream&&) = delete;
|
||||
|
||||
~TrtCudaStream()
|
||||
{
|
||||
cudaCheck(cudaStreamDestroy(mStream));
|
||||
}
|
||||
|
||||
cudaStream_t get() const
|
||||
{
|
||||
return mStream;
|
||||
}
|
||||
|
||||
void synchronize()
|
||||
{
|
||||
cudaCheck(cudaStreamSynchronize(mStream));
|
||||
}
|
||||
|
||||
void wait(TrtCudaEvent& event);
|
||||
|
||||
void sleep(float* ms)
|
||||
{
|
||||
cudaCheck(cudaLaunchHostFunc(mStream, cudaSleep, ms));
|
||||
}
|
||||
|
||||
private:
|
||||
cudaStream_t mStream{};
|
||||
};
|
||||
|
||||
//!
|
||||
//! \class TrtCudaEvent
|
||||
//! \brief Managed CUDA event
|
||||
//!
|
||||
class TrtCudaEvent
|
||||
{
|
||||
public:
|
||||
explicit TrtCudaEvent(bool blocking = true)
|
||||
{
|
||||
const uint32_t flags = blocking ? cudaEventBlockingSync : cudaEventDefault;
|
||||
cudaCheck(cudaEventCreateWithFlags(&mEvent, flags));
|
||||
}
|
||||
|
||||
TrtCudaEvent(const TrtCudaEvent&) = delete;
|
||||
|
||||
TrtCudaEvent& operator=(const TrtCudaEvent&) = delete;
|
||||
|
||||
TrtCudaEvent(TrtCudaEvent&&) = delete;
|
||||
|
||||
TrtCudaEvent& operator=(TrtCudaEvent&&) = delete;
|
||||
|
||||
~TrtCudaEvent()
|
||||
{
|
||||
cudaCheck(cudaEventDestroy(mEvent));
|
||||
}
|
||||
|
||||
cudaEvent_t get() const
|
||||
{
|
||||
return mEvent;
|
||||
}
|
||||
|
||||
void record(const TrtCudaStream& stream)
|
||||
{
|
||||
cudaCheck(cudaEventRecord(mEvent, stream.get()));
|
||||
}
|
||||
|
||||
void synchronize()
|
||||
{
|
||||
cudaCheck(cudaEventSynchronize(mEvent));
|
||||
}
|
||||
|
||||
// Returns time elapsed time in milliseconds
|
||||
float operator-(const TrtCudaEvent& e) const
|
||||
{
|
||||
float time{0};
|
||||
cudaCheck(cudaEventElapsedTime(&time, e.get(), get()));
|
||||
return time;
|
||||
}
|
||||
|
||||
private:
|
||||
cudaEvent_t mEvent{};
|
||||
};
|
||||
|
||||
//! Out-of-line definition: needs the complete TrtCudaEvent type.
inline void TrtCudaStream::wait(TrtCudaEvent& event)
{
    cudaCheck(cudaStreamWaitEvent(mStream, event.get(), 0));
}
|
||||
|
||||
//!
|
||||
//! \class TrtCudaGraph
|
||||
//! \brief Managed CUDA graph
|
||||
//!
|
||||
class TrtCudaGraph
|
||||
{
|
||||
public:
|
||||
explicit TrtCudaGraph() = default;
|
||||
|
||||
TrtCudaGraph(const TrtCudaGraph&) = delete;
|
||||
|
||||
TrtCudaGraph& operator=(const TrtCudaGraph&) = delete;
|
||||
|
||||
TrtCudaGraph(TrtCudaGraph&&) = delete;
|
||||
|
||||
TrtCudaGraph& operator=(TrtCudaGraph&&) = delete;
|
||||
|
||||
~TrtCudaGraph()
|
||||
{
|
||||
if (mGraphExec)
|
||||
{
|
||||
cudaGraphExecDestroy(mGraphExec);
|
||||
}
|
||||
}
|
||||
|
||||
void beginCapture(TrtCudaStream& stream)
|
||||
{
|
||||
cudaCheck(cudaStreamBeginCapture(stream.get(), cudaStreamCaptureModeThreadLocal));
|
||||
}
|
||||
|
||||
bool launch(TrtCudaStream& stream)
|
||||
{
|
||||
return cudaGraphLaunch(mGraphExec, stream.get()) == cudaSuccess;
|
||||
}
|
||||
|
||||
void endCapture(TrtCudaStream& stream)
|
||||
{
|
||||
cudaCheck(cudaStreamEndCapture(stream.get(), &mGraph));
|
||||
cudaCheck(cudaGraphInstantiate(&mGraphExec, mGraph, nullptr, nullptr, 0));
|
||||
cudaCheck(cudaGraphDestroy(mGraph));
|
||||
}
|
||||
|
||||
void endCaptureOnError(TrtCudaStream& stream)
|
||||
{
|
||||
// There are two possibilities why stream capture would fail:
|
||||
// (1) stream is in cudaErrorStreamCaptureInvalidated state.
|
||||
// (2) TRT reports a failure.
|
||||
// In case (1), the returning mGraph should be nullptr.
|
||||
// In case (2), the returning mGraph is not nullptr, but it should not be used.
|
||||
const auto ret = cudaStreamEndCapture(stream.get(), &mGraph);
|
||||
if (ret == cudaErrorStreamCaptureInvalidated)
|
||||
{
|
||||
assert(mGraph == nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(ret == cudaSuccess);
|
||||
assert(mGraph != nullptr);
|
||||
cudaCheck(cudaGraphDestroy(mGraph));
|
||||
mGraph = nullptr;
|
||||
}
|
||||
// Clean up any CUDA error.
|
||||
cudaGetLastError();
|
||||
sample::gLogWarning << "The CUDA graph capture on the stream has failed." << std::endl;
|
||||
}
|
||||
|
||||
private:
|
||||
cudaGraph_t mGraph{};
|
||||
cudaGraphExec_t mGraphExec{};
|
||||
};
|
||||
|
||||
//!
//! \class TrtCudaBuffer
//! \brief Owning buffer whose allocation/deallocation policy is supplied by
//!        the functor types A (allocator) and D (deallocator).
//!
template <typename A, typename D>
class TrtCudaBuffer
{
public:
    TrtCudaBuffer() = default;

    TrtCudaBuffer(const TrtCudaBuffer&) = delete;
    TrtCudaBuffer& operator=(const TrtCudaBuffer&) = delete;

    //! Move: steals rhs's allocation; rhs is left empty.
    TrtCudaBuffer(TrtCudaBuffer&& rhs)
    {
        reset(rhs.mPtr);
        rhs.mPtr = nullptr;
    }

    TrtCudaBuffer& operator=(TrtCudaBuffer&& rhs)
    {
        if (this != &rhs)
        {
            reset(rhs.mPtr);
            rhs.mPtr = nullptr;
        }
        return *this;
    }

    ~TrtCudaBuffer()
    {
        reset();
    }

    //! Allocates \p size bytes immediately.
    TrtCudaBuffer(size_t size)
    {
        A()(&mPtr, size);
    }

    //! Frees any current allocation, then allocates \p size bytes.
    void allocate(size_t size)
    {
        reset();
        A()(&mPtr, size);
    }

    //! Frees the current allocation (if any) and takes ownership of \p ptr.
    void reset(void* ptr = nullptr)
    {
        if (mPtr)
        {
            D()(mPtr);
        }
        mPtr = ptr;
    }

    //! Raw pointer to the owned allocation, or nullptr.
    void* get() const
    {
        return mPtr;
    }

private:
    void* mPtr{nullptr};
};
|
||||
|
||||
struct DeviceAllocator
|
||||
{
|
||||
void operator()(void** ptr, size_t size)
|
||||
{
|
||||
cudaCheck(cudaMalloc(ptr, size));
|
||||
}
|
||||
};
|
||||
|
||||
struct DeviceDeallocator
|
||||
{
|
||||
void operator()(void* ptr)
|
||||
{
|
||||
cudaCheck(cudaFree(ptr));
|
||||
}
|
||||
};
|
||||
|
||||
struct ManagedAllocator
|
||||
{
|
||||
void operator()(void** ptr, size_t size)
|
||||
{
|
||||
cudaCheck(cudaMallocManaged(ptr, size));
|
||||
}
|
||||
};
|
||||
|
||||
struct HostAllocator
|
||||
{
|
||||
void operator()(void** ptr, size_t size)
|
||||
{
|
||||
cudaCheck(cudaMallocHost(ptr, size));
|
||||
}
|
||||
};
|
||||
|
||||
struct HostDeallocator
|
||||
{
|
||||
void operator()(void* ptr)
|
||||
{
|
||||
cudaCheck(cudaFreeHost(ptr));
|
||||
}
|
||||
};
|
||||
|
||||
using TrtDeviceBuffer = TrtCudaBuffer<DeviceAllocator, DeviceDeallocator>;
|
||||
using TrtManagedBuffer = TrtCudaBuffer<ManagedAllocator, DeviceDeallocator>;
|
||||
|
||||
using TrtHostBuffer = TrtCudaBuffer<HostAllocator, HostDeallocator>;
|
||||
|
||||
//!
|
||||
//! \class MirroredBuffer
|
||||
//! \brief Coupled host and device buffers
|
||||
//!
|
||||
class IMirroredBuffer
|
||||
{
|
||||
public:
|
||||
//!
|
||||
//! Allocate memory for the mirrored buffer give the size
|
||||
//! of the allocation.
|
||||
//!
|
||||
virtual void allocate(size_t size) = 0;
|
||||
|
||||
//!
|
||||
//! Get the pointer to the device side buffer.
|
||||
//!
|
||||
//! \return pointer to device memory or nullptr if uninitialized.
|
||||
//!
|
||||
virtual void* getDeviceBuffer() const = 0;
|
||||
|
||||
//!
|
||||
//! Get the pointer to the host side buffer.
|
||||
//!
|
||||
//! \return pointer to host memory or nullptr if uninitialized.
|
||||
//!
|
||||
virtual void* getHostBuffer() const = 0;
|
||||
|
||||
//!
|
||||
//! Copy the memory from host to device.
|
||||
//!
|
||||
virtual void hostToDevice(TrtCudaStream& stream) = 0;
|
||||
|
||||
//!
|
||||
//! Copy the memory from device to host.
|
||||
//!
|
||||
virtual void deviceToHost(TrtCudaStream& stream) = 0;
|
||||
|
||||
//!
|
||||
//! Interface to get the size of the memory
|
||||
//!
|
||||
//! \return the size of memory allocated.
|
||||
//!
|
||||
virtual size_t getSize() const = 0;
|
||||
|
||||
//!
|
||||
//! Virtual destructor declaraion
|
||||
//!
|
||||
virtual ~IMirroredBuffer() = default;
|
||||
|
||||
}; // class IMirroredBuffer
|
||||
|
||||
//!
|
||||
//! Class to have a separate memory buffer for discrete device and host allocations.
|
||||
//!
|
||||
class DiscreteMirroredBuffer : public IMirroredBuffer
|
||||
{
|
||||
public:
|
||||
void allocate(size_t size) override
|
||||
{
|
||||
mSize = size;
|
||||
mHostBuffer.allocate(size);
|
||||
mDeviceBuffer.allocate(size);
|
||||
}
|
||||
|
||||
void* getDeviceBuffer() const override
|
||||
{
|
||||
return mDeviceBuffer.get();
|
||||
}
|
||||
|
||||
void* getHostBuffer() const override
|
||||
{
|
||||
return mHostBuffer.get();
|
||||
}
|
||||
|
||||
void hostToDevice(TrtCudaStream& stream) override
|
||||
{
|
||||
cudaCheck(cudaMemcpyAsync(mDeviceBuffer.get(), mHostBuffer.get(), mSize, cudaMemcpyHostToDevice, stream.get()));
|
||||
}
|
||||
|
||||
void deviceToHost(TrtCudaStream& stream) override
|
||||
{
|
||||
cudaCheck(cudaMemcpyAsync(mHostBuffer.get(), mDeviceBuffer.get(), mSize, cudaMemcpyDeviceToHost, stream.get()));
|
||||
}
|
||||
|
||||
size_t getSize() const override
|
||||
{
|
||||
return mSize;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t mSize{0};
|
||||
TrtHostBuffer mHostBuffer;
|
||||
TrtDeviceBuffer mDeviceBuffer;
|
||||
}; // class DiscreteMirroredBuffer
|
||||
|
||||
//!
|
||||
//! Class to have a unified memory buffer for embedded devices.
|
||||
//!
|
||||
class UnifiedMirroredBuffer : public IMirroredBuffer
|
||||
{
|
||||
public:
|
||||
void allocate(size_t size) override
|
||||
{
|
||||
mSize = size;
|
||||
mBuffer.allocate(size);
|
||||
}
|
||||
|
||||
void* getDeviceBuffer() const override
|
||||
{
|
||||
return mBuffer.get();
|
||||
}
|
||||
|
||||
void* getHostBuffer() const override
|
||||
{
|
||||
return mBuffer.get();
|
||||
}
|
||||
|
||||
void hostToDevice(TrtCudaStream& stream) override
|
||||
{
|
||||
// Does nothing since we are using unified memory.
|
||||
}
|
||||
|
||||
void deviceToHost(TrtCudaStream& stream) override
|
||||
{
|
||||
// Does nothing since we are using unified memory.
|
||||
}
|
||||
|
||||
size_t getSize() const override
|
||||
{
|
||||
return mSize;
|
||||
}
|
||||
|
||||
private:
|
||||
size_t mSize{0};
|
||||
TrtManagedBuffer mBuffer;
|
||||
}; // class UnifiedMirroredBuffer
|
||||
|
||||
//!
|
||||
//! Class to allocate memory for outputs with data-dependent shapes. The sizes of those are unknown so pre-allocation is
|
||||
//! not possible.
|
||||
//!
|
||||
class OutputAllocator : public nvinfer1::IOutputAllocator
|
||||
{
|
||||
public:
|
||||
OutputAllocator(IMirroredBuffer* buffer)
|
||||
: mBuffer(buffer)
|
||||
{
|
||||
}
|
||||
|
||||
void* reallocateOutput(
|
||||
char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override
|
||||
{
|
||||
// Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr
|
||||
// even for empty tensors, so allocate a dummy byte.
|
||||
size = std::max(size, static_cast<uint64_t>(1));
|
||||
if (size > mSize)
|
||||
{
|
||||
mBuffer->allocate(roundUp(size, alignment));
|
||||
mSize = size;
|
||||
}
|
||||
return mBuffer->getDeviceBuffer();
|
||||
}
|
||||
|
||||
void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override {}
|
||||
|
||||
IMirroredBuffer* getBuffer()
|
||||
{
|
||||
return mBuffer.get();
|
||||
}
|
||||
|
||||
~OutputAllocator() override {}
|
||||
|
||||
private:
|
||||
std::unique_ptr<IMirroredBuffer> mBuffer;
|
||||
uint64_t mSize{};
|
||||
};
|
||||
|
||||
inline void setCudaDevice(int device, std::ostream& os)
|
||||
{
|
||||
cudaCheck(cudaSetDevice(device));
|
||||
|
||||
cudaDeviceProp properties;
|
||||
cudaCheck(cudaGetDeviceProperties(&properties, device));
|
||||
|
||||
// clang-format off
|
||||
os << "=== Device Information ===" << std::endl;
|
||||
os << "Selected Device: " << properties.name << std::endl;
|
||||
os << "Compute Capability: " << properties.major << "." << properties.minor << std::endl;
|
||||
os << "SMs: " << properties.multiProcessorCount << std::endl;
|
||||
os << "Device Global Memory: " << (properties.totalGlobalMem >> 20) << " MiB" << std::endl;
|
||||
os << "Shared Memory per SM: " << (properties.sharedMemPerMultiprocessor >> 10) << " KiB" << std::endl;
|
||||
os << "Memory Bus Width: " << properties.memoryBusWidth << " bits"
|
||||
<< " (ECC " << (properties.ECCEnabled != 0 ? "enabled" : "disabled") << ")" << std::endl;
|
||||
os << "Application Compute Clock Rate: " << properties.clockRate / 1000000.0F << " GHz" << std::endl;
|
||||
os << "Application Memory Clock Rate: " << properties.memoryClockRate / 1000000.0F << " GHz" << std::endl;
|
||||
os << std::endl;
|
||||
os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is "
|
||||
<< "currently running at." << std::endl;
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
inline int32_t getCudaDriverVersion()
|
||||
{
|
||||
int32_t version{-1};
|
||||
cudaCheck(cudaDriverGetVersion(&version));
|
||||
return version;
|
||||
}
|
||||
|
||||
inline int32_t getCudaRuntimeVersion()
|
||||
{
|
||||
int32_t version{-1};
|
||||
cudaCheck(cudaRuntimeGetVersion(&version));
|
||||
return version;
|
||||
}
|
||||
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_DEVICE_H
|
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_ENGINES_H
|
||||
#define TRT_SAMPLE_ENGINES_H
|
||||
|
||||
#include <cstring>
#include <iostream>
#include <memory>
#include <vector>

#include "NvCaffeParser.h"
#include "NvInfer.h"
#include "NvInferConsistency.h"
#include "NvInferSafeRuntime.h"
#include "NvOnnxParser.h"
#include "NvUffParser.h"
#include "sampleOptions.h"
#include "sampleUtils.h"
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
struct Parser
|
||||
{
|
||||
std::unique_ptr<nvcaffeparser1::ICaffeParser> caffeParser;
|
||||
std::unique_ptr<nvuffparser::IUffParser> uffParser;
|
||||
std::unique_ptr<nvonnxparser::IParser> onnxParser;
|
||||
|
||||
operator bool() const
|
||||
{
|
||||
return caffeParser || uffParser || onnxParser;
|
||||
}
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief A helper class to hold a serialized engine (std or safe) and only deserialize it when being accessed.
|
||||
//!
|
||||
class LazilyDeserializedEngine
|
||||
{
|
||||
public:
|
||||
//!
|
||||
//! \brief Delete default constructor to make sure isSafe and DLACore are always set.
|
||||
//!
|
||||
LazilyDeserializedEngine() = delete;
|
||||
|
||||
//!
|
||||
//! \brief Constructor of LazilyDeserializedEngine.
|
||||
//!
|
||||
LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
|
||||
nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath)
|
||||
: mIsSafe(isSafe)
|
||||
, mVersionCompatible(versionCompatible)
|
||||
, mDLACore(DLACore)
|
||||
, mTempdir(tempdir)
|
||||
, mTempfileControls(tempfileControls)
|
||||
, mLeanDLLPath(leanDLLPath)
|
||||
{
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Move from another LazilyDeserializedEngine.
|
||||
//!
|
||||
LazilyDeserializedEngine(LazilyDeserializedEngine&& other)
|
||||
{
|
||||
mIsSafe = other.mIsSafe;
|
||||
mVersionCompatible = other.mVersionCompatible;
|
||||
mDLACore = other.mDLACore;
|
||||
mEngineBlob = std::move(other.mEngineBlob);
|
||||
mEngine = std::move(other.mEngine);
|
||||
mSafeEngine = std::move(other.mSafeEngine);
|
||||
mTempdir = std::move(other.mTempdir);
|
||||
mTempfileControls = other.mTempfileControls;
|
||||
mLeanDLLPath = std::move(other.mLeanDLLPath);
|
||||
mDynamicPlugins = std::move(other.mDynamicPlugins);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Delete copy constructor.
|
||||
//!
|
||||
LazilyDeserializedEngine(LazilyDeserializedEngine const& other) = delete;
|
||||
|
||||
//!
|
||||
//! \brief Get the pointer to the ICudaEngine. Triggers deserialization if not already done so.
|
||||
//!
|
||||
nvinfer1::ICudaEngine* get();
|
||||
|
||||
//!
|
||||
//! \brief Get the pointer to the ICudaEngine and release the ownership.
|
||||
//!
|
||||
nvinfer1::ICudaEngine* release();
|
||||
|
||||
//!
|
||||
//! \brief Get the pointer to the safe::ICudaEngine. Triggers deserialization if not already done so.
|
||||
//!
|
||||
nvinfer1::safe::ICudaEngine* getSafe();
|
||||
|
||||
//!
|
||||
//! \brief Get the underlying blob storing serialized engine.
|
||||
//!
|
||||
std::vector<uint8_t> const& getBlob() const
|
||||
{
|
||||
return mEngineBlob;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Set the underlying blob storing serialized engine.
|
||||
//!
|
||||
void setBlob(void* data, size_t size)
|
||||
{
|
||||
mEngineBlob.resize(size);
|
||||
std::memcpy(mEngineBlob.data(), data, size);
|
||||
mEngine.reset();
|
||||
mSafeEngine.reset();
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Release the underlying blob without deleting the deserialized engine.
|
||||
//!
|
||||
void releaseBlob()
|
||||
{
|
||||
mEngineBlob.clear();
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Get if safe mode is enabled.
|
||||
//!
|
||||
bool isSafe()
|
||||
{
|
||||
return mIsSafe;
|
||||
}
|
||||
|
||||
void setDynamicPlugins(std::vector<std::string> const& dynamicPlugins)
|
||||
{
|
||||
mDynamicPlugins = dynamicPlugins;
|
||||
}
|
||||
|
||||
private:
|
||||
bool mIsSafe{false};
|
||||
bool mVersionCompatible{false};
|
||||
int32_t mDLACore{-1};
|
||||
std::vector<uint8_t> mEngineBlob;
|
||||
|
||||
std::string mTempdir{};
|
||||
nvinfer1::TempfileControlFlags mTempfileControls{getTempfileControlDefaults()};
|
||||
std::string mLeanDLLPath{};
|
||||
std::vector<std::string> mDynamicPlugins;
|
||||
|
||||
//! \name Owned TensorRT objects
|
||||
//! Per TensorRT object lifetime requirements as outlined in the developer guide,
|
||||
//! the runtime must remain live while any engines created by the runtime are live.
|
||||
//! DO NOT ADJUST the declaration order here: runtime -> (engine|safeEngine).
|
||||
//! Destruction occurs in reverse declaration order: (engine|safeEngine) -> runtime.
|
||||
//!@{
|
||||
|
||||
//! The runtime used to track parent of mRuntime if one exists.
|
||||
//! Needed to load mRuntime if lean.so is supplied through file system path.
|
||||
std::unique_ptr<nvinfer1::IRuntime> mParentRuntime{};
|
||||
|
||||
//! The runtime that is used to deserialize the engine.
|
||||
std::unique_ptr<nvinfer1::IRuntime> mRuntime{};
|
||||
|
||||
//! If mIsSafe is false, this points to the deserialized std engine
|
||||
std::unique_ptr<nvinfer1::ICudaEngine> mEngine{};
|
||||
|
||||
//! If mIsSafe is true, this points to the deserialized safe engine
|
||||
std::unique_ptr<nvinfer1::safe::ICudaEngine> mSafeEngine{};
|
||||
|
||||
//!@}
|
||||
};
|
||||
|
||||
struct BuildEnvironment
|
||||
{
|
||||
BuildEnvironment() = delete;
|
||||
BuildEnvironment(BuildEnvironment const& other) = delete;
|
||||
BuildEnvironment(BuildEnvironment&& other) = delete;
|
||||
BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir,
|
||||
nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "")
|
||||
: engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath)
|
||||
{
|
||||
}
|
||||
|
||||
//! \name Owned TensorRT objects
|
||||
//! Per TensorRT object lifetime requirements as outlined in the developer guide,
|
||||
//! factory objects must remain live while the objects created by those factories
|
||||
//! are live (with the exception of builder -> engine).
|
||||
//! DO NOT ADJUST the declaration order here: builder -> network -> parser.
|
||||
//! Destruction occurs in reverse declaration order: parser -> network -> builder.
|
||||
//!@{
|
||||
|
||||
//! The builder used to build the engine.
|
||||
std::unique_ptr<nvinfer1::IBuilder> builder;
|
||||
|
||||
//! The network used by the builder.
|
||||
std::unique_ptr<nvinfer1::INetworkDefinition> network;
|
||||
|
||||
//! The parser used to specify the network.
|
||||
Parser parser;
|
||||
|
||||
//! The engine.
|
||||
LazilyDeserializedEngine engine;
|
||||
//!@}
|
||||
};
|
||||
|
||||
//!
|
||||
//! \brief Set up network and config
|
||||
//!
|
||||
//! \return boolean Return true if network and config were successfully set
|
||||
//!
|
||||
bool setupNetworkAndConfig(const BuildOptions& build, const SystemOptions& sys, nvinfer1::IBuilder& builder,
|
||||
nvinfer1::INetworkDefinition& network, nvinfer1::IBuilderConfig& config, std::ostream& err,
|
||||
std::vector<std::vector<char>>& sparseWeights);
|
||||
|
||||
//!
|
||||
//! \brief Log refittable layers and weights of a refittable engine
|
||||
//!
|
||||
void dumpRefittable(nvinfer1::ICudaEngine& engine);
|
||||
|
||||
//!
|
||||
//! \brief Load a serialized engine
|
||||
//!
|
||||
//! \return Pointer to the engine loaded or nullptr if the operation failed
|
||||
//!
|
||||
nvinfer1::ICudaEngine* loadEngine(std::string const& engine, int32_t DLACore, std::ostream& err);
|
||||
|
||||
//!
|
||||
//! \brief Save an engine into a file
|
||||
//!
|
||||
//! \return boolean Return true if the engine was successfully saved
|
||||
//!
|
||||
bool saveEngine(nvinfer1::ICudaEngine const& engine, std::string const& fileName, std::ostream& err);
|
||||
|
||||
//!
|
||||
//! \brief Create an engine from model or serialized file, and optionally save engine
|
||||
//!
|
||||
//! \return Pointer to the engine created or nullptr if the creation failed
|
||||
//!
|
||||
bool getEngineBuildEnv(
|
||||
ModelOptions const& model, BuildOptions const& build, SystemOptions& sys, BuildEnvironment& env, std::ostream& err);
|
||||
|
||||
//!
|
||||
//! \brief Create a serialized network
|
||||
//!
|
||||
//! \return Pointer to a host memory for a serialized network
|
||||
//!
|
||||
nvinfer1::IHostMemory* networkToSerialized(const BuildOptions& build, const SystemOptions& sys,
|
||||
nvinfer1::IBuilder& builder, nvinfer1::INetworkDefinition& network, std::ostream& err);
|
||||
|
||||
//!
|
||||
//! \brief Tranfer model to a serialized network
|
||||
//!
|
||||
//! \return Pointer to a host memory for a serialized network
|
||||
//!
|
||||
nvinfer1::IHostMemory* modelToSerialized(
|
||||
const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
|
||||
|
||||
//!
|
||||
//! \brief Serialize network and save it into a file
|
||||
//!
|
||||
//! \return boolean Return true if the network was successfully serialized and saved
|
||||
//!
|
||||
bool serializeAndSave(
|
||||
const ModelOptions& model, const BuildOptions& build, const SystemOptions& sys, std::ostream& err);
|
||||
|
||||
bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngine& engine, bool multiThreading);
|
||||
|
||||
//!
|
||||
//! \brief Set tensor scales from a calibration table
|
||||
//!
|
||||
void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector<IOFormat> const& inputFormats,
|
||||
std::vector<IOFormat> const& outputFormats, std::string const& calibrationFile);
|
||||
|
||||
//!
|
||||
//! \brief Check if safe runtime is loaded.
|
||||
//!
|
||||
bool hasSafeRuntime();
|
||||
|
||||
//!
|
||||
//! \brief Create a safe runtime object if the dynamic library is loaded.
|
||||
//!
|
||||
nvinfer1::safe::IRuntime* createSafeInferRuntime(nvinfer1::ILogger& logger) noexcept;
|
||||
|
||||
//!
|
||||
//! \brief Check if consistency checker is loaded.
|
||||
//!
|
||||
bool hasConsistencyChecker();
|
||||
|
||||
//!
|
||||
//! \brief Create a consistency checker object if the dynamic library is loaded.
|
||||
//!
|
||||
nvinfer1::consistency::IConsistencyChecker* createConsistencyChecker(
|
||||
nvinfer1::ILogger& logger, nvinfer1::IHostMemory const* engine) noexcept;
|
||||
|
||||
//!
|
||||
//! \brief Run consistency check on serialized engine.
|
||||
//!
|
||||
bool checkSafeEngine(void const* serializedEngine, int32_t const engineSize);
|
||||
|
||||
bool loadEngineToBuildEnv(std::string const& engine, bool enableConsistency, BuildEnvironment& env, std::ostream& err);
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_ENGINES_H
|
@ -0,0 +1,141 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_ENTRYPOINTS_H
|
||||
#define TRT_SAMPLE_ENTRYPOINTS_H
|
||||
|
||||
//! \file sampleEntrypoints.h
|
||||
//!
|
||||
//! Declares and conditionally defines entrypoints needed to create base TensorRT objects, depending
|
||||
//! on whether the given sample uses TRT at link time or dynamically. Since common code is built once
|
||||
//! and shared across all samples (both link-time and dynamic TRT), it does not define these entrypoints,
|
||||
//! so each sample must define them individually.
|
||||
//!
|
||||
//! Samples that use TRT at link time can define DEFINE_TRT_ENTRYPOINTS before including this header to
|
||||
//! pick up the definitions here.
|
||||
|
||||
#include "NvCaffeParser.h"
|
||||
#include "NvInfer.h"
|
||||
#include "NvOnnxParser.h"
|
||||
#include "NvUffParser.h"
|
||||
#include "logger.h"
|
||||
|
||||
extern nvinfer1::IBuilder* createBuilder();
|
||||
extern nvinfer1::IRuntime* createRuntime();
|
||||
extern nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine);
|
||||
|
||||
extern nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network);
|
||||
|
||||
extern nvcaffeparser1::ICaffeParser* sampleCreateCaffeParser();
|
||||
extern void shutdownCaffeParser();
|
||||
|
||||
extern nvuffparser::IUffParser* sampleCreateUffParser();
|
||||
extern void shutdownUffParser();
|
||||
|
||||
#if !defined(DEFINE_TRT_ENTRYPOINTS)
|
||||
#define DEFINE_TRT_ENTRYPOINTS 0
|
||||
#endif
|
||||
|
||||
// Allow opting out of individual entrypoints that are unused by the sample
|
||||
#if !defined(DEFINE_TRT_BUILDER_ENTRYPOINT)
|
||||
#define DEFINE_TRT_BUILDER_ENTRYPOINT 1
|
||||
#endif
|
||||
#if !defined(DEFINE_TRT_RUNTIME_ENTRYPOINT)
|
||||
#define DEFINE_TRT_RUNTIME_ENTRYPOINT 1
|
||||
#endif
|
||||
#if !defined(DEFINE_TRT_REFITTER_ENTRYPOINT)
|
||||
#define DEFINE_TRT_REFITTER_ENTRYPOINT 1
|
||||
#endif
|
||||
#if !defined(DEFINE_TRT_ONNX_PARSER_ENTRYPOINT)
|
||||
#define DEFINE_TRT_ONNX_PARSER_ENTRYPOINT 1
|
||||
#endif
|
||||
#if !defined(DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT)
|
||||
#define DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT 1
|
||||
#endif
|
||||
|
||||
#if DEFINE_TRT_ENTRYPOINTS
|
||||
//! Creates a TensorRT builder using the sample-wide logger;
//! nullptr when the builder entrypoint is compiled out.
nvinfer1::IBuilder* createBuilder()
{
#if DEFINE_TRT_BUILDER_ENTRYPOINT
    return nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger());
#else
    return nullptr;
#endif
}
|
||||
|
||||
//! Creates a TensorRT runtime using the sample-wide logger;
//! nullptr when the runtime entrypoint is compiled out.
nvinfer1::IRuntime* createRuntime()
{
#if DEFINE_TRT_RUNTIME_ENTRYPOINT
    return nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger());
#else
    return nullptr;
#endif
}
|
||||
|
||||
//! Creates a refitter for \p engine using the sample-wide logger;
//! nullptr when the refitter entrypoint is compiled out.
nvinfer1::IRefitter* createRefitter(nvinfer1::ICudaEngine& engine)
{
#if DEFINE_TRT_REFITTER_ENTRYPOINT
    return nvinfer1::createInferRefitter(engine, sample::gLogger.getTRTLogger());
#else
    return nullptr;
#endif
}
|
||||
|
||||
//! Creates an ONNX parser bound to \p network;
//! nullptr when the ONNX parser entrypoint is compiled out.
nvonnxparser::IParser* createONNXParser(nvinfer1::INetworkDefinition& network)
{
#if DEFINE_TRT_ONNX_PARSER_ENTRYPOINT
    return nvonnxparser::createParser(network, sample::gLogger.getTRTLogger());
#else
    return nullptr;
#endif
}
|
||||
|
||||
//! Creates a Caffe parser; nullptr when the legacy parser entrypoint is compiled out.
nvcaffeparser1::ICaffeParser* sampleCreateCaffeParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
    return nvcaffeparser1::createCaffeParser();
#else
    return nullptr;
#endif
}
|
||||
|
||||
//! Releases the Caffe parser's protobuf resources; no-op when compiled out.
void shutdownCaffeParser()
{
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
    nvcaffeparser1::shutdownProtobufLibrary();
#endif
}
|
||||
|
||||
nvuffparser::IUffParser* sampleCreateUffParser()
|
||||
{
|
||||
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
|
||||
return nvuffparser::createUffParser();
|
||||
#else
|
||||
return {};
|
||||
#endif
|
||||
}
|
||||
|
||||
void shutdownUffParser()
|
||||
{
|
||||
#if DEFINE_TRT_LEGACY_PARSER_ENTRYPOINT
|
||||
nvuffparser::shutdownProtobufLibrary();
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // DEFINE_TRT_ENTRYPOINTS
|
||||
|
||||
#endif // TRT_SAMPLE_ENTRYPOINTS_H
|
@ -0,0 +1,264 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_INFERENCE_H
|
||||
#define TRT_SAMPLE_INFERENCE_H
|
||||
|
||||
#include "sampleEngines.h"
|
||||
#include "sampleReporting.h"
|
||||
#include "sampleUtils.h"
|
||||
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
#include "NvInferSafeRuntime.h"
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
//! Aggregates everything needed to run inference on an engine produced by a
//! BuildEnvironment: the (lazily deserialized) engine, per-stream execution
//! contexts and bindings, an optional profiler, and safe-runtime equivalents.
//! Non-copyable and non-movable; constructed once per inference session.
struct InferenceEnvironment
{
    InferenceEnvironment() = delete;
    InferenceEnvironment(InferenceEnvironment const& other) = delete;
    InferenceEnvironment(InferenceEnvironment&& other) = delete;
    //! Takes ownership of the engine built by \p bEnv.
    //! NOTE(review): member init order runs engine(std::move(bEnv.engine))
    //! before safe(bEnv.engine.isSafe()), so isSafe() is queried on the
    //! moved-from engine — confirm LazilyDeserializedEngine keeps that flag
    //! valid after a move.
    InferenceEnvironment(BuildEnvironment& bEnv) : engine(std::move(bEnv.engine)), safe(bEnv.engine.isSafe())
    {
    }

    LazilyDeserializedEngine engine;
    std::unique_ptr<Profiler> profiler;
    std::vector<std::unique_ptr<nvinfer1::IExecutionContext>> contexts;   // one per inference stream
    std::vector<std::unique_ptr<Bindings>> bindings;                      // one per inference stream
    bool error{false};  // set when any stage of setup/inference fails

    bool safe{false};   // true when the engine targets the safe runtime
    std::vector<std::unique_ptr<nvinfer1::safe::IExecutionContext>> safeContexts;

    //! Return the context for stream \p streamIdx; specialized below for the
    //! standard and safe runtime context types.
    template <class ContextType>
    inline ContextType* getContext(int32_t streamIdx);

    //! Storage for input shape tensors.
    //!
    //! It's important that the addresses of the data do not change between the calls to
    //! setTensorAddress/setInputShape (which tells TensorRT where the input shape tensor is)
    //! and enqueueV2/enqueueV3 (when TensorRT might use the input shape tensor).
    //!
    //! The input shape tensors could alternatively be handled via member bindings,
    //! but it simplifies control-flow to store the data here since it's shared across
    //! the bindings.
    std::list<std::vector<int32_t>> inputShapeTensorValues;
};
|
||||
|
||||
//! Standard-runtime specialization: fetch the IExecutionContext for a stream.
template <>
inline nvinfer1::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx)
{
    return contexts[streamIdx].get();
}

//! Safe-runtime specialization: fetch the safe IExecutionContext for a stream.
template <>
inline nvinfer1::safe::IExecutionContext* InferenceEnvironment::getContext(int32_t streamIdx)
{
    return safeContexts[streamIdx].get();
}
|
||||
|
||||
//!
|
||||
//! \brief Set up contexts and bindings for inference
|
||||
//!
|
||||
bool setUpInference(InferenceEnvironment& iEnv, InferenceOptions const& inference, SystemOptions const& system);
|
||||
|
||||
//!
|
||||
//! \brief Deserialize the engine and time how long it takes.
|
||||
//!
|
||||
bool timeDeserialize(InferenceEnvironment& iEnv, SystemOptions const& sys);
|
||||
|
||||
//!
|
||||
//! \brief Run inference and collect timing, return false if any error hit during inference
|
||||
//!
|
||||
bool runInference(
|
||||
InferenceOptions const& inference, InferenceEnvironment& iEnv, int32_t device, std::vector<InferenceTrace>& trace);
|
||||
|
||||
//!
|
||||
//! \brief Get layer information of the engine.
|
||||
//!
|
||||
std::string getLayerInformation(
|
||||
nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context, nvinfer1::LayerInformationFormat format);
|
||||
|
||||
//! One I/O tensor binding: its host/device buffer, direction, element count,
//! and data type, plus helpers to fill input data and dump contents.
struct Binding
{
    bool isInput{false};                              // true for network inputs
    std::unique_ptr<IMirroredBuffer> buffer;          // paired host/device storage
    std::unique_ptr<OutputAllocator> outputAllocator; // used for dynamically-sized outputs
    int64_t volume{0};                                // element count of the tensor
    nvinfer1::DataType dataType{nvinfer1::DataType::kFLOAT};

    //! Load input data from \p fileName into the buffer.
    void fill(std::string const& fileName);

    //! Fill the buffer with generated (random/default) data.
    void fill();

    //! Print the buffer contents to \p os using the given dims/strides layout.
    //! \p vectorDim and \p spv describe the vectorized dimension and scalars
    //! per vector for vectorized formats.
    void dump(std::ostream& os, nvinfer1::Dims dims, nvinfer1::Dims strides, int32_t vectorDim, int32_t spv,
        std::string const separator = " ") const;
};
|
||||
|
||||
//! Metadata describing a single engine I/O tensor, gathered when bindings are
//! set up; feeds Bindings::addBinding.
struct TensorInfo
{
    int32_t bindingIndex{-1};      // engine binding slot; -1 when unset
    char const* name{nullptr};     // tensor name owned by the engine
    nvinfer1::Dims dims{};
    bool isDynamic{};              // true when any dimension is runtime-determined
    int32_t comps{-1};             // components per element (vectorized formats)
    nvinfer1::Dims strides{};
    int32_t vectorDimIndex{-1};    // vectorized dimension index; -1 when not vectorized
    bool isInput{};
    nvinfer1::DataType dataType{};
    int64_t vol{-1};               // cached element volume; -1 until updateVolume

    //! Recompute the cached volume for the given batch size.
    //! Delegates to the sampleUtils volume() overload taking strides and
    //! vectorization into account.
    void updateVolume(int32_t batch)
    {
        vol = volume(dims, strides, vectorDimIndex, comps, batch);
    }
};
|
||||
|
||||
//! Owns the full set of tensor bindings for one execution context: name-to-slot
//! lookup, buffers, host<->device transfer helpers, and dump/inspection
//! utilities templated on the (standard or safe) context type.
class Bindings
{
public:
    Bindings() = delete;
    //! \param useManaged when true, buffers use managed (unified) memory.
    explicit Bindings(bool useManaged)
        : mUseManaged(useManaged)
    {
    }

    //! Create a binding described by \p tensorInfo; when \p fileName is
    //! non-empty the input data is loaded from that file.
    void addBinding(TensorInfo const& tensorInfo, std::string const& fileName = "");

    //! Raw device-pointer array in binding order (for enqueue APIs).
    void** getDeviceBuffers();

    //! Asynchronously copy all input buffers host -> device on \p stream.
    void transferInputToDevice(TrtCudaStream& stream);

    //! Asynchronously copy all output buffers device -> host on \p stream.
    void transferOutputToHost(TrtCudaStream& stream);

    //! Fill binding \p binding from a data file.
    void fill(int binding, std::string const& fileName)
    {
        mBindings[binding].fill(fileName);
    }

    //! Fill binding \p binding with generated data.
    void fill(int binding)
    {
        mBindings[binding].fill();
    }

    //! Print the (possibly context-resolved) dimensions of one binding.
    template <typename ContextType>
    void dumpBindingDimensions(int32_t binding, ContextType const& context, std::ostream& os) const;

    //! Print the values of one binding, \p separator-joined, repeated per batch.
    template <typename ContextType>
    void dumpBindingValues(ContextType const& context, int32_t binding, std::ostream& os,
        std::string const& separator = " ", int32_t batch = 1) const;

    //! Write each binding's raw contents to files (one per binding).
    template <typename ContextType>
    void dumpRawBindingToFiles(ContextType const& context, std::ostream& os) const;

    //! Dump only input bindings.
    template <typename ContextType>
    void dumpInputs(ContextType const& context, std::ostream& os) const
    {
        auto isInput = [](Binding const& b) { return b.isInput; };
        dumpBindings(context, isInput, os);
    }

    //! Dump only output bindings.
    template <typename ContextType>
    void dumpOutputs(ContextType const& context, std::ostream& os) const
    {
        auto isOutput = [](Binding const& b) { return !b.isInput; };
        dumpBindings(context, isOutput, os);
    }

    //! Dump every binding.
    template <typename ContextType>
    void dumpBindings(ContextType const& context, std::ostream& os) const
    {
        auto all = [](Binding const& b) { return true; };
        dumpBindings(context, all, os);
    }

    //! Dump each binding for which \p predicate returns true: name, resolved
    //! dimensions, then the values.
    template <typename ContextType>
    void dumpBindings(
        ContextType const& context, std::function<bool(Binding const&)> predicate, std::ostream& os) const
    {
        for (auto const& n : mNames)
        {
            auto const binding = n.second;
            if (predicate(mBindings[binding]))
            {
                os << n.first << ": (";
                dumpBindingDimensions(binding, context, os);
                os << ")" << std::endl;

                dumpBindingValues(context, binding, os);
                os << std::endl;
            }
        }
    }

    //! Name -> slot map restricted to input bindings.
    std::unordered_map<std::string, int> getInputBindings() const
    {
        auto isInput = [](Binding const& b) { return b.isInput; };
        return getBindings(isInput);
    }

    //! Name -> slot map restricted to output bindings.
    std::unordered_map<std::string, int> getOutputBindings() const
    {
        auto isOutput = [](Binding const& b) { return !b.isInput; };
        return getBindings(isOutput);
    }

    //! Name -> slot map over all bindings.
    std::unordered_map<std::string, int> getBindings() const
    {
        auto all = [](Binding const& b) { return true; };
        return getBindings(all);
    }

    //! Name -> slot map over bindings matching \p predicate.
    std::unordered_map<std::string, int> getBindings(std::function<bool(Binding const&)> predicate) const;

    //! Register every buffer address with a standard-runtime context.
    bool setTensorAddresses(nvinfer1::IExecutionContext& context) const;

    //! Register every buffer address with a safe-runtime context.
    bool setSafeTensorAddresses(nvinfer1::safe::IExecutionContext& context) const;

private:
    std::unordered_map<std::string, int32_t> mNames; // tensor name -> index into mBindings
    std::vector<Binding> mBindings;
    std::vector<void*> mDevicePointers;              // parallel to mBindings
    bool mUseManaged{false};
};
|
||||
|
||||
//! Bundles the options, environment, and collected traces for one inference
//! task when running several engines/tasks concurrently (see
//! runMultiTasksInference).
struct TaskInferenceEnvironment
{
    //! Load \p engineFile and prepare an InferenceEnvironment on \p deviceId.
    //! \p bs defaults to batchNotProvided (i.e. use the engine's batch).
    TaskInferenceEnvironment(std::string engineFile, InferenceOptions inference, int32_t deviceId = 0,
        int32_t DLACore = -1, int32_t bs = batchNotProvided);
    InferenceOptions iOptions{};
    int32_t device{defaultDevice};
    int32_t batch{batchNotProvided};
    std::unique_ptr<InferenceEnvironment> iEnv;
    std::vector<InferenceTrace> trace; // per-query timing records
};
|
||||
|
||||
bool runMultiTasksInference(std::vector<std::unique_ptr<TaskInferenceEnvironment>>& tEnvList);
|
||||
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_INFERENCE_H
|
@ -0,0 +1,456 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_OPTIONS_H
|
||||
#define TRT_SAMPLE_OPTIONS_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
// Build default params
constexpr int32_t maxBatchNotProvided{0};       // sentinel: user did not pass --maxBatch
constexpr int32_t defaultMinTiming{1};          // builder timing iterations (minimum)
constexpr int32_t defaultAvgTiming{8};          // builder timing iterations (averaged)
constexpr int32_t defaultMaxAuxStreams{-1};     // -1: let TensorRT pick
constexpr int32_t defaultBuilderOptimizationLevel{3};

// System default params
constexpr int32_t defaultDevice{0};             // CUDA device ordinal

// Inference default params
constexpr int32_t defaultBatch{1};
constexpr int32_t batchNotProvided{0};          // sentinel: user did not pass --batch
constexpr int32_t defaultStreams{1};
constexpr int32_t defaultIterations{10};
// Durations below are presumably milliseconds (warmUp/sleep/idle) and seconds
// (duration), matching how they are reported — TODO confirm against parse().
constexpr float defaultWarmUp{200.F};
constexpr float defaultDuration{3.F};
constexpr float defaultSleep{};
constexpr float defaultIdle{};
constexpr float defaultPersistentCacheRatio{0};

// Reporting default params
constexpr int32_t defaultAvgRuns{10};
constexpr std::array<float, 3> defaultPercentiles{90, 95, 99};
|
||||
|
||||
//! How strictly the builder must honor per-layer precision requests.
enum class PrecisionConstraints
{
    kNONE,   //!< no constraints
    kOBEY,   //!< fail the build if a request cannot be met
    kPREFER  //!< honor requests when possible
};

//! Input model format accepted on the command line.
enum class ModelFormat
{
    kANY,
    kCAFFE,
    kONNX,
    kUFF
};

//! Structured-sparsity handling for weights.
enum class SparsityFlag
{
    kDISABLE,
    kENABLE,
    kFORCE  //!< rewrite weights to be sparse even if they are not
};

//! Where the builder timing cache lives.
enum class TimingCacheMode
{
    kDISABLE,
    kLOCAL,   //!< per-build, in-memory cache
    kGLOBAL   //!< cache shared via a file
};

//!
//! \enum RuntimeMode
//!
//! \brief Used to dictate which TensorRT runtime library to dynamically load.
//!
enum class RuntimeMode
{
    //! Maps to libnvinfer.so or nvinfer.dll
    kFULL,

    //! Maps to libnvinfer_dispatch.so or nvinfer_dispatch.dll
    kDISPATCH,

    //! Maps to libnvinfer_lean.so or nvinfer_lean.dll
    kLEAN,
};
|
||||
|
||||
//! Stream the lowercase name of a RuntimeMode ("full", "dispatch", "lean").
//! Unknown values print nothing.
inline std::ostream& operator<<(std::ostream& os, RuntimeMode const mode)
{
    char const* label = "";
    if (mode == RuntimeMode::kFULL)
    {
        label = "full";
    }
    else if (mode == RuntimeMode::kDISPATCH)
    {
        label = "dispatch";
    }
    else if (mode == RuntimeMode::kLEAN)
    {
        label = "lean";
    }
    os << label;
    return os;
}
|
||||
|
||||
//! Raw command-line tokens: option name -> value (multimap: options may repeat).
using Arguments = std::unordered_multimap<std::string, std::string>;

//! One allowed I/O representation: data type plus format bitmask.
using IOFormat = std::pair<nvinfer1::DataType, nvinfer1::TensorFormats>;

//! Min/opt/max shape vectors, indexed by OptProfileSelector.
using ShapeRange = std::array<std::vector<int32_t>, nvinfer1::EnumMax<nvinfer1::OptProfileSelector>()>;

// Per-layer overrides, keyed by layer name.
using LayerPrecisions = std::unordered_map<std::string, nvinfer1::DataType>;
using LayerOutputTypes = std::unordered_map<std::string, std::vector<nvinfer1::DataType>>;
using LayerDeviceTypes = std::unordered_map<std::string, nvinfer1::DeviceType>;
|
||||
|
||||
//! Abstract base for all option groups: each group consumes the arguments it
//! recognizes from the shared Arguments multimap.
class Options
{
public:
    virtual ~Options() = default;
    //! Parse and remove this group's recognized entries from \p arguments.
    virtual void parse(Arguments& arguments) = 0;
};
|
||||
|
||||
//! Model file path and its declared format.
class BaseModelOptions : public Options
{
public:
    ModelFormat format{ModelFormat::kANY};
    std::string model; // path to the model file

    void parse(Arguments& arguments) override;

    //! Print usage text for this option group.
    static void help(std::ostream& out);
};
|
||||
|
||||
//! Input tensor declarations for UFF models (name + dims), since UFF files do
//! not carry input shapes.
class UffInput : public Options
{
public:
    std::vector<std::pair<std::string, nvinfer1::Dims>> inputs;
    bool NHWC{false}; // interpret declared dims as NHWC instead of NCHW

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Full model description: base file/format plus the extras needed by legacy
//! parsers (Caffe prototxt, declared outputs, UFF inputs).
class ModelOptions : public Options
{
public:
    BaseModelOptions baseModel;
    std::string prototxt;              // Caffe deploy file
    std::vector<std::string> outputs;  // output tensor names (legacy parsers)
    UffInput uffInputs;

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Default tempfile policy: allow both on-disk temporary files and in-memory
//! files (both TempfileControlFlag bits set).
constexpr nvinfer1::TempfileControlFlags getTempfileControlDefaults()
{
    using F = nvinfer1::TempfileControlFlag;
    return (1U << static_cast<uint32_t>(F::kALLOW_TEMPORARY_FILES))
        | (1U << static_cast<uint32_t>(F::kALLOW_IN_MEMORY_FILES));
}
|
||||
|
||||
//! Everything controlling engine building: precision flags, memory pools,
//! per-layer overrides, optimization profiles, I/O formats, tactics, caches,
//! and runtime-compatibility settings.
class BuildOptions : public Options
{
public:
    int32_t maxBatch{maxBatchNotProvided};
    // Memory pool sizes; negative means "use TensorRT's default".
    // Units are not visible here — presumably MiB, TODO confirm in parse().
    double workspace{-1.0};
    double dlaSRAM{-1.0};
    double dlaLocalDRAM{-1.0};
    double dlaGlobalDRAM{-1.0};
    int32_t minTiming{defaultMinTiming};
    int32_t avgTiming{defaultAvgTiming};
    // Precision flags; tf32 is on by default, the rest opt-in.
    bool tf32{true};
    bool fp16{false};
    bool int8{false};
    bool fp8{false};
    bool directIO{false};
    PrecisionConstraints precisionConstraints{PrecisionConstraints::kNONE};
    LayerPrecisions layerPrecisions;
    LayerOutputTypes layerOutputTypes;
    LayerDeviceTypes layerDeviceTypes;
    bool safe{false};          // build for the safe runtime
    bool consistency{false};
    bool restricted{false};
    bool skipInference{false}; // build only, do not run
    bool save{false};          // serialize engine to `engine`
    bool load{false};          // deserialize engine from `engine`
    bool refittable{false};
    bool heuristic{false};
    bool versionCompatible{false};
    bool excludeLeanRuntime{false};
    int32_t builderOptimizationLevel{defaultBuilderOptimizationLevel};
    SparsityFlag sparsity{SparsityFlag::kDISABLE};
    nvinfer1::ProfilingVerbosity profilingVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};
    std::string engine;       // engine file path (see save/load)
    std::string calibration;  // INT8 calibration cache path
    using ShapeProfile = std::unordered_map<std::string, ShapeRange>;
    ShapeProfile shapes;      // optimization-profile shapes per input
    ShapeProfile shapesCalib; // shapes used during calibration
    std::vector<IOFormat> inputFormats;
    std::vector<IOFormat> outputFormats;
    nvinfer1::TacticSources enabledTactics{0};
    nvinfer1::TacticSources disabledTactics{0};
    TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
    std::string timingCacheFile{};
    // C++11 does not automatically generate hash function for enum class.
    // Use int32_t to support C++11 compilers.
    std::unordered_map<int32_t, bool> previewFeatures;
    nvinfer1::HardwareCompatibilityLevel hardwareCompatibilityLevel{nvinfer1::HardwareCompatibilityLevel::kNONE};
    std::string tempdir{};
    nvinfer1::TempfileControlFlags tempfileControls{getTempfileControlDefaults()};
    RuntimeMode useRuntime{RuntimeMode::kFULL};
    std::string leanDLLPath{};
    int32_t maxAuxStreams{defaultMaxAuxStreams};

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Device selection and plugin-loading options.
class SystemOptions : public Options
{
public:
    int32_t device{defaultDevice}; // CUDA device ordinal
    int32_t DLACore{-1};           // -1: do not use DLA
    bool fallback{false};          // allow GPU fallback for unsupported DLA layers
    bool ignoreParsedPluginLibs{false};
    std::vector<std::string> plugins;               // plugin libraries to load
    std::vector<std::string> setPluginsToSerialize; // plugins serialized into the engine
    std::vector<std::string> dynamicPlugins;

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Everything controlling the inference benchmark loop: iteration counts,
//! timing windows, stream/overlap behavior, and per-input data/shape choices.
class InferenceOptions : public Options
{
public:
    int32_t batch{batchNotProvided};
    int32_t iterations{defaultIterations};
    int32_t infStreams{defaultStreams};    // number of concurrent inference streams
    float warmup{defaultWarmUp};
    float duration{defaultDuration};
    float sleep{defaultSleep};
    float idle{defaultIdle};
    float persistentCacheRatio{defaultPersistentCacheRatio};
    bool overlap{true};          // overlap transfers with compute
    bool skipTransfers{false};
    bool useManaged{false};      // use managed (unified) memory buffers
    bool spin{false};            // busy-wait instead of yielding
    bool threads{false};         // one thread per stream
    bool graph{false};           // capture/launch via CUDA graphs
    bool rerun{false};
    bool timeDeserialize{false}; // measure engine deserialization time only
    bool timeRefit{false};
    std::unordered_map<std::string, std::string> inputs; // input name -> data file
    using ShapeProfile = std::unordered_map<std::string, std::vector<int32_t>>;
    ShapeProfile shapes; // concrete runtime shapes per input
    nvinfer1::ProfilingVerbosity nvtxVerbosity{nvinfer1::ProfilingVerbosity::kLAYER_NAMES_ONLY};

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Controls what gets reported and exported after a run.
class ReportingOptions : public Options
{
public:
    bool verbose{false};
    int32_t avgs{defaultAvgRuns}; // runs per printed average
    std::vector<float> percentiles{defaultPercentiles.begin(), defaultPercentiles.end()};
    bool refit{false};
    bool output{false};           // dump output tensor values
    bool dumpRawBindings{false};
    bool profile{false};          // per-layer profiling
    bool layerInfo{false};
    // Export file paths; empty means "do not export".
    std::string exportTimes;
    std::string exportOutput;
    std::string exportProfile;
    std::string exportLayerInfo;

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Options for the standalone safe-engine builder flow (subset of BuildOptions
//! that the safe runtime supports).
class SafeBuilderOptions : public Options
{
public:
    std::string serialized{};     // output path for the serialized safe engine
    std::string onnxModelFile{};
    bool help{false};
    bool verbose{false};
    std::vector<IOFormat> inputFormats;
    std::vector<IOFormat> outputFormats;
    bool int8{false};
    bool fp8{false};
    std::string calibFile{};      // INT8 calibration cache
    std::vector<std::string> plugins;
    bool consistency{false};
    bool standard{false};         // build with the standard (non-safe) builder
    TimingCacheMode timingCacheMode{TimingCacheMode::kLOCAL};
    std::string timingCacheFile{};
    SparsityFlag sparsity{SparsityFlag::kDISABLE};
    int32_t minTiming{defaultMinTiming};
    int32_t avgTiming{defaultAvgTiming};

    void parse(Arguments& arguments) override;

    //! Named printHelp (not help) because `help` is already a data member.
    static void printHelp(std::ostream& out);
};
|
||||
|
||||
//! Aggregates every option group for the full trtexec-style flow and parses
//! them together.
class AllOptions : public Options
{
public:
    ModelOptions model;
    BuildOptions build;
    SystemOptions system;
    InferenceOptions inference;
    ReportingOptions reporting;
    bool helps{false}; // user asked for usage text

    void parse(Arguments& arguments) override;

    static void help(std::ostream& out);
};
|
||||
|
||||
//! Per-task options for the multi-task inference mode (one engine per task).
class TaskInferenceOptions : public Options
{
public:
    std::string engine;              // engine file for this task
    int32_t device{defaultDevice};
    int32_t DLACore{-1};             // -1: do not use DLA
    int32_t batch{batchNotProvided};
    bool graph{false};               // use CUDA graphs
    float persistentCacheRatio{defaultPersistentCacheRatio};
    void parse(Arguments& arguments) override;
    static void help(std::ostream& out);
};
|
||||
|
||||
Arguments argsToArgumentsMap(int32_t argc, char* argv[]);
|
||||
|
||||
bool parseHelp(Arguments& arguments);
|
||||
|
||||
void helpHelp(std::ostream& out);
|
||||
|
||||
// Functions to print options
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const BaseModelOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const UffInput& input);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const IOFormat& format);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ShapeRange& dims);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ModelOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const BuildOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const SystemOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const InferenceOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const ReportingOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const AllOptions& options);
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options);
|
||||
|
||||
//! Stream dims as an "x"-separated list, e.g. 1x3x224x224 (empty for nbDims == 0).
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims)
{
    char const* sep = "";
    for (int32_t d = 0; d < dims.nbDims; ++d)
    {
        os << sep << dims.d[d];
        sep = "x";
    }
    return os;
}
|
||||
//! Stream the human-readable name of a WeightsRole (e.g. "Kernel", "Bias").
//! Unknown values print nothing.
inline std::ostream& operator<<(std::ostream& os, const nvinfer1::WeightsRole role)
{
    char const* label = "";
    switch (role)
    {
    case nvinfer1::WeightsRole::kKERNEL: label = "Kernel"; break;
    case nvinfer1::WeightsRole::kBIAS: label = "Bias"; break;
    case nvinfer1::WeightsRole::kSHIFT: label = "Shift"; break;
    case nvinfer1::WeightsRole::kSCALE: label = "Scale"; break;
    case nvinfer1::WeightsRole::kCONSTANT: label = "Constant"; break;
    case nvinfer1::WeightsRole::kANY: label = "Any"; break;
    }
    os << label;
    return os;
}
|
||||
|
||||
//! Stream a shape-like vector as an "x"-separated list, e.g. 1x3x224x224
//! (empty output for an empty vector).
inline std::ostream& operator<<(std::ostream& os, const std::vector<int32_t>& vec)
{
    char const* sep = "";
    for (auto const value : vec)
    {
        os << sep << value;
        sep = "x";
    }
    return os;
}
|
||||
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_OPTIONS_H
|
@ -0,0 +1,579 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <algorithm>
#include <cmath>
#include <exception>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <limits>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <utility>

#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleReporting.h"
|
||||
|
||||
using namespace nvinfer1;
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
//!
|
||||
//! \brief Find percentile in an ascending sequence of timings
|
||||
//! \note percentile must be in [0, 100]. Otherwise, an exception is thrown.
|
||||
//!
|
||||
template <typename T>
|
||||
float findPercentile(float percentile, std::vector<InferenceTime> const& timings, T const& toFloat)
|
||||
{
|
||||
int32_t const all = static_cast<int32_t>(timings.size());
|
||||
int32_t const exclude = static_cast<int32_t>((1 - percentile / 100) * all);
|
||||
if (timings.empty())
|
||||
{
|
||||
return std::numeric_limits<float>::infinity();
|
||||
}
|
||||
if (percentile < 0.F || percentile > 100.F)
|
||||
{
|
||||
throw std::runtime_error("percentile is not in [0, 100]!");
|
||||
}
|
||||
return toFloat(timings[std::max(all - 1 - exclude, 0)]);
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Find median in a sorted sequence of timings
|
||||
//!
|
||||
template <typename T>
|
||||
float findMedian(std::vector<InferenceTime> const& timings, T const& toFloat)
|
||||
{
|
||||
if (timings.empty())
|
||||
{
|
||||
return std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
int32_t const m = timings.size() / 2;
|
||||
if (timings.size() % 2)
|
||||
{
|
||||
return toFloat(timings[m]);
|
||||
}
|
||||
|
||||
return (toFloat(timings[m - 1]) + toFloat(timings[m])) / 2;
|
||||
}
|
||||
|
||||
//!
|
||||
//! \brief Find coefficient of variance (which is std / mean) in a sorted sequence of timings given the mean
|
||||
//!
|
||||
template <typename T>
|
||||
float findCoeffOfVariance(std::vector<InferenceTime> const& timings, T const& toFloat, float mean)
|
||||
{
|
||||
if (timings.empty())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (mean == 0.F)
|
||||
{
|
||||
return std::numeric_limits<float>::infinity();
|
||||
}
|
||||
|
||||
auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) {
|
||||
float const diff = toFloat(a) - mean;
|
||||
return acc + diff * diff;
|
||||
};
|
||||
float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size();
|
||||
|
||||
return std::sqrt(variance) / mean * 100.F;
|
||||
}
|
||||
|
||||
//! Convert one per-query trace (absolute start/end timestamps) into an
//! InferenceTime of durations: enqueue, H2D, compute, and D2H.
inline InferenceTime traceToTiming(const InferenceTrace& a)
{
    return InferenceTime((a.enqEnd - a.enqStart), (a.h2dEnd - a.h2dStart), (a.computeEnd - a.computeStart),
        (a.d2hEnd - a.d2hStart));
}
|
||||
|
||||
} // namespace
|
||||
|
||||
//! Print the benchmark prolog: warmup query count/duration (ms) and the number
//! of timed queries with the total trace duration (converted ms -> s).
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os)
{
    os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl
       << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl;
}
|
||||
|
||||
//! Print the trace details section: one averaged line per group of
//! \p runsPerAvg consecutive timings. A trailing partial group (fewer than
//! runsPerAvg entries) is not printed.
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os)
{
    int32_t count = 0;
    InferenceTime sum;  // running sum over the current group

    os << std::endl;
    os << "=== Trace details ===" << std::endl;
    os << "Trace averages of " << runsPerAvg << " runs:" << std::endl;
    for (auto const& t : timings)
    {
        sum += t;

        if (++count == runsPerAvg)
        {
            // clang-format off
            os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg
               << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg
               << " ms)" << std::endl;
            // clang-format on
            // Reset the accumulator for the next group.
            count = 0;
            sum.enq = 0;
            sum.h2d = 0;
            sum.compute = 0;
            sum.d2h = 0;
        }
    }
}
|
||||
|
||||
//! Print a glossary explaining each reported performance metric, one line per
//! metric, after a section header.
void printMetricExplanations(std::ostream& os)
{
    os << std::endl;
    os << "=== Explanations of the performance metrics ===" << std::endl;
    char const* const explanations[] = {
        "Total Host Walltime: the host walltime from when the first query (after warmups) is enqueued to when the "
        "last query is completed.",
        "GPU Compute Time: the GPU latency to execute the kernels for a query.",
        "Total GPU Compute Time: the summation of the GPU Compute Time of all the queries. If this is significantly "
        "shorter than Total Host Walltime, the GPU may be under-utilized because of host-side overheads or data "
        "transfers.",
        "Throughput: the observed throughput computed by dividing the number of queries by the Total Host Walltime. "
        "If this is significantly lower than the reciprocal of GPU Compute Time, the GPU may be under-utilized "
        "because of host-side overheads or data transfers.",
        "Enqueue Time: the host latency to enqueue a query. If this is longer than GPU Compute Time, the GPU may be "
        "under-utilized.",
        "H2D Latency: the latency for host-to-device data transfers for input tensors of a single query.",
        "D2H Latency: the latency for device-to-host data transfers for output tensors of a single query.",
        "Latency: the summation of H2D Latency, GPU Compute Time, and D2H Latency. This is the latency to infer a "
        "single query.",
    };
    for (auto const* explanation : explanations)
    {
        os << explanation << std::endl;
    }
}
|
||||
|
||||
//! Compute summary statistics (min/max/mean/median/percentiles/coefficient of
//! variance) of one metric over a set of timings.
//!
//! \param timings per-query timings (copied and sorted by the metric here)
//! \param metricGetter extracts the metric of interest from an InferenceTime
//! \param percentiles percentile values (in [0, 100]) to evaluate
//!
//! NOTE(review): front()/back() assume timings is non-empty — behavior is
//! undefined for an empty vector; confirm all callers guarantee this.
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
    std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles)
{
    auto const metricComparator
        = [metricGetter](InferenceTime const& a, InferenceTime const& b) { return metricGetter(a) < metricGetter(b); };
    auto const metricAccumulator = [metricGetter](float acc, InferenceTime const& a) { return acc + metricGetter(a); };
    // Sort a copy so that median/percentile lookups by index are valid.
    std::vector<InferenceTime> newTimings = timings;
    std::sort(newTimings.begin(), newTimings.end(), metricComparator);
    PerformanceResult result;
    result.min = metricGetter(newTimings.front());
    result.max = metricGetter(newTimings.back());
    result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0f, metricAccumulator) / newTimings.size();
    result.median = findMedian(newTimings, metricGetter);
    for (auto percentile : percentiles)
    {
        result.percentiles.emplace_back(findPercentile(percentile, newTimings, metricGetter));
    }
    result.coeffVar = findCoeffOfVariance(newTimings, metricGetter, result.mean);
    return result;
}
|
||||
|
||||
//! Print the performance summary of a timing trace to \p osInfo, heuristic warnings
//! (enqueue-bound, transfer-bound, unstable compute, multi-stream) to \p osWarning,
//! and metric explanations to \p osVerbose.
//!
//! \param timings     Post-warmup per-query timings.
//! \param walltimeMs  Host wall-clock duration of the benchmark window, in ms.
//! \param percentiles Percentile values (in %) to report for each metric.
//! \param batchSize   Queries per inference (normalized to 1 in explicit-batch mode by the caller).
//! \param infStreams  Number of inference streams used.
void printEpilog(std::vector<InferenceTime> const& timings, float walltimeMs, std::vector<float> const& percentiles,
    int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
{
    // Queries per second over the whole benchmark window (walltimeMs is in ms, hence * 1000).
    float const throughput = batchSize * timings.size() / walltimeMs * 1000;

    // Summarize each metric independently over the same trace.
    auto const getLatency = [](InferenceTime const& t) { return t.latency(); };
    auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles);

    auto const getEnqueue = [](InferenceTime const& t) { return t.enq; };
    auto const enqueueResult = getPerformanceResult(timings, getEnqueue, percentiles);

    auto const getH2d = [](InferenceTime const& t) { return t.h2d; };
    auto const h2dResult = getPerformanceResult(timings, getH2d, percentiles);

    auto const getCompute = [](InferenceTime const& t) { return t.compute; };
    auto const gpuComputeResult = getPerformanceResult(timings, getCompute, percentiles);

    auto const getD2h = [](InferenceTime const& t) { return t.d2h; };
    auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles);

    // Render one PerformanceResult as a single human-readable line.
    auto const toPerfString = [&](const PerformanceResult& r) {
        std::stringstream s;
        s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, "
          << "median = " << r.median << " ms";
        for (int32_t i = 0, n = percentiles.size(); i < n; ++i)
        {
            s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms";
        }
        return s.str();
    };

    osInfo << std::endl;
    osInfo << "=== Performance summary ===" << std::endl;
    osInfo << "Throughput: " << throughput << " qps" << std::endl;
    osInfo << "Latency: " << toPerfString(latencyResult) << std::endl;
    osInfo << "Enqueue Time: " << toPerfString(enqueueResult) << std::endl;
    osInfo << "H2D Latency: " << toPerfString(h2dResult) << std::endl;
    osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl;
    osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl;
    osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl;
    osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl;

    // Report warnings if the throughput is bound by other factors than GPU Compute Time.
    constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F};
    if (enqueueResult.median > kENQUEUE_BOUND_REPORTING_THRESHOLD * gpuComputeResult.median)
    {
        osWarning
            << "* Throughput may be bound by Enqueue Time rather than GPU Compute and the GPU may be under-utilized."
            << std::endl;
        osWarning << " If not already in use, --useCudaGraph (utilize CUDA graphs where possible) may increase the "
                     "throughput."
                  << std::endl;
    }
    if (h2dResult.median >= gpuComputeResult.median)
    {
        osWarning << "* Throughput may be bound by host-to-device transfers for the inputs rather than GPU Compute and "
                     "the GPU may be under-utilized."
                  << std::endl;
        osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl;
    }
    if (d2hResult.median >= gpuComputeResult.median)
    {
        osWarning << "* Throughput may be bound by device-to-host transfers for the outputs rather than GPU Compute "
                     "and the GPU may be under-utilized."
                  << std::endl;
        osWarning << " Add --noDataTransfers flag to disable data transfers." << std::endl;
    }

    // Report warnings if the GPU Compute Time is unstable.
    constexpr float kUNSTABLE_PERF_REPORTING_THRESHOLD{1.0F};
    if (gpuComputeResult.coeffVar > kUNSTABLE_PERF_REPORTING_THRESHOLD)
    {
        osWarning << "* GPU compute time is unstable, with coefficient of variance = " << gpuComputeResult.coeffVar
                  << "%." << std::endl;
        osWarning << " If not already in use, locking GPU clock frequency or adding --useSpinWait may improve the "
                  << "stability." << std::endl;
    }

    // Report warnings if multiple inference streams are used.
    if (infStreams > 1)
    {
        osWarning << "* Multiple inference streams are used. Latencies may not be accurate since inferences may run in "
                  << " parallel. Please use \"Throughput\" as the performance metric instead." << std::endl;
    }

    // Explain what the metrics mean.
    osInfo << "Explanations of the performance metrics are printed in the verbose logs." << std::endl;
    printMetricExplanations(osVerbose);

    osInfo << std::endl;
}
|
||||
|
||||
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
|
||||
InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose)
|
||||
{
|
||||
int32_t batchSize = infOpts.batch;
|
||||
float const warmupMs = infOpts.warmup;
|
||||
auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; };
|
||||
auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup);
|
||||
int32_t const warmups = noWarmup - trace.begin();
|
||||
float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart;
|
||||
// when implicit batch used, batchSize = options.inference.batch, which is parsed through --batch
|
||||
// when explicit batch used, batchSize = options.inference.batch = 0
|
||||
// treat inference with explicit batch as a single query and report the throughput
|
||||
batchSize = batchSize ? batchSize : 1;
|
||||
printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo);
|
||||
|
||||
std::vector<InferenceTime> timings(trace.size() - warmups);
|
||||
std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming);
|
||||
printTiming(timings, reportingOpts.avgs, osInfo);
|
||||
printEpilog(
|
||||
timings, benchTime, reportingOpts.percentiles, batchSize, infOpts.infStreams, osInfo, osWarning, osVerbose);
|
||||
|
||||
if (!reportingOpts.exportTimes.empty())
|
||||
{
|
||||
exportJSONTrace(trace, reportingOpts.exportTimes, warmups);
|
||||
}
|
||||
}
|
||||
|
||||
//! Printed format:
//! [ value, ...]
//! value ::= { "start enq" : time, "end enq" : time, "start h2d" : time, "end h2d" : time, "start compute" : time,
//!             "end compute" : time, "start d2h" : time, "end d2h" : time, "h2d" : time, "compute" : time,
//!             "d2h" : time, "latency" : time }
//!
//! \brief Dump the post-warmup portion of \p trace to \p fileName as a JSON array
//!        of per-query timestamps plus derived durations. All values are in ms.
void exportJSONTrace(std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups)
{
    // Truncate any existing file so the output is one well-formed JSON document.
    std::ofstream os(fileName, std::ofstream::trunc);
    os << "[" << std::endl;
    // Leading-separator trick: first entry prefixed with a space, later ones with
    // ", " -- avoids emitting a trailing comma.
    char const* sep = " ";
    for (auto iter = trace.begin() + nbWarmups; iter < trace.end(); ++iter)
    {
        auto const& t = *iter;
        // Derive h2d/compute/d2h/latency durations from the raw timestamps.
        InferenceTime const it(traceToTiming(t));
        os << sep << "{ ";
        sep = ", ";
        // clang-format off
        os << "\"startEnqMs\" : " << t.enqStart << sep << "\"endEnqMs\" : " << t.enqEnd << sep
           << "\"startH2dMs\" : " << t.h2dStart << sep << "\"endH2dMs\" : " << t.h2dEnd << sep
           << "\"startComputeMs\" : " << t.computeStart << sep << "\"endComputeMs\" : " << t.computeEnd << sep
           << "\"startD2hMs\" : " << t.d2hStart << sep << "\"endD2hMs\" : " << t.d2hEnd << sep
           << "\"h2dMs\" : " << it.h2d << sep << "\"computeMs\" : " << it.compute << sep
           << "\"d2hMs\" : " << it.d2h << sep << "\"latencyMs\" : " << it.latency() << " }"
           << std::endl;
        // clang-format on
    }
    os << "]" << std::endl;
}
|
||||
|
||||
//! IProfiler callback invoked by TensorRT once per layer per inference iteration.
//! Layers are assumed to be reported in the same order every iteration: the first
//! pass over the network appends one LayerProfile per layer; later passes detect the
//! wrap-around (the first layer's name repeats) and rewind the cursor to the front.
void Profiler::reportLayerTime(char const* layerName, float timeMs) noexcept
{
    if (mIterator == mLayers.end())
    {
        // Either this is the very first report ever, or a full pass just finished.
        // "first" is true when the incoming layer matches the first recorded one,
        // i.e. a new iteration is starting.
        bool const first = !mLayers.empty() && mLayers.begin()->name == layerName;
        // Count a new iteration on the initial report and on every wrap-around.
        mUpdatesCount += mLayers.empty() || first;
        if (first)
        {
            mIterator = mLayers.begin();
        }
        else
        {
            // Still in the first pass: register a new layer at the end.
            mLayers.emplace_back();
            mLayers.back().name = layerName;
            mIterator = mLayers.end() - 1;
        }
    }

    // Record this sample and advance the cursor to the layer expected next.
    mIterator->timeMs.push_back(timeMs);
    ++mIterator;
}
|
||||
|
||||
void Profiler::print(std::ostream& os) const noexcept
|
||||
{
|
||||
std::string const nameHdr("Layer");
|
||||
std::string const timeHdr(" Time (ms)");
|
||||
std::string const avgHdr(" Avg. Time (ms)");
|
||||
std::string const medHdr(" Median Time (ms)");
|
||||
std::string const percentageHdr(" Time %");
|
||||
|
||||
float const totalTimeMs = getTotalTime();
|
||||
|
||||
auto const cmpLayer = [](LayerProfile const& a, LayerProfile const& b) { return a.name.size() < b.name.size(); };
|
||||
auto const longestName = std::max_element(mLayers.begin(), mLayers.end(), cmpLayer);
|
||||
auto const nameLength = std::max(longestName->name.size() + 1, nameHdr.size());
|
||||
auto const timeLength = timeHdr.size();
|
||||
auto const avgLength = avgHdr.size();
|
||||
auto const medLength = medHdr.size();
|
||||
auto const percentageLength = percentageHdr.size();
|
||||
|
||||
os << std::endl
|
||||
<< "=== Profile (" << mUpdatesCount << " iterations ) ===" << std::endl
|
||||
<< std::setw(nameLength) << nameHdr << timeHdr << avgHdr << medHdr << percentageHdr << std::endl;
|
||||
|
||||
for (auto const& p : mLayers)
|
||||
{
|
||||
if (p.timeMs.empty() || getTotalTime(p) == 0.F)
|
||||
{
|
||||
// there is no point to print profiling for layer that didn't run at all
|
||||
continue;
|
||||
}
|
||||
// clang-format off
|
||||
os << std::setw(nameLength) << p.name << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p)
|
||||
<< std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p)
|
||||
<< std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p)
|
||||
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100
|
||||
<< std::endl;
|
||||
}
|
||||
{
|
||||
os << std::setw(nameLength) << "Total" << std::setw(timeLength) << std::fixed << std::setprecision(2)
|
||||
<< totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount
|
||||
<< std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime()
|
||||
<< std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 << std::endl;
|
||||
// clang-format on
|
||||
}
|
||||
os << std::endl;
|
||||
}
|
||||
|
||||
//! Export the collected profile as a JSON array: the first element carries the
//! iteration count, followed by one object per layer with total / average /
//! median time (ms) and its share of the overall time.
void Profiler::exportJSONProfile(std::string const& fileName) const noexcept
{
    // Truncate any existing file so the output is one well-formed JSON document.
    std::ofstream os(fileName, std::ofstream::trunc);
    os << "[" << std::endl << " { \"count\" : " << mUpdatesCount << " }" << std::endl;

    auto const totalTimeMs = getTotalTime();

    for (auto const& l : mLayers)
    {
        // Every layer entry starts with ", " -- the count object above is always first.
        // clang-format off
        os << ", {" << R"( "name" : ")" << l.name << R"(")"
            R"(, "timeMs" : )" << getTotalTime(l)
           << R"(, "averageMs" : )" << getAvgTime(l)
           << R"(, "medianMs" : )" << getMedianTime(l)
           << R"(, "percentage" : )" << getTotalTime(l) / totalTimeMs * 100
           << " }" << std::endl;
        // clang-format on
    }
    os << "]" << std::endl;
}
|
||||
|
||||
//! Print the contents of all input tensors tracked by \p bindings to \p os,
//! preceded by an "Input Tensors:" heading.
void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os)
{
    os << "Input Tensors:" << std::endl;
    bindings.dumpInputs(context, os);
}
|
||||
|
||||
//! Print the contents of all output tensors tracked by \p bindings to \p os,
//! preceded by an "Output Tensors:" heading.
template <typename ContextType>
void dumpOutputs(ContextType const& context, Bindings const& bindings, std::ostream& os)
{
    os << "Output Tensors:" << std::endl;
    bindings.dumpOutputs(context, os);
}

// Explicit instantiations for the standard and safety execution contexts.
template
void dumpOutputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
template
void dumpOutputs(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
|
||||
|
||||
//! Delegate to Bindings to write each binding's raw buffer to a file; \p os receives
//! any log output produced while dumping.
template <typename ContextType>
void dumpRawBindingsToFiles(ContextType const& context, Bindings const& bindings, std::ostream& os)
{
    bindings.dumpRawBindingToFiles(context, os);
}

// Explicit instantiations for the standard and safety execution contexts.
template
void dumpRawBindingsToFiles(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);

template
void dumpRawBindingsToFiles(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
|
||||
|
||||
//! Write every output binding (name, dimensions, values) of \p bindings to
//! \p fileName as a JSON array, truncating any existing file.
template <typename ContextType>
void exportJSONOutput(
    ContextType const& context, Bindings const& bindings, std::string const& fileName, int32_t batch)
{
    std::ofstream os(fileName, std::ofstream::trunc);
    // Leading-separator trick to avoid a trailing comma between array entries.
    std::string sep = " ";
    auto const output = bindings.getOutputBindings();
    os << "[" << std::endl;
    for (auto const& binding : output)
    {
        // clang-format off
        os << sep << R"({ "name" : ")" << binding.first << "\"" << std::endl;
        sep = ", ";
        os << " " << sep << R"("dimensions" : ")";
        bindings.dumpBindingDimensions(binding.second, context, os);
        os << "\"" << std::endl;
        os << " " << sep << "\"values\" : [ ";
        bindings.dumpBindingValues(context, binding.second, os, sep, batch);
        os << " ]" << std::endl << " }" << std::endl;
        // clang-format on
    }
    os << "]" << std::endl;
}

// Explicit instantiations for the standard and safety execution contexts.
template
void exportJSONOutput(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::string const& fileName, int32_t batch);

template void exportJSONOutput(nvinfer1::safe::IExecutionContext const& context, Bindings const& bindings,
    std::string const& fileName, int32_t batch);
|
||||
|
||||
//! Print the engine/context layer information to the logger (one line per layer)
//! and/or export it as JSON to reporting.exportLayerInfo, per \p reporting.
//! Always returns true.
bool printLayerInfo(
    ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context)
{
    // Fetch the layer information in the requested format.
    auto const layerInfo = [&](nvinfer1::LayerInformationFormat format) {
        return getLayerInformation(engine, context, format);
    };

    if (reporting.layerInfo)
    {
        sample::gLogInfo << "Layer Information:" << std::endl;
        sample::gLogInfo << layerInfo(nvinfer1::LayerInformationFormat::kONELINE) << std::flush;
    }

    if (!reporting.exportLayerInfo.empty())
    {
        std::ofstream exportFile(reporting.exportLayerInfo, std::ofstream::trunc);
        exportFile << layerInfo(nvinfer1::LayerInformationFormat::kJSON) << std::flush;
    }

    return true;
}
|
||||
|
||||
//! Emit per-layer profiling data to the logger and/or a JSON file, as requested
//! in \p reporting, then warn if auxiliary streams may have skewed the totals.
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv)
{
    bool const wantsPrint = reporting.profile;
    bool const wantsExport = !reporting.exportProfile.empty();

    if (wantsPrint)
    {
        iEnv.profiler->print(sample::gLogInfo);
    }
    if (wantsExport)
    {
        iEnv.profiler->exportJSONProfile(reporting.exportProfile);
    }

    // With auxiliary streams, layers can run concurrently, so the summed "Total"
    // per-layer latency may overstate wall-clock time -- tell the user.
    if (!iEnv.safe && (wantsPrint || wantsExport))
    {
        int32_t const nbAuxStreams = iEnv.engine.get()->getNbAuxStreams();
        if (nbAuxStreams > 0)
        {
            sample::gLogWarning << "The engine uses " << nbAuxStreams << " auxiliary streams, so the \"Total\" latency "
                                << "may not be accurate because some layers may have run in parallel!" << std::endl;
        }
    }
}
|
||||
|
||||
namespace details
|
||||
{
|
||||
template <typename ContextType>
|
||||
void dump(std::unique_ptr<ContextType> const& context, std::unique_ptr<Bindings> const& binding,
|
||||
ReportingOptions const& reporting, int32_t batch)
|
||||
{
|
||||
if (!context)
|
||||
{
|
||||
sample::gLogError << "Empty context! Skip printing outputs." << std::endl;
|
||||
return;
|
||||
}
|
||||
if (reporting.output)
|
||||
{
|
||||
dumpOutputs(*context, *binding, sample::gLogInfo);
|
||||
}
|
||||
if (reporting.dumpRawBindings)
|
||||
{
|
||||
dumpRawBindingsToFiles(*context, *binding, sample::gLogInfo);
|
||||
}
|
||||
if (!reporting.exportOutput.empty())
|
||||
{
|
||||
exportJSONOutput(*context, *binding, reporting.exportOutput, batch);
|
||||
}
|
||||
}
|
||||
} // namespace details
|
||||
|
||||
//! Print/export the first binding set's output tensors per \p reporting, using
//! the safe or standard execution context depending on iEnv.safe. Logs an error
//! and returns early when no bindings are present.
void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch)
{
    auto const& binding = iEnv.bindings.at(0);
    if (!binding)
    {
        sample::gLogError << "Empty bindings! Skip printing outputs." << std::endl;
        return;
    }

    // Dispatch to the matching context flavor; details::dump handles the rest.
    if (iEnv.safe)
    {
        details::dump(iEnv.safeContexts.at(0), binding, reporting, batch);
    }
    else
    {
        details::dump(iEnv.contexts.at(0), binding, reporting, batch);
    }
}
|
||||
|
||||
} // namespace sample
|
@ -0,0 +1,302 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_REPORTING_H
|
||||
#define TRT_SAMPLE_REPORTING_H
|
||||
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
#include "sampleDevice.h"
|
||||
#include "sampleInference.h"
|
||||
#include "sampleOptions.h"
|
||||
#include "sampleUtils.h"
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
class Bindings;
|
||||
|
||||
//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime
{
    //! \param q enqueue time (ms)
    //! \param i host-to-device transfer time (ms)
    //! \param c compute time (ms)
    //! \param o device-to-host transfer time (ms)
    InferenceTime(float q, float i, float c, float o)
        : enq(q)
        , h2d(i)
        , compute(c)
        , d2h(o)
    {
    }

    InferenceTime() = default;
    InferenceTime(InferenceTime const&) = default;
    InferenceTime(InferenceTime&&) = default;
    InferenceTime& operator=(InferenceTime const&) = default;
    InferenceTime& operator=(InferenceTime&&) = default;
    ~InferenceTime() = default;

    float enq{0};     // Enqueue
    float h2d{0};     // Host to Device
    float compute{0}; // Compute
    float d2h{0};     // Device to Host

    // ideal latency: transfers plus compute, excluding enqueue overhead
    float latency() const
    {
        return h2d + compute + d2h;
    }
};
|
||||
|
||||
//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace
{
    //! \param s  stream index the query ran on
    //! \param es/ee enqueue start/end (ms)
    //! \param is/ie host-to-device start/end (ms)
    //! \param cs/ce compute start/end (ms)
    //! \param os/oe device-to-host start/end (ms)
    InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
        : stream(s)
        , enqStart(es)
        , enqEnd(ee)
        , h2dStart(is)
        , h2dEnd(ie)
        , computeStart(cs)
        , computeEnd(ce)
        , d2hStart(os)
        , d2hEnd(oe)
    {
    }

    InferenceTrace() = default;
    InferenceTrace(InferenceTrace const&) = default;
    InferenceTrace(InferenceTrace&&) = default;
    InferenceTrace& operator=(InferenceTrace const&) = default;
    InferenceTrace& operator=(InferenceTrace&&) = default;
    ~InferenceTrace() = default;

    int32_t stream{0};      // Stream index the query ran on.
    float enqStart{0};      // Enqueue start timestamp (ms).
    float enqEnd{0};        // Enqueue end timestamp (ms).
    float h2dStart{0};      // Host-to-device start timestamp (ms).
    float h2dEnd{0};        // Host-to-device end timestamp (ms).
    float computeStart{0};  // Compute start timestamp (ms).
    float computeEnd{0};    // Compute end timestamp (ms).
    float d2hStart{0};      // Device-to-host start timestamp (ms).
    float d2hEnd{0};        // Device-to-host end timestamp (ms).
};
|
||||
|
||||
//! Component-wise sum of two timing samples.
inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b)
{
    InferenceTime sum;
    sum.enq = a.enq + b.enq;
    sum.h2d = a.h2d + b.h2d;
    sum.compute = a.compute + b.compute;
    sum.d2h = a.d2h + b.d2h;
    return sum;
}
|
||||
|
||||
//! Accumulate \p b into \p a; returns a copy of the updated value (same
//! by-value return as the original signature).
inline InferenceTime operator+=(InferenceTime& a, InferenceTime const& b)
{
    a = a + b;
    return a;
}
|
||||
|
||||
//!
//! \struct PerformanceResult
//! \brief Performance result of a performance metric
//!
struct PerformanceResult
{
    float min{0.F};                 // Smallest observed value (ms).
    float max{0.F};                 // Largest observed value (ms).
    float mean{0.F};                // Arithmetic mean (ms).
    float median{0.F};              // Median (ms).
    std::vector<float> percentiles; // One entry per requested percentile (ms).
    float coeffVar{0.F};            // coefficient of variation
};
|
||||
|
||||
//!
|
||||
//! \brief Print benchmarking time and number of traces collected
|
||||
//!
|
||||
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);
|
||||
|
||||
//!
|
||||
//! \brief Print a timing trace
|
||||
//!
|
||||
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);
|
||||
|
||||
//!
|
||||
//! \brief Print the performance summary of a trace
|
||||
//!
|
||||
void printEpilog(std::vector<InferenceTime> const& timings, std::vector<float> const& percentiles, int32_t batchSize,
|
||||
std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
|
||||
|
||||
//!
|
||||
//! \brief Get the result of a specific performance metric from a trace
|
||||
//!
|
||||
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
|
||||
std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
|
||||
|
||||
//!
|
||||
//! \brief Print the explanations of the performance metrics printed in printEpilog() function.
|
||||
//!
|
||||
void printMetricExplanations(std::ostream& os);
|
||||
|
||||
//!
|
||||
//! \brief Print and summarize a timing trace
|
||||
//!
|
||||
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
|
||||
InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);
|
||||
|
||||
//!
|
||||
//! \brief Export a timing trace to JSON file
|
||||
//!
|
||||
void exportJSONTrace(
|
||||
std::vector<InferenceTrace> const& InferenceTime, std::string const& fileName, int32_t const nbWarmups);
|
||||
|
||||
//!
|
||||
//! \brief Print input tensors to stream
|
||||
//!
|
||||
void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);
|
||||
|
||||
//!
|
||||
//! \brief Print output tensors to stream
|
||||
//!
|
||||
template <typename ContextType>
|
||||
void dumpOutputs(ContextType const& context, Bindings const& bindings, std::ostream& os);
|
||||
|
||||
template <typename ContextType>
|
||||
void dumpRawBindingsToFiles(ContextType const& context, Bindings const& bindings, std::ostream& os);
|
||||
|
||||
//!
|
||||
//! \brief Export output tensors to JSON file
|
||||
//!
|
||||
template <typename ContextType>
|
||||
void exportJSONOutput(
|
||||
ContextType const& context, Bindings const& bindings, std::string const& fileName, int32_t batch);
|
||||
|
||||
|
||||
//!
|
||||
//! \struct LayerProfile
|
||||
//! \brief Layer profile information
|
||||
//!
|
||||
struct LayerProfile
|
||||
{
|
||||
std::string name;
|
||||
std::vector<float> timeMs;
|
||||
};
|
||||
|
||||
//!
|
||||
//! \class Profiler
|
||||
//! \brief Collect per-layer profile information, assuming times are reported in the same order
|
||||
//!
|
||||
class Profiler : public nvinfer1::IProfiler
{

public:
    //! TensorRT callback: record \p timeMs for the layer named \p layerName.
    //! Relies on layers being reported in the same order every iteration.
    void reportLayerTime(char const* layerName, float timeMs) noexcept override;

    //! Print a formatted per-layer timing table to \p os.
    void print(std::ostream& os) const noexcept;

    //!
    //! \brief Export a profile to JSON file
    //!
    void exportJSONProfile(std::string const& fileName) const noexcept;

private:
    //! Sum of every recorded sample across all layers and iterations, in ms.
    float getTotalTime() const noexcept
    {
        auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
            return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus<float>());
        };
        return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime);
    }

    //! Median across iterations of the per-iteration total (sum over layers), in ms.
    //! NOTE(review): indexes lp.timeMs[run] for every layer using mLayers[0]'s sample
    //! count -- assumes all layers recorded the same number of runs, which holds only
    //! if reportLayerTime saw a fixed order each iteration. TODO confirm.
    float getMedianTime() const noexcept
    {
        if (mLayers.empty())
        {
            return 0.F;
        }
        std::vector<float> totalTime;
        for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
        {
            auto const layerTime
                = [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; };
            auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime);
            totalTime.push_back(t);
        }
        return median(totalTime);
    }

    //! Median sample of a single layer, in ms.
    float getMedianTime(LayerProfile const& p) const noexcept
    {
        return median(p.timeMs);
    }

    //! Median of \p vals (0 when empty); takes the vector by value because it sorts.
    static float median(std::vector<float> vals)
    {
        if (vals.empty())
        {
            return 0.F;
        }
        std::sort(vals.begin(), vals.end());
        if (vals.size() % 2U == 1U)
        {
            return vals[vals.size() / 2U];
        }
        return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F;
    }

    //! return the total runtime of given layer profile
    float getTotalTime(LayerProfile const& p) const noexcept
    {
        auto const& vals = p.timeMs;
        return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus<float>());
    }

    //! Mean sample of a single layer, in ms.
    //! NOTE(review): divides by timeMs.size() with no empty check -- callers
    //! (print/export) are expected to skip layers without samples. TODO confirm.
    float getAvgTime(LayerProfile const& p) const noexcept
    {
        return getTotalTime(p) / p.timeMs.size();
    }

    //! Per-layer profiles, in the order layers report.
    std::vector<LayerProfile> mLayers;
    //! Cursor into mLayers pointing at the layer expected to report next.
    std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
    //! Number of profiled iterations observed so far.
    int32_t mUpdatesCount{0};
};
|
||||
|
||||
//!
|
||||
//! \brief Print layer info to logger or export it to output JSON file.
|
||||
//!
|
||||
bool printLayerInfo(
|
||||
ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);
|
||||
|
||||
//! Forward declaration.
|
||||
struct InferenceEnvironment;
|
||||
|
||||
//!
|
||||
//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
|
||||
//!
|
||||
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv);
|
||||
|
||||
//!
|
||||
//! \brief Print binding output values to logger or export them to output JSON file.
|
||||
//!
|
||||
void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch);
|
||||
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_REPORTING_H
|
@ -0,0 +1,528 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "sampleUtils.h"
|
||||
#include "half.h"
|
||||
|
||||
using namespace nvinfer1;
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
//! Size in bytes of one element of \p dataType; 0 for unrecognized values.
size_t dataTypeSize(nvinfer1::DataType dataType)
{
    switch (dataType)
    {
    // 4-byte types.
    case nvinfer1::DataType::kFLOAT:
    case nvinfer1::DataType::kINT32: return 4U;
    // 2-byte types.
    case nvinfer1::DataType::kHALF: return 2U;
    // 1-byte types.
    case nvinfer1::DataType::kINT8:
    case nvinfer1::DataType::kUINT8:
    case nvinfer1::DataType::kBOOL:
    case nvinfer1::DataType::kFP8: return 1U;
    }
    return 0;
}
|
||||
|
||||
//! Compute the element capacity of a strided (possibly vectorized) tensor.
//!
//! \param dims    extents per axis
//! \param strides element strides per axis
//! \param vecDim  index of the vectorized axis, or negative when not vectorized
//! \param comps   components packed per vector element along vecDim
//! \param batch   leading batch multiplier (implicit-batch mode)
//! \return 0 for an empty tensor (any zero extent); otherwise the largest
//!         reachable element extent times batch and the vector width.
int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch)
{
    int32_t maxNbElems = 1;
    for (int32_t i = 0; i < dims.nbDims; ++i)
    {
        // Get effective length of axis.
        int32_t d = dims.d[i];
        // Any dimension is 0, it is an empty tensor.
        if (d == 0)
        {
            return 0;
        }
        if (i == vecDim)
        {
            // The vectorized axis packs comps components per element: round up.
            d = samplesCommon::divUp(d, comps);
        }
        // Track the furthest extent any axis can reach given its stride.
        maxNbElems = std::max(maxNbElems, d * strides.d[i]);
    }
    // Undo the per-element packing by multiplying the vector width back in.
    return static_cast<int64_t>(maxNbElems) * batch * (vecDim < 0 ? 1 : comps);
}
|
||||
|
||||
//! Convert a flat vector of extents into an nvinfer1::Dims, truncating (with a
//! warning) when the vector exceeds nvinfer1::Dims::MAX_DIMS entries.
nvinfer1::Dims toDims(std::vector<int32_t> const& vec)
{
    int32_t limit = static_cast<int32_t>(nvinfer1::Dims::MAX_DIMS);
    if (static_cast<int32_t>(vec.size()) > limit)
    {
        // NOTE(review): the message hard-codes "8"; keep in sync with
        // nvinfer1::Dims::MAX_DIMS if that constant ever changes.
        sample::gLogWarning << "Vector too long, only first 8 elements are used in dimension." << std::endl;
    }
    // Pick first nvinfer1::Dims::MAX_DIMS elements
    nvinfer1::Dims dims{std::min(static_cast<int32_t>(vec.size()), limit), {}};
    std::copy_n(vec.begin(), dims.nbDims, std::begin(dims.d));
    return dims;
}
|
||||
|
||||
void loadFromFile(std::string const& fileName, char* dst, size_t size)
|
||||
{
|
||||
ASSERT(dst);
|
||||
|
||||
std::ifstream file(fileName, std::ios::in | std::ios::binary);
|
||||
if (file.is_open())
|
||||
{
|
||||
file.read(dst, size);
|
||||
size_t const nbBytesRead = file.gcount();
|
||||
file.close();
|
||||
if (nbBytesRead != size)
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "Unexpected file size for input file: " << fileName << ". Note: Expected: " << size
|
||||
<< " bytes but only read: " << nbBytesRead << " bytes";
|
||||
throw std::invalid_argument(msg.str());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ostringstream msg;
|
||||
msg << "Cannot open file " << fileName << "!";
|
||||
throw std::invalid_argument(msg.str());
|
||||
}
|
||||
}
|
||||
|
||||
//! Split \p s on \p separator into tokens. Consecutive separators produce empty
//! tokens; a trailing separator does NOT produce a trailing empty token; an
//! empty input yields an empty vector.
std::vector<std::string> splitToStringVec(std::string const& s, char separator)
{
    std::vector<std::string> tokens;

    size_t begin = 0;
    while (begin < s.length())
    {
        // Each token runs from 'begin' up to the next separator (or end of string).
        size_t end = s.find(separator, begin);
        if (end == std::string::npos)
        {
            end = s.length();
        }
        tokens.emplace_back(s.substr(begin, end - begin));
        begin = end + 1;
    }

    return tokens;
}
|
||||
|
||||
bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput /*= true*/)
|
||||
{
|
||||
bool broadcast = formats.size() == 1;
|
||||
bool validFormatsCount = broadcast || (formats.size() == nbBindings);
|
||||
if (!formats.empty() && !validFormatsCount)
|
||||
{
|
||||
if (isInput)
|
||||
{
|
||||
throw std::invalid_argument(
|
||||
"The number of inputIOFormats must match network's inputs or be one for broadcasting.");
|
||||
}
|
||||
|
||||
throw std::invalid_argument(
|
||||
"The number of outputIOFormats must match network's outputs or be one for broadcasting.");
|
||||
}
|
||||
return broadcast;
|
||||
}
|
||||
|
||||
//! Sparsify the weights of Constant layers that feed MatrixMultiply layers,
//! possibly through a chain of 2D Shuffle (transpose/reshape) layers.
//! Performs a forward analysis over the network's API graph, then rewrites the
//! qualifying Constant layers' weights in place with a 2:4 sparsity pattern.
//! The replacement buffers are appended to sparseWeights, which must outlive
//! the engine build.
void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
{
    using TensorToLayer = std::unordered_map<nvinfer1::ITensor*, nvinfer1::ILayer*>;
    using LayerToTensor = std::unordered_map<nvinfer1::ILayer*, nvinfer1::ITensor*>;

    // 1. Collect layers and tensors information from the network.
    TensorToLayer matmulI2L;  // MatMul second input tensor -> MatMul layer
    TensorToLayer constO2L;   // Constant output tensor -> Constant layer (float/half only)
    TensorToLayer shuffleI2L; // Shuffle input tensor -> Shuffle layer
    LayerToTensor shuffleL2O; // Shuffle layer -> its output tensor
    auto collectMappingInfo = [&](int32_t const idx)
    {
        ILayer* l = network.getLayer(idx);
        switch (l->getType())
        {
        case nvinfer1::LayerType::kMATRIX_MULTIPLY:
        {
            // assume weights on the second input.
            matmulI2L.insert({l->getInput(1), l});
            break;
        }
        case nvinfer1::LayerType::kCONSTANT:
        {
            DataType const dtype = static_cast<nvinfer1::IConstantLayer*>(l)->getWeights().type;
            if (dtype == nvinfer1::DataType::kFLOAT || dtype == nvinfer1::DataType::kHALF)
            {
                // Sparsify float only.
                constO2L.insert({l->getOutput(0), l});
            }
            break;
        }
        case nvinfer1::LayerType::kSHUFFLE:
        {
            shuffleI2L.insert({l->getInput(0), l});
            shuffleL2O.insert({l, l->getOutput(0)});
            break;
        }
        default: break;
        }
    };
    int32_t const nbLayers = network.getNbLayers();
    for (int32_t i = 0; i < nbLayers; ++i)
    {
        collectMappingInfo(i);
    }
    if (matmulI2L.size() == 0 || constO2L.size() == 0)
    {
        // No MatrixMultiply or Constant layer found, no weights to sparsify.
        return;
    }

    // Helper for analysis
    // True when the permutation swaps the two dimensions of a 2D tensor.
    auto isTranspose
        = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); };
    auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; };
    // NOTE(review): this predicate looks intended to accept identity reshapes, but
    // `d[i] != i || d[i] != -1` is true for every possible d[i] (no value equals both
    // i and -1), so the lambda returns false for any non-empty dims and the shuffle
    // walk below stops at the first reshape. Likely `&&` was intended — confirm
    // against upstream before relying on Shuffle traversal.
    auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool
    {
        for (int32_t i = 0; i < dims.nbDims; ++i)
        {
            if (dims.d[i] != i || dims.d[i] != -1)
            {
                return false;
            }
        }
        return true;
    };
    // Follow a chain of 2D transpose-only Shuffle layers starting at tensor t.
    // Flips needTranspose once per transpose encountered; returns the tensor at
    // the end of the chain (possibly t itself).
    auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor*
    {
        while (shuffleI2L.find(t) != shuffleI2L.end())
        {
            nvinfer1::IShuffleLayer* s = static_cast<nvinfer1::IShuffleLayer*>(shuffleI2L.at(t));
            if (!is2D(s->getInput(0)->getDimensions()) || !is2D(s->getReshapeDimensions())
                || !isIdenticalReshape(s->getReshapeDimensions()))
            {
                break;
            }

            if (isTranspose(s->getFirstTranspose()))
            {
                needTranspose = !needTranspose;
            }
            if (isTranspose(s->getSecondTranspose()))
            {
                needTranspose = !needTranspose;
            }

            t = shuffleL2O.at(s);
        }
        return t;
    };

    // 2. Forward analysis to collect the Constant layers connected to MatMul via Transpose
    std::unordered_map<nvinfer1::IConstantLayer*, bool> constantLayerToSparse;
    for (auto& o2l : constO2L)
    {
        // If need to transpose the weights of the Constant layer.
        // Need to transpose by default due to semantic difference.
        bool needTranspose{true};
        ITensor* t = tensorReachedViaTranspose(o2l.first, needTranspose);
        if (matmulI2L.find(t) == matmulI2L.end())
        {
            continue;
        }

        // check MatMul params...
        IMatrixMultiplyLayer* mm = static_cast<nvinfer1::IMatrixMultiplyLayer*>(matmulI2L.at(t));
        bool const twoInputs = mm->getNbInputs() == 2;
        bool const all2D = is2D(mm->getInput(0)->getDimensions()) && is2D(mm->getInput(1)->getDimensions());
        bool const isSimple = mm->getOperation(0) == nvinfer1::MatrixOperation::kNONE
            && mm->getOperation(1) != nvinfer1::MatrixOperation::kVECTOR;
        if (!(twoInputs && all2D && isSimple))
        {
            continue;
        }
        if (mm->getOperation(1) == nvinfer1::MatrixOperation::kTRANSPOSE)
        {
            needTranspose = !needTranspose;
        }

        constantLayerToSparse.insert({static_cast<IConstantLayer*>(o2l.second), needTranspose});
    }

    // 3. Finally, sparsify the weights
    // When needTranspose is set, the weights are transposed into (n, k) layout,
    // sparsified along the inner dimension, then transposed back in place.
    auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose)
    {
        Dims dims = layer->getOutput(0)->getDimensions();
        ASSERT(dims.nbDims == 2);
        int32_t const idxN = needTranspose ? 1 : 0;
        int32_t const n = dims.d[idxN];
        int32_t const k = dims.d[1 - idxN];
        sparseWeights.emplace_back();
        std::vector<int8_t>& spw = sparseWeights.back();
        Weights w = layer->getWeights();
        DataType const dtype = w.type;
        ASSERT(dtype == nvinfer1::DataType::kFLOAT
            || dtype == nvinfer1::DataType::kHALF); // non-float weights should have been ignored.

        if (needTranspose)
        {
            if (dtype == nvinfer1::DataType::kFLOAT)
            {
                spw.resize(w.count * sizeof(float));
                transpose2DWeights<float>(spw.data(), w.values, k, n);
            }
            else if (dtype == nvinfer1::DataType::kHALF)
            {
                spw.resize(w.count * sizeof(half_float::half));
                transpose2DWeights<half_float::half>(spw.data(), w.values, k, n);
            }

            w.values = spw.data();
            std::vector<int8_t> tmpW;
            sparsify(w, n, 1, tmpW);

            if (dtype == nvinfer1::DataType::kFLOAT)
            {
                transpose2DWeights<float>(spw.data(), tmpW.data(), n, k);
            }
            else if (dtype == nvinfer1::DataType::kHALF)
            {
                transpose2DWeights<half_float::half>(spw.data(), tmpW.data(), n, k);
            }
        }
        else
        {
            sparsify(w, n, 1, spw);
        }

        w.values = spw.data();
        layer->setWeights(w);
    };
    for (auto& l : constantLayerToSparse)
    {
        sparsifyConstantWeights(l.first, l.second);
    }
}
|
||||
|
||||
template <typename L>
|
||||
void setSparseWeights(L& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
|
||||
{
|
||||
auto weights = l.getKernelWeights();
|
||||
sparsify(weights, k, trs, sparseWeights);
|
||||
weights.values = sparseWeights.data();
|
||||
l.setKernelWeights(weights);
|
||||
}
|
||||
|
||||
// Explicit instantiation
|
||||
template void setSparseWeights<IConvolutionLayer>(
|
||||
IConvolutionLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
|
||||
template void setSparseWeights<IFullyConnectedLayer>(
|
||||
IFullyConnectedLayer& l, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
|
||||
|
||||
//! Walk every layer of the network and overwrite convolution / fully-connected
//! kernel weights with a 2:4 sparse pattern, then handle Constant->(Shuffle)->MatMul
//! weights via sparsifyMatMulKernelWeights.
//! \param sparseWeights owns all replacement buffers and must outlive the engine build.
void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights)
{
    int32_t const layerCount = network.getNbLayers();
    for (int32_t idx = 0; idx < layerCount; ++idx)
    {
        auto* layer = network.getLayer(idx);
        switch (layer->getType())
        {
        case nvinfer1::LayerType::kCONVOLUTION:
        {
            auto& conv = *static_cast<IConvolutionLayer*>(layer);
            auto const& kernelDims = conv.getKernelSizeNd();
            ASSERT(kernelDims.nbDims == 2 || kernelDims.nbDims == 3);
            // trs = product of the spatial kernel dimensions (R*S, or T*R*S for 3D conv).
            auto const trs
                = std::accumulate(kernelDims.d, kernelDims.d + kernelDims.nbDims, 1, std::multiplies<int32_t>());
            sparseWeights.emplace_back();
            setSparseWeights(conv, conv.getNbOutputMaps(), trs, sparseWeights.back());
            break;
        }
        case nvinfer1::LayerType::kFULLY_CONNECTED:
        {
            auto& fc = *static_cast<nvinfer1::IFullyConnectedLayer*>(layer);
            sparseWeights.emplace_back();
            setSparseWeights(fc, fc.getNbOutputChannels(), 1, sparseWeights.back());
            break;
        }
        default: break;
        }
    }

    sparsifyMatMulKernelWeights(network, sparseWeights);
}
|
||||
|
||||
//! Dispatch sparsification on the weights' element type.
//! Only kFLOAT and kHALF weights are sparsified; every other data type is left
//! untouched (sparseWeights stays unmodified for them).
void sparsify(Weights const& weights, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
{
    if (weights.type == DataType::kFLOAT)
    {
        sparsify(static_cast<float const*>(weights.values), weights.count, k, trs, sparseWeights);
    }
    else if (weights.type == DataType::kHALF)
    {
        sparsify(static_cast<half_float::half const*>(weights.values), weights.count, k, trs, sparseWeights);
    }
    // kINT8 / kINT32 / kUINT8 / kBOOL / kFP8: intentionally not sparsified.
}
|
||||
|
||||
//! Stream a single buffer element using the type's operator<<.
//! The int8_t overload below exists so bytes print as numbers, not characters.
template <typename T>
void print(std::ostream& os, T v)
{
    os << v;
}
|
||||
|
||||
//! Print an int8_t as its numeric value (streaming it directly would emit a char).
void print(std::ostream& os, int8_t v)
{
    int32_t const widened{v};
    os << widened;
}
|
||||
|
||||
//! Print every element of a (possibly strided/vectorized) buffer to os,
//! separated by `separator`.
//! \param dims      logical extent of the tensor.
//! \param strides   per-dimension element strides of the physical layout.
//! \param vectorDim index of the vectorized dimension, or -1 when none.
//! \param spv       scalars per vector along vectorDim (ignored when vectorDim == -1).
template <typename T>
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv)
{
    auto const vol = volume(dims);
    T const* typedBuffer = static_cast<T const*>(buffer);
    std::string sep;
    // Enumerate logical coordinates in row-major order; map each coordinate to
    // its physical offset in the strided layout.
    for (int64_t v = 0; v < vol; ++v)
    {
        int64_t curV = v;
        int32_t dataOffset = 0;
        for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex)
        {
            int32_t dimVal = curV % dims.d[dimIndex];
            if (dimIndex == vectorDim)
            {
                // Vectorized dimension: whole vectors advance by stride*spv, the
                // remainder selects the scalar inside the vector.
                dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv;
            }
            else
            {
                // Non-vectorized dimension: strides are in units of vectors, so
                // scale by spv (1 when the tensor has no vectorized dimension).
                dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv);
            }
            curV /= dims.d[dimIndex];
            ASSERT(curV >= 0);
        }

        // Separator precedes every element except the first.
        os << sep;
        sep = separator;
        print(os, typedBuffer[dataOffset]);
    }
}

// Explicit instantiation
template void dumpBuffer<bool>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<int32_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<int8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<float>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<__half>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
template void dumpBuffer<uint8_t>(void const* buffer, std::string const& separator, std::ostream& os, Dims const& dims,
    Dims const& strides, int32_t vectorDim, int32_t spv);
|
||||
|
||||
//! Overwrite weights with a 2:4 structured-sparsity pattern along the channel axis.
//! The layout is KCRS-like: count == k * c * trs with c inferred from count.
//! Within every window of 4 consecutive channels the first two values are kept and
//! the remaining two are forced to zero. Values themselves are not inspected, so
//! this destroys accuracy and is only useful for sparsity performance experiments.
//! \param sparseWeights resized to count * sizeof(T) and filled with the result.
template <typename T>
void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights)
{
    constexpr int32_t window = 4;
    constexpr int32_t nonzeros = 2;

    auto const c = count / (k * trs);
    int32_t const crs = c * trs;

    sparseWeights.resize(count * sizeof(T));
    auto* sparseValues = reinterpret_cast<T*>(sparseWeights.data());

    for (int64_t ki = 0; ki < k; ++ki)
    {
        for (int64_t rsi = 0; rsi < trs; ++rsi)
        {
            for (int64_t ci = 0; ci < c; ++ci)
            {
                // Linear index of element (ki, ci, rsi) in KCRS layout.
                auto const index = ki * crs + ci * trs + rsi;
                // Keep the first `nonzeros` entries of each channel window, zero the rest.
                bool const keep = (ci % window) < nonzeros;
                sparseValues[index] = keep ? values[index] : static_cast<T>(0);
            }
        }
    }
}
|
||||
|
||||
// Explicit instantiation
// (float and half are the only element types dispatched from sparsify(Weights...)).
template void sparsify<float>(
    float const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
template void sparsify<half_float::half>(
    half_float::half const* values, int64_t count, int32_t k, int32_t trs, std::vector<int8_t>& sparseWeights);
|
||||
|
||||
template <typename T>
|
||||
void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n)
|
||||
{
|
||||
ASSERT(dst != src);
|
||||
T* tdst = reinterpret_cast<T*>(dst);
|
||||
T const* tsrc = reinterpret_cast<T const*>(src);
|
||||
for (int32_t mi = 0; mi < m; ++mi)
|
||||
{
|
||||
for (int32_t ni = 0; ni < n; ++ni)
|
||||
{
|
||||
int32_t const isrc = mi * n + ni;
|
||||
int32_t const idst = ni * m + mi;
|
||||
tdst[idst] = tsrc[isrc];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit instantiation
|
||||
template void transpose2DWeights<float>(void* dst, void const* src, int32_t const m, int32_t const n);
|
||||
template void transpose2DWeights<half_float::half>(void* dst, void const* src, int32_t const m, int32_t const n);
|
||||
|
||||
//! Fill an integral-typed buffer with uniformly distributed values in [min, max].
//! The engine is freshly default-seeded on every call, so repeated calls produce
//! identical contents (deterministic by design).
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type>
void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
    T* typedBuffer = static_cast<T*>(buffer);
    std::default_random_engine engine;
    // Draw as int32_t so narrow types (int8_t, bool) are valid result types.
    std::uniform_int_distribution<int32_t> distribution(min, max);
    for (int64_t i = 0; i < volume; ++i)
    {
        typedBuffer[i] = static_cast<T>(distribution(engine));
    }
}
|
||||
|
||||
//! Fill a non-integral buffer with uniformly distributed values drawn in float
//! precision from [min, max) and narrowed to T.
//! The engine is freshly default-seeded on every call, so repeated calls produce
//! identical contents (deterministic by design).
template <typename T, typename std::enable_if<!std::is_integral<T>::value, int32_t>::type>
void fillBuffer(void* buffer, int64_t volume, T min, T max)
{
    T* typedBuffer = static_cast<T*>(buffer);
    std::default_random_engine engine;
    std::uniform_real_distribution<float> distribution(min, max);
    for (int64_t i = 0; i < volume; ++i)
    {
        typedBuffer[i] = static_cast<T>(distribution(engine));
    }
}
|
||||
|
||||
// Explicit instantiation
// bool / int32_t / int8_t / uint8_t bind to the integral overload above;
// float / __half bind to the non-integral overload (drawn in float, then narrowed).
template void fillBuffer<bool>(void* buffer, int64_t volume, bool min, bool max);
template void fillBuffer<float>(void* buffer, int64_t volume, float min, float max);
template void fillBuffer<int32_t>(void* buffer, int64_t volume, int32_t min, int32_t max);
template void fillBuffer<int8_t>(void* buffer, int64_t volume, int8_t min, int8_t max);
template void fillBuffer<__half>(void* buffer, int64_t volume, __half min, __half max);
template void fillBuffer<uint8_t>(void* buffer, int64_t volume, uint8_t min, uint8_t max);

} // namespace sample
|
@ -0,0 +1,105 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef TRT_SAMPLE_UTILS_H
|
||||
#define TRT_SAMPLE_UTILS_H
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <random>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include <cuda.h>
|
||||
#include <cuda_fp16.h>
|
||||
|
||||
#include "NvInfer.h"
|
||||
|
||||
#include "common.h"
|
||||
#include "logger.h"
|
||||
|
||||
//! Log msg to the stream err and return retval from the enclosing function when
//! condition evaluates to false.
//! Wrapped in do { } while (0) so the macro expands to a single statement and is
//! safe in unbraced if/else chains; the previous bare { } block followed by the
//! caller's ';' would detach a subsequent `else`.
#define SMP_RETVAL_IF_FALSE(condition, msg, retval, err)                                                               \
    do                                                                                                                 \
    {                                                                                                                  \
        if ((condition) == false)                                                                                      \
        {                                                                                                              \
            (err) << (msg) << std::endl;                                                                               \
            return retval;                                                                                             \
        }                                                                                                              \
    } while (0)
|
||||
|
||||
namespace sample
|
||||
{
|
||||
|
||||
size_t dataTypeSize(nvinfer1::DataType dataType);
|
||||
|
||||
template <typename T>
|
||||
inline T roundUp(T m, T n)
|
||||
{
|
||||
return ((m + n - 1) / n) * n;
|
||||
}
|
||||
|
||||
//! comps is the number of components in a vector. Ignored if vecDim < 0.
|
||||
int64_t volume(nvinfer1::Dims const& dims, nvinfer1::Dims const& strides, int32_t vecDim, int32_t comps, int32_t batch);
|
||||
|
||||
using samplesCommon::volume;
|
||||
|
||||
nvinfer1::Dims toDims(std::vector<int32_t> const& vec);
|
||||
|
||||
template <typename T, typename std::enable_if<std::is_integral<T>::value, bool>::type = true>
|
||||
void fillBuffer(void* buffer, int64_t volume, T min, T max);
|
||||
|
||||
template <typename T, typename std::enable_if<!std::is_integral<T>::value, int32_t>::type = 0>
|
||||
void fillBuffer(void* buffer, int64_t volume, T min, T max);
|
||||
|
||||
template <typename T>
|
||||
void dumpBuffer(void const* buffer, std::string const& separator, std::ostream& os, nvinfer1::Dims const& dims,
|
||||
nvinfer1::Dims const& strides, int32_t vectorDim, int32_t spv);
|
||||
|
||||
void loadFromFile(std::string const& fileName, char* dst, size_t size);
|
||||
|
||||
std::vector<std::string> splitToStringVec(std::string const& option, char separator);
|
||||
|
||||
bool broadcastIOFormats(std::vector<IOFormat> const& formats, size_t nbBindings, bool isInput = true);
|
||||
|
||||
int32_t getCudaDriverVersion();
|
||||
|
||||
int32_t getCudaRuntimeVersion();
|
||||
|
||||
void sparsify(nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
|
||||
void sparsify(nvinfer1::Weights const& weights, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
|
||||
|
||||
// Walk the weights elements and overwrite (at most) 2 out of 4 elements to 0.
|
||||
template <typename T>
|
||||
void sparsify(T const* values, int64_t count, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
|
||||
|
||||
template <typename L>
|
||||
void setSparseWeights(L& l, int32_t k, int32_t rs, std::vector<int8_t>& sparseWeights);
|
||||
|
||||
// Sparsify the weights of Constant layers that are fed to MatMul via Shuffle layers.
|
||||
// Forward analysis on the API graph to determine which weights to sparsify.
|
||||
void sparsifyMatMulKernelWeights(
|
||||
nvinfer1::INetworkDefinition& network, std::vector<std::vector<int8_t>>& sparseWeights);
|
||||
|
||||
template <typename T>
|
||||
void transpose2DWeights(void* dst, void const* src, int32_t const m, int32_t const n);
|
||||
|
||||
} // namespace sample
|
||||
|
||||
#endif // TRT_SAMPLE_UTILS_H
|
@ -0,0 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
#include "utils.h"
|
||||
#include "kernel_function.cuh"
|
||||
|
||||
|
||||
class MI_VisionInterface
|
||||
{
|
||||
public:
|
||||
~MI_VisionInterface() = default;
|
||||
|
||||
// 初始化engine
|
||||
virtual bool initEngine(const std::string& _onnxFileName) = 0;
|
||||
// 检查模型输入输出
|
||||
virtual bool check() = 0;
|
||||
// 推理
|
||||
virtual bool doTRTInfer(const std::vector<MN_VisionImage::MS_ImageParam>& _bufImg, std::vector<utils::MR_Result>* _detectRes, int* _user) = 0;
|
||||
// 推理
|
||||
virtual bool doTRTInfer(const std::vector<cv::Mat>& _matImgs, std::vector<utils::MR_Result>* _detectRes, int* _user) = 0;
|
||||
// 获取错误信息
|
||||
virtual std::string getError() = 0;
|
||||
// 释放数据/内存
|
||||
virtual void freeMemeory() = 0;
|
||||
|
||||
|
||||
virtual bool measureAxis(std::vector<double>& measureRes, const MN_VisionImage::MS_ImageParam& _bufImg) = 0;
|
||||
|
||||
};
|
||||
|
||||
// 导出接口类
|
||||
MI_ALGORITHM_EXPORT MI_VisionInterface* getInterfacePtr(const utils::InitParameter& _params);
|
||||
|
@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstring>
#include <memory>
#include <string>
|
||||
|
||||
typedef unsigned char uchar;

namespace MN_VisionImage {

    // Pixel layout of an image buffer.
    enum class ME_ImageType
    {
        E_GRAY = 0, // 1 channel
        E_RGB,      // 3 channels
        E_RGBA      // 4 channels
    };

    // Owning image descriptor: deep-copies the caller's pixel buffer.
    struct MS_ImageParam
    {
        // Default constructor: empty/invalid image (no data, width/height -1).
        MS_ImageParam() :
            m_width(-1),
            m_height(-1),
            m_channels(0),
            mImgType(MN_VisionImage::ME_ImageType::E_RGB)
        {}

        // Deep-copy _buffer, which must hold _nW * _nH * channels bytes where the
        // channel count is derived from _imgType. A null buffer or non-positive
        // size leaves m_data null instead of invoking memcpy on garbage (the
        // previous version crashed on a null buffer).
        MS_ImageParam(uchar* _buffer, int _nW, int _nH, const ME_ImageType& _imgType)
        {
            int _nChannels = 3; // E_RGB and any other value default to 3 channels
            if (_imgType == ME_ImageType::E_GRAY)
            {
                _nChannels = 1;
            }
            else if (_imgType == ME_ImageType::E_RGBA)
            {
                _nChannels = 4;
            }

            m_width = _nW;
            m_height = _nH;
            m_channels = _nChannels;
            mImgType = _imgType;

            const int iSize = _nW * _nH * _nChannels; // total byte count of the image
            if (_buffer != nullptr && iSize > 0)
            {
                // Array deleter because the payload is allocated with new[]
                // (delete[] nullptr is a no-op, so no null check is needed).
                m_data = std::shared_ptr<uchar>(new uchar[iSize], [](uchar* p) { delete[] p; });
                memcpy(m_data.get(), _buffer, iSize);
            }
        }

        std::shared_ptr<uchar> m_data; // pixel data (deep copy); null when empty/invalid
        int m_width;                   // image width in pixels
        int m_height;                  // image height in pixels
        int m_channels;                // channel count derived from mImgType
        ME_ImageType mImgType;         // pixel layout
    };
}
|
@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
// tensorrt
|
||||
#include "argsParser.h"
|
||||
#include "buffers.h"
|
||||
#include "common.h"
|
||||
#include "logger.h"
|
||||
#include "parserOnnxConfig.h"
|
||||
#include "NvOnnxParser.h"
|
||||
#include <NvInfer.h>
|
||||
// cuda
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda.h>
|
||||
#include <stdio.h>
|
||||
#include <thrust/sort.h>
|
||||
#include <cuda_device_runtime_api.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <device_launch_parameters.h>
|
||||
#include <device_atomic_functions.h>
|
||||
// opencv
|
||||
#include <opencv2/opencv.hpp>
|
||||
// cpp std
|
||||
#include "Windows.h"
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
#include <math.h>
|
||||
#include <filesystem>
|
||||
|
||||
#include "MS_Image_Param.h"
|
@ -0,0 +1,43 @@
|
||||
#pragma once
|
||||
#include "common_include.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define checkRuntime(op) __check_cuda_runtime((op), #op, __FILE__, __LINE__)
|
||||
|
||||
bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, int line);
|
||||
|
||||
#define BLOCK_SIZE 8
|
||||
|
||||
//note: resize rgb with padding
|
||||
void resizeDevice(const int& batch_size, float* src, int src_width, int src_height,
|
||||
float* dst, int dstWidth, int dstHeight,
|
||||
float paddingValue, utils::AffineMat matrix);
|
||||
|
||||
// Overload: resize RGB with padding; src's element type is uint8 (unsigned char).
|
||||
void resizeDevice(const int& batch_size, unsigned char* src, int src_width, int src_height,
|
||||
float* dst, int dstWidth, int dstHeight,
|
||||
float paddingValue, utils::AffineMat matrix);
|
||||
|
||||
// overload: resize rgb/gray without padding
|
||||
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
||||
float* dst, int dstWidth, int dstHeight,
|
||||
utils::ColorMode mode, utils::AffineMat matrix);
|
||||
|
||||
void bgr2rgbDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
|
||||
float* dst, int dstWidth, int dstHeight);
|
||||
|
||||
void normDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
|
||||
float* dst, int dstWidth, int dstHeight,
|
||||
utils::InitParameter norm_param);
|
||||
|
||||
void hwc2chwDevice(const int& batch_size, float* src, int srcWidth, int srcHeight,
|
||||
float* dst, int dstWidth, int dstHeight);
|
||||
|
||||
void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcLength, float* dst, int dstWidth, int dstHeight);
|
||||
|
||||
// nms fast
|
||||
void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea);
|
||||
|
||||
// nms sort
|
||||
void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea,
|
||||
int* idx, float* conf);
|
@ -0,0 +1,280 @@
|
||||
#pragma once
|
||||
#include "common_include.h"
|
||||
|
||||
|
||||
// 接口导出
|
||||
#ifndef BUILD_STATIC
|
||||
# if defined(MI_ALGORITHM_LIB)
|
||||
# define MI_ALGORITHM_EXPORT __declspec(dllexport)
|
||||
# else
|
||||
# define MI_ALGORITHM_EXPORT __declspec(dllimport)
|
||||
# endif
|
||||
#else
|
||||
# define MI_ALGORITHM_EXPORT
|
||||
#endif
|
||||
|
||||
|
||||
namespace utils
|
||||
{
|
||||
namespace dataSets
|
||||
{
|
||||
const std::vector<std::string> coco80 = {
|
||||
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
|
||||
"fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
|
||||
"elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
|
||||
"skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
|
||||
"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
|
||||
"sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
|
||||
"potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
|
||||
"microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
|
||||
"hair drier", "toothbrush"
|
||||
};
|
||||
const std::vector<std::string> coco91 = {
|
||||
"person", "bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light",
|
||||
"fire hydrant","street sign","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe",
|
||||
"hat","backpack","umbrella","shoe","eye glasses","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat",
|
||||
"baseball glove","skateboard","surfboard","tennis racket","bottle","plate","wine glass","cup","fork","knife","spoon","bowl","banana","apple",
|
||||
"sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","mirror","dining table","window",
|
||||
"desk","toilet","door","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","blender",
|
||||
"book","clock","vase","scissors","teddy bear","hair drier","toothbrush","hair brush"
|
||||
};
|
||||
const std::vector<std::string> voc20 = {
|
||||
"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable",
|
||||
"dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"
|
||||
};
|
||||
|
||||
const std::vector<std::string> face2 = { "non-face", "face" };
|
||||
|
||||
// flower_data
|
||||
const std::vector<std::string> flower_labels = { "dailsy", "dandelion", "rose", "sunflower", "tulip" };
|
||||
}
|
||||
namespace Colors
|
||||
{
|
||||
const std::vector<cv::Scalar> color80{
|
||||
cv::Scalar(128, 77, 207),cv::Scalar(65, 32, 208),cv::Scalar(0, 224, 45),cv::Scalar(3, 141, 219),cv::Scalar(80, 239, 253),cv::Scalar(239, 184, 12),
|
||||
cv::Scalar(7, 144, 145),cv::Scalar(161, 88, 57),cv::Scalar(0, 166, 46),cv::Scalar(218, 113, 53),cv::Scalar(193, 33, 128),cv::Scalar(190, 94, 113),
|
||||
cv::Scalar(113, 123, 232),cv::Scalar(69, 205, 80),cv::Scalar(18, 170, 49),cv::Scalar(89, 51, 241),cv::Scalar(153, 191, 154),cv::Scalar(27, 26, 69),
|
||||
cv::Scalar(20, 186, 194),cv::Scalar(210, 202, 167),cv::Scalar(196, 113, 204),cv::Scalar(9, 81, 88),cv::Scalar(191, 162, 67),cv::Scalar(227, 73, 120),
|
||||
cv::Scalar(177, 31, 19),cv::Scalar(133, 102, 137),cv::Scalar(146, 72, 97),cv::Scalar(145, 243, 208),cv::Scalar(2, 184, 176),cv::Scalar(219, 220, 93),
|
||||
cv::Scalar(238, 153, 134),cv::Scalar(197, 169, 160),cv::Scalar(204, 201, 106),cv::Scalar(13, 24, 129),cv::Scalar(40, 38, 4),cv::Scalar(5, 41, 34),
|
||||
cv::Scalar(46, 94, 129),cv::Scalar(102, 65, 107),cv::Scalar(27, 11, 208),cv::Scalar(191, 240, 183),cv::Scalar(225, 76, 38),cv::Scalar(193, 89, 124),
|
||||
cv::Scalar(30, 14, 175),cv::Scalar(144, 96, 90),cv::Scalar(181, 186, 86),cv::Scalar(102, 136, 34),cv::Scalar(158, 71, 15),cv::Scalar(183, 81, 247),
|
||||
cv::Scalar(73, 69, 89),cv::Scalar(123, 73, 232),cv::Scalar(4, 175, 57),cv::Scalar(87, 108, 23),cv::Scalar(105, 204, 142),cv::Scalar(63, 115, 53),
|
||||
cv::Scalar(105, 153, 126),cv::Scalar(247, 224, 137),cv::Scalar(136, 21, 188),cv::Scalar(122, 129, 78),cv::Scalar(145, 80, 81),cv::Scalar(51, 167, 149),
|
||||
cv::Scalar(162, 173, 20),cv::Scalar(252, 202, 17),cv::Scalar(10, 40, 3),cv::Scalar(150, 90, 254),cv::Scalar(169, 21, 68),cv::Scalar(157, 148, 180),
|
||||
cv::Scalar(131, 254, 90),cv::Scalar(7, 221, 102),cv::Scalar(19, 191, 184),cv::Scalar(98, 126, 199),cv::Scalar(210, 61, 56),cv::Scalar(252, 86, 59),
|
||||
cv::Scalar(102, 195, 55),cv::Scalar(160, 26, 91),cv::Scalar(60, 94, 66),cv::Scalar(204, 169, 193),cv::Scalar(126, 4, 181),cv::Scalar(229, 209, 196),
|
||||
cv::Scalar(195, 170, 186),cv::Scalar(155, 207, 148)
|
||||
};
|
||||
const std::vector<cv::Scalar> color91{
|
||||
cv::Scalar(148, 99, 164),cv::Scalar(65, 172, 90),cv::Scalar(18, 117, 190),cv::Scalar(173, 208, 229),cv::Scalar(37, 162, 147),cv::Scalar(121, 99, 42),
|
||||
cv::Scalar(218, 173, 104),cv::Scalar(193, 213, 138),cv::Scalar(142, 168, 45),cv::Scalar(107, 143, 94),cv::Scalar(242, 89, 7),cv::Scalar(87, 218, 248),
|
||||
cv::Scalar(126, 168, 9),cv::Scalar(86, 152, 105),cv::Scalar(155, 135, 251),cv::Scalar(73, 234, 44),cv::Scalar(177, 37, 42),cv::Scalar(219, 215, 54),
|
||||
cv::Scalar(124, 207, 143),cv::Scalar(7, 81, 209),cv::Scalar(254, 18, 130),cv::Scalar(71, 54, 73),cv::Scalar(172, 198, 63),cv::Scalar(64, 217, 224),
|
||||
cv::Scalar(105, 224, 25),cv::Scalar(41, 52, 130),cv::Scalar(220, 27, 193),cv::Scalar(65, 222, 86),cv::Scalar(250, 150, 201),cv::Scalar(201, 150, 105),
|
||||
cv::Scalar(104, 96, 142),cv::Scalar(111, 230, 54),cv::Scalar(105, 24, 22),cv::Scalar(42, 226, 101),cv::Scalar(67, 26, 144),cv::Scalar(155, 113, 106),
|
||||
cv::Scalar(152, 196, 216),cv::Scalar(58, 68, 152),cv::Scalar(68, 230, 213),cv::Scalar(169, 143, 129),cv::Scalar(191, 102, 41),cv::Scalar(5, 73, 170),
|
||||
cv::Scalar(15, 73, 233),cv::Scalar(95, 13, 71),cv::Scalar(25, 92, 218),cv::Scalar(85, 173, 16),cv::Scalar(247, 158, 17),cv::Scalar(36, 28, 8),
|
||||
cv::Scalar(31, 100, 134),cv::Scalar(131, 71, 45),cv::Scalar(158, 190, 91),cv::Scalar(90, 207, 220),cv::Scalar(125, 77, 228),cv::Scalar(40, 156, 67),
|
||||
cv::Scalar(35, 250, 69),cv::Scalar(229, 61, 245),cv::Scalar(210, 201, 106),cv::Scalar(184, 35, 131),cv::Scalar(47, 124, 120),cv::Scalar(1, 114, 23),
|
||||
cv::Scalar(99, 181, 17),cv::Scalar(77, 141, 151),cv::Scalar(79, 33, 95),cv::Scalar(194, 111, 146),cv::Scalar(187, 199, 138),cv::Scalar(129, 215, 40),
|
||||
cv::Scalar(160, 209, 144),cv::Scalar(139, 121, 58),cv::Scalar(97, 208, 197),cv::Scalar(185, 105, 171),cv::Scalar(160, 96, 136),cv::Scalar(232, 26, 26),
|
||||
cv::Scalar(34, 165, 109),cv::Scalar(19, 86, 215),cv::Scalar(205, 209, 199),cv::Scalar(131, 91, 25),cv::Scalar(51, 201, 16),cv::Scalar(64, 35, 128),
|
||||
cv::Scalar(120, 161, 247),cv::Scalar(123, 164, 190),cv::Scalar(15, 191, 40),cv::Scalar(11, 44, 117),cv::Scalar(198, 136, 70),cv::Scalar(14, 224, 240),
|
||||
cv::Scalar(60, 186, 193),cv::Scalar(253, 190, 129),cv::Scalar(134, 228, 173),cv::Scalar(219, 156, 214),cv::Scalar(137, 67, 254),cv::Scalar(178, 223, 250),
|
||||
cv::Scalar(219, 199, 139)
|
||||
};
|
||||
const std::vector<cv::Scalar> color20{
|
||||
cv::Scalar(128, 77, 207),cv::Scalar(65, 32, 208),cv::Scalar(0, 224, 45),cv::Scalar(3, 141, 219),cv::Scalar(80, 239, 253),cv::Scalar(239, 184, 12),
|
||||
cv::Scalar(7, 144, 145),cv::Scalar(161, 88, 57),cv::Scalar(0, 166, 46),cv::Scalar(218, 113, 53),cv::Scalar(193, 33, 128),cv::Scalar(190, 94, 113),
|
||||
cv::Scalar(113, 123, 232),cv::Scalar(69, 205, 80),cv::Scalar(18, 170, 49),cv::Scalar(89, 51, 241),cv::Scalar(153, 191, 154),cv::Scalar(27, 26, 69),
|
||||
cv::Scalar(20, 186, 194),cv::Scalar(210, 202, 167),cv::Scalar(196, 113, 204),cv::Scalar(9, 81, 88),cv::Scalar(191, 162, 67),cv::Scalar(227, 73, 120)
|
||||
};
|
||||
}
|
||||
|
||||
// JC_Xiong-20240424
// Model type/function; concrete model objects are instantiated based on this enum.
enum class ME_ModelType
{
    E_RESNET34 = 0,
    E_RESNET50,
    E_YOLOV8,
};
|
||||
|
||||
// Inspection verdict for the current product.
enum class ME_DetectRes
{
    E_DETECT_OK = 0, // passed
    E_DETECT_NG,     // failed
    E_DETECT_NONE,   // no result
};

// Result returned for image classification.
typedef struct MS_Classification
{
    MS_Classification() :mDetectRes(ME_DetectRes::E_DETECT_OK), mConfidence(0.0), mLabel("")
    {}

    ME_DetectRes mDetectRes; // overall verdict
    double mConfidence;      // confidence of the predicted label
    std::string mLabel;      // predicted class label
}MS_ClassificationParam;
|
||||
|
||||
struct Box
|
||||
{
|
||||
float left, top, right, bottom, confidence;
|
||||
int label;
|
||||
std::vector<cv::Point2i> land_marks;
|
||||
|
||||
Box() = default;
|
||||
Box(float left, float top, float right, float bottom, float confidence, int label) :
|
||||
left(left), top(top), right(right), bottom(bottom), confidence(confidence), label(label) {}
|
||||
|
||||
Box(float left, float top, float right, float bottom, float confidence, int label, int numLandMarks) :
|
||||
left(left), top(top), right(right), bottom(bottom), confidence(confidence), label(label)
|
||||
{
|
||||
land_marks.reserve(numLandMarks);
|
||||
}
|
||||
};
|
||||
|
||||
// 目标检测返回结果
|
||||
typedef struct MS_ObjectDetect
|
||||
{
|
||||
MS_ObjectDetect() :mDetectRes(ME_DetectRes::E_DETECT_OK), mBoxVec(std::vector<Box>())
|
||||
{}
|
||||
|
||||
ME_DetectRes mDetectRes;
|
||||
std::vector<Box> mBoxVec;
|
||||
}MS_ObjectDetectParam;
|
||||
|
||||
|
||||
struct MR_Result
|
||||
{
|
||||
MR_Result() : mObjectDecRes(), mClassifyDecRes()
|
||||
{}
|
||||
|
||||
MS_ClassificationParam mClassifyDecRes;
|
||||
std::vector<std::vector<Box>> mObjectDecRes;
|
||||
};
|
||||
|
||||
struct InitParameter
|
||||
{
|
||||
InitParameter() :num_class(5), dynamic_batch(false), batch_size(1), dst_h(0), dst_w(0), scale(255.0f),
|
||||
meanVec{ 0.0f,0.0f,0.0f }, stdVec{ 1.0f,1.0f,1.0f }, iou_thresh(0.5), conf_thresh(0.5), topK(1000),
|
||||
save_path(""), char_width(11), det_info_render_width(15), font_scale(0.6), is_show(false), is_save(false)
|
||||
{}
|
||||
|
||||
|
||||
ME_ModelType m_modelType; // 模型类型
|
||||
int num_class; // flower_data
|
||||
std::vector<std::string> class_names;
|
||||
std::vector<std::string> input_output_names;
|
||||
|
||||
bool dynamic_batch;
|
||||
int batch_size; // 推理的批量数据
|
||||
MN_VisionImage::MS_ImageParam mImage;
|
||||
int dst_h, dst_w; // 输入到模型的图像宽高
|
||||
|
||||
float scale;
|
||||
std::vector<float> stdVec;
|
||||
std::vector<float> meanVec;
|
||||
|
||||
float iou_thresh;
|
||||
float conf_thresh;
|
||||
|
||||
int topK;
|
||||
std::string save_path;
|
||||
|
||||
std::string winname = "TensorRT-Infer";
|
||||
int char_width;
|
||||
int det_info_render_width;
|
||||
double font_scale;
|
||||
bool is_show;
|
||||
bool is_save;
|
||||
};
|
||||
|
||||
// legacy
// Raw face/object candidate: bbox + 5 facial key points + score.
struct CandidateObject
{
	// 4 bbox values [x y w h] + 5 facial key points [x1 y1 x2 y2 ... x5 y5].
	// Replaces the magic number 14 that was repeated in three places.
	static constexpr int kNumBboxAndKeyPointVals = 14;

	float mBboxAndkeyPoints[kNumBboxAndKeyPointVals];
	float mScore;
	bool mIsGood;

	// Sentinel-initialized candidate: geometry and score all FLT_MAX, flagged good.
	CandidateObject()
	{
		std::fill_n(mBboxAndkeyPoints, kNumBboxAndKeyPointVals, FLT_MAX);
		mScore = FLT_MAX;
		mIsGood = true;
	}

	// bboxAndkeyPoints must point to at least kNumBboxAndKeyPointVals floats.
	// (Pointer is now const — the data was never modified; callers passing
	// float* are unaffected.)
	CandidateObject(const float* bboxAndkeyPoints, float score, bool isGood) :
		mScore(score),
		mIsGood(isGood)
	{
		memcpy(mBboxAndkeyPoints, bboxAndkeyPoints, kNumBboxAndKeyPointVals * sizeof(float));
	}
};
|
||||
|
||||
|
||||
|
||||
// Frame source for the demo pipeline.
enum class InputStream
{
	IMAGE,   // single image file
	VIDEO,   // video file
	CAMERA,  // live camera capture
};
|
||||
|
||||
// Channel layout of the input image.
enum class ColorMode
{
	RGB,   // 3-channel color
	GRAY,  // single-channel grayscale
};
|
||||
|
||||
// Six coefficients of an affine transform.
// NOTE(review): the name and 2x3 grouping suggest a row-major 2x3 matrix
// ( [x'] = v0*x + v1*y + v2 ; [y'] = v3*x + v4*y + v5 ) — confirm against
// the kernels/functions that consume it.
struct AffineMat
{
	float v0, v1, v2;
	float v3, v4, v5;
};
|
||||
|
||||
|
||||
// Write `len` floats starting at `vec` to a binary file at `file`.
void saveBinaryFile(float* vec, size_t len, const std::string& file);

// Read the entire file at `file` into a byte buffer.
std::vector<uint8_t> readBinaryFile(const std::string& file);

// Load a serialized model (presumably a TensorRT engine blob — definitions
// are elsewhere; confirm) into memory.
std::vector<unsigned char> loadModel(const std::string& file);

// Current system time formatted as a string.
std::string getSystemTimeStr();

// Configure the capture source (image / video / camera), filling `capture`,
// the number of batches to process, the UI delay, and `param` as needed.
// Returns false on failure (NOTE(review): inferred from the bool return —
// confirm in the definition).
bool setInputStream(const InputStream& source, const std::string& imagePath, const std::string& videoPath, const int& cameraID,
	cv::VideoCapture& capture, int& totalBatches, int& delayTime, InitParameter& param);

// Prepare the render window described by `param` (e.g. param.winname).
void setRenderWindow(InitParameter& param);

// Timestamp string (used e.g. for naming saved files).
std::string getTimeStamp();

// Draw the per-image detections (`objectss`, outer index = batch image) onto
// `imgsBatch` and display them, waiting `cvDelayTime` between frames.
void show(const std::vector<std::vector<Box>>& objectss,
	const std::vector<std::string>& classNames,
	const int& cvDelayTime, std::vector<cv::Mat>& imgsBatch);

// Render the per-image detections and write each image of batch `batchi`
// (of size `batchSize`) under `savePath`.
void save(const std::vector<std::vector<Box>>& objectss,
	const std::vector<std::string>& classNames,
	const std::string& savePath, std::vector<cv::Mat>& imgsBatch,
	const int& batchSize, const int& batchi);
|
||||
|
||||
// Host-side wall-clock timer built on std::chrono::steady_clock
// (member functions are defined elsewhere).
class HostTimer
{
public:
	HostTimer();
	// Elapsed time since construction; while timing CUDA code, add
	// "cudaDeviceSynchronize();" before this (units not visible here —
	// presumably milliseconds; confirm in the definition).
	float getUsedTime();
	~HostTimer();

private:
	std::chrono::steady_clock::time_point t1;  // start point
	std::chrono::steady_clock::time_point t2;  // end point
};
|
||||
|
||||
|
||||
class DeviceTimer
|
||||
{
|
||||
public:
|
||||
DeviceTimer();
|
||||
float getUsedTime();
|
||||
// overload
|
||||
DeviceTimer(cudaStream_t ctream);
|
||||
float getUsedTime(cudaStream_t ctream);
|
||||
|
||||
~DeviceTimer();
|
||||
|
||||
private:
|
||||
cudaEvent_t start, end;
|
||||
};
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ImportGroup Label="PropertySheets" />
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>..\MF_TRTInfer\lib\opencv_lib\include\opencv2;..\MF_TRTInfer\lib\opencv_lib\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalLibraryDirectories>..\MF_TRTInfer\lib\opencv_lib\x64\vc15\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>opencv_world453.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup />
|
||||
</Project>
|