/*
 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef TRT_SAMPLE_REPORTING_H
#define TRT_SAMPLE_REPORTING_H

#include <algorithm>
#include <functional>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

#include "NvInfer.h"

#include "sampleDevice.h"
#include "sampleInference.h"
#include "sampleOptions.h"
#include "sampleUtils.h"

namespace sample
{

class Bindings;

//!
//! \struct InferenceTime
//! \brief Measurement times in milliseconds
//!
struct InferenceTime
{
    InferenceTime(float q, float i, float c, float o)
        : enq(q)
        , h2d(i)
        , compute(c)
        , d2h(o)
    {
    }

    InferenceTime() = default;
    InferenceTime(InferenceTime const&) = default;
    InferenceTime(InferenceTime&&) = default;
    InferenceTime& operator=(InferenceTime const&) = default;
    InferenceTime& operator=(InferenceTime&&) = default;
    ~InferenceTime() = default;

    float enq{0};     // Enqueue
    float h2d{0};     // Host to Device
    float compute{0}; // Compute
    float d2h{0};     // Device to Host

    // Ideal latency: h2d + compute + d2h, excluding enqueue overhead.
    float latency() const
    {
        return h2d + compute + d2h;
    }
};

//!
//! \struct InferenceTrace
//! \brief Measurement points in milliseconds
//!
struct InferenceTrace
{
    InferenceTrace(int32_t s, float es, float ee, float is, float ie, float cs, float ce, float os, float oe)
        : stream(s)
        , enqStart(es)
        , enqEnd(ee)
        , h2dStart(is)
        , h2dEnd(ie)
        , computeStart(cs)
        , computeEnd(ce)
        , d2hStart(os)
        , d2hEnd(oe)
    {
    }

    InferenceTrace() = default;
    InferenceTrace(InferenceTrace const&) = default;
    InferenceTrace(InferenceTrace&&) = default;
    InferenceTrace& operator=(InferenceTrace const&) = default;
    InferenceTrace& operator=(InferenceTrace&&) = default;
    ~InferenceTrace() = default;

    int32_t stream{0};
    float enqStart{0};
    float enqEnd{0};
    float h2dStart{0};
    float h2dEnd{0};
    float computeStart{0};
    float computeEnd{0};
    float d2hStart{0};
    float d2hEnd{0};
};
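
//! A minimal sketch (illustrative, not an API declared in this header) of how a
//! trace maps onto an InferenceTime: each phase duration is its measured end
//! point minus its start point.
//! \code
//! inline InferenceTime traceToTiming(InferenceTrace const& a)
//! {
//!     return InferenceTime(a.enqEnd - a.enqStart, a.h2dEnd - a.h2dStart,
//!         a.computeEnd - a.computeStart, a.d2hEnd - a.d2hStart);
//! }
//! \endcode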

inline InferenceTime operator+(InferenceTime const& a, InferenceTime const& b)
{
    return InferenceTime(a.enq + b.enq, a.h2d + b.h2d, a.compute + b.compute, a.d2h + b.d2h);
}

inline InferenceTime& operator+=(InferenceTime& a, InferenceTime const& b)
{
    a = a + b;
    return a;
}
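
//! Example (illustrative): aggregating per-iteration timings with the operators
//! above; "timings" stands for a non-empty vector collected from benchmark runs.
//! \code
//! InferenceTime const total
//!     = std::accumulate(timings.begin(), timings.end(), InferenceTime{});
//! float const avgLatencyMs = total.latency() / timings.size();
//! \endcode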

//!
//! \struct PerformanceResult
//! \brief Summary statistics (min/max/mean/median/percentiles) for one performance metric
//!
struct PerformanceResult
{
    float min{0.F};
    float max{0.F};
    float mean{0.F};
    float median{0.F};
    std::vector<float> percentiles;
    float coeffVar{0.F}; // coefficient of variation
};

//!
//! \brief Print benchmarking time and number of traces collected
//!
void printProlog(int32_t warmups, int32_t timings, float warmupMs, float walltime, std::ostream& os);

//!
//! \brief Print a timing trace
//!
void printTiming(std::vector<InferenceTime> const& timings, int32_t runsPerAvg, std::ostream& os);

//!
//! \brief Print the performance summary of a trace
//!
void printEpilog(std::vector<InferenceTime> const& timings, std::vector<float> const& percentiles, int32_t batchSize,
    std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Get the result of a specific performance metric from a trace
//!
PerformanceResult getPerformanceResult(std::vector<InferenceTime> const& timings,
    std::function<float(InferenceTime const&)> metricGetter, std::vector<float> const& percentiles);
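
//! Example (illustrative): latency statistics with 90/95/99th percentiles,
//! using the latency() metric defined on InferenceTime above.
//! \code
//! PerformanceResult const result = getPerformanceResult(
//!     timings, [](InferenceTime const& t) { return t.latency(); }, {90.F, 95.F, 99.F});
//! \endcode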

//!
//! \brief Print explanations of the performance metrics printed by printEpilog()
//!
void printMetricExplanations(std::ostream& os);

//!
//! \brief Print and summarize a timing trace
//!
void printPerformanceReport(std::vector<InferenceTrace> const& trace, ReportingOptions const& reportingOpts,
    InferenceOptions const& infOpts, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose);

//!
//! \brief Export a timing trace to a JSON file
//!
void exportJSONTrace(
    std::vector<InferenceTrace> const& trace, std::string const& fileName, int32_t const nbWarmups);

//!
//! \brief Print input tensors to stream
//!
void dumpInputs(nvinfer1::IExecutionContext const& context, Bindings const& bindings, std::ostream& os);

//!
//! \brief Print output tensors to stream
//!
template <typename ContextType>
void dumpOutputs(ContextType const& context, Bindings const& bindings, std::ostream& os);

//!
//! \brief Dump the raw binding buffers to files
//!
template <typename ContextType>
void dumpRawBindingsToFiles(ContextType const& context, Bindings const& bindings, std::ostream& os);

//!
//! \brief Export output tensors to JSON file
//!
template <typename ContextType>
void exportJSONOutput(
    ContextType const& context, Bindings const& bindings, std::string const& fileName, int32_t batch);

//!
//! \struct LayerProfile
//! \brief Layer profile information
//!
struct LayerProfile
{
    std::string name;
    std::vector<float> timeMs;
};

//!
//! \class Profiler
//! \brief Collect per-layer profile information, assuming times are reported in the same order
//!
class Profiler : public nvinfer1::IProfiler
{

public:
    void reportLayerTime(char const* layerName, float timeMs) noexcept override;

    void print(std::ostream& os) const noexcept;

    //!
    //! \brief Export a profile to JSON file
    //!
    void exportJSONProfile(std::string const& fileName) const noexcept;

private:
    float getTotalTime() const noexcept
    {
        auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) {
            return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus<float>());
        };
        return std::accumulate(mLayers.begin(), mLayers.end(), 0.F, plusLayerTime);
    }

    float getMedianTime() const noexcept
    {
        if (mLayers.empty())
        {
            return 0.F;
        }
        std::vector<float> totalTime;
        for (size_t run = 0; run < mLayers[0].timeMs.size(); ++run)
        {
            auto const layerTime
                = [&run](float accumulator, LayerProfile const& lp) { return accumulator + lp.timeMs[run]; };
            auto t = std::accumulate(mLayers.begin(), mLayers.end(), 0.F, layerTime);
            totalTime.push_back(t);
        }
        return median(totalTime);
    }

    float getMedianTime(LayerProfile const& p) const noexcept
    {
        return median(p.timeMs);
    }
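
    //! Return the median of vals, or 0 if vals is empty. For an even count this is
    //! the mean of the two middle values, e.g. median({1.F, 2.F, 3.F, 4.F}) == 2.5F.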
    static float median(std::vector<float> vals)
    {
        if (vals.empty())
        {
            return 0.F;
        }
        std::sort(vals.begin(), vals.end());
        if (vals.size() % 2U == 1U)
        {
            return vals[vals.size() / 2U];
        }
        return (vals[vals.size() / 2U - 1U] + vals[vals.size() / 2U]) * 0.5F;
    }

    //! Return the total runtime of the given layer profile.
    float getTotalTime(LayerProfile const& p) const noexcept
    {
        auto const& vals = p.timeMs;
        return std::accumulate(vals.begin(), vals.end(), 0.F, std::plus<float>());
    }

    float getAvgTime(LayerProfile const& p) const noexcept
    {
        return getTotalTime(p) / p.timeMs.size();
    }

    std::vector<LayerProfile> mLayers;
    std::vector<LayerProfile>::iterator mIterator{mLayers.begin()};
    int32_t mUpdatesCount{0};
};
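
//! Example (illustrative): hooking the profiler into a TensorRT execution
//! context via the standard nvinfer1::IExecutionContext::setProfiler() call;
//! TensorRT then invokes reportLayerTime() per layer as inference runs.
//! \code
//! Profiler profiler;
//! context->setProfiler(&profiler);
//! // ... enqueue and synchronize inference ...
//! profiler.print(std::cout);
//! \endcode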

//!
//! \brief Print layer info to logger or export it to output JSON file.
//!
bool printLayerInfo(
    ReportingOptions const& reporting, nvinfer1::ICudaEngine* engine, nvinfer1::IExecutionContext* context);

//! Forward declaration.
struct InferenceEnvironment;

//!
//! \brief Print per-layer perf profile data to logger or export it to output JSON file.
//!
void printPerformanceProfile(ReportingOptions const& reporting, InferenceEnvironment& iEnv);

//!
//! \brief Print binding output values to logger or export them to output JSON file.
//!
void printOutput(ReportingOptions const& reporting, InferenceEnvironment const& iEnv, int32_t batch);

} // namespace sample

#endif // TRT_SAMPLE_REPORTING_H