You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

271 lines
7.9 KiB
C++

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include "MF_Yolov8Infer.h"
#include "ML_Log.h"
namespace fs = std::filesystem;
// Construct the YOLOv8 inference wrapper. All real setup is delegated to the
// MF_ObjectDetectBase constructor; engine and buffer creation happen later in
// initEngine().
MF_Yolov8Infer::MF_Yolov8Infer(const trtUtils::InitParameter& param) : MF_ObjectDetectBase(param)
{
}
// Release the device-side transpose buffer allocated in initEngine().
// NOTE(review): m_output_src_device is also cudaMalloc'd in initEngine() but is
// not freed here — presumably the MF_ObjectDetectBase destructor releases it;
// confirm, otherwise it leaks.
MF_Yolov8Infer::~MF_Yolov8Infer()
{
checkRuntime(cudaFree(m_output_src_transpose_device));
}
bool MF_Yolov8Infer::initEngine(const std::string& _onnxFileName)
{
// 判断传入的onnx文件是否存在
fs::path onnx_path(_onnxFileName);
if (!fs::exists(onnx_path))
{
LOG_ERROR("init engine, onnx file does not exist. \n");
return false;
}
// 替换文件扩展名,将.onnx 替换为 .trt并判断 trt 模型是否已经存在
// 若本地存在trt模型则直接加载trt模型并构建engine
fs::path extension("trt");
fs::path trt_Path = onnx_path.replace_extension(extension);
// std::string trtFileName = trt_Path.string();
if (fs::exists(trt_Path))
{
LOG_INFO("trt model has existed.\n");
std::vector<uchar> engine_data;
int iRet = loadTRTModelData(trt_Path.string(), engine_data);
if (iRet != 0)
{
LOG_ERROR("load trt model failed.\n");
return false;
}
auto runtime = std::unique_ptr<nvinfer1::IRuntime>(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger()));
if (!runtime)
{
LOG_ERROR("on the init engine, create infer runtime failed.\n");
return false;
}
m_engine = std::unique_ptr<nvinfer1::ICudaEngine>(runtime->deserializeCudaEngine(engine_data.data(), engine_data.size()));
if (!m_engine)
{
LOG_ERROR("on the init engine, deserialize engine failed.\n");
return false;
}
m_context = std::unique_ptr<nvinfer1::IExecutionContext>(m_engine->createExecutionContext());
if (!m_context)
{
LOG_ERROR("on the init engine, create excution context failed.\n");
return false;
}
if (m_param.dynamic_batch)
{
m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w));
}
m_output_dims = this->m_context->getBindingDimensions(1);
m_total_objects = m_output_dims.d[2];
assert(m_param.batch_size <= m_output_dims.d[0]);
m_output_area = 1;
for (int i = 1; i < m_output_dims.nbDims; i++)
{
if (m_output_dims.d[i] != 0)
{
m_output_area *= m_output_dims.d[i];
}
}
checkRuntime(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float)));
checkRuntime(cudaMalloc(&m_output_src_transpose_device, m_param.batch_size * m_output_area * sizeof(float)));
float a = float(m_param.dst_h) / m_param.mImage.m_height;
float b = float(m_param.dst_w) / m_param.mImage.m_width;
float scale = a < b ? a : b;
cv::Mat src2dst = (cv::Mat_<float>(2, 3) << scale, 0.f, (-scale * m_param.mImage.m_width + m_param.dst_w + scale - 1) * 0.5,
0.f, scale, (-scale * m_param.mImage.m_height + m_param.dst_h + scale - 1) * 0.5);
cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1);
cv::invertAffineTransform(src2dst, dst2src);
m_dst2src.v0 = dst2src.ptr<float>(0)[0];
m_dst2src.v1 = dst2src.ptr<float>(0)[1];
m_dst2src.v2 = dst2src.ptr<float>(0)[2];
m_dst2src.v3 = dst2src.ptr<float>(1)[0];
m_dst2src.v4 = dst2src.ptr<float>(1)[1];
m_dst2src.v5 = dst2src.ptr<float>(1)[2];
return true;
}
return false;
}
/// @brief Run the full detection pipeline (upload -> preprocess -> infer ->
///        download -> postprocess) over a batch of raw image buffers.
/// @param _bufImgs   input image buffers, converted to cv::Mat one by one.
/// @param _detectRes output detection results; must be non-null.
/// @param _user      unused user context.
/// @return true when every stage succeeds, false otherwise.
bool MF_Yolov8Infer::doTRTInfer(const std::vector<MN_VisionImage::MS_ImageParam>& _bufImgs, std::vector<trtUtils::MR_Result>* _detectRes, int* _user)
{
    // Guard: _detectRes is dereferenced below when collecting results.
    if (_detectRes == nullptr)
    {
        LOG_ERROR("doinfer(), detect result pointer is null. \n");
        return false;
    }
    std::vector<cv::Mat> matImgs;
    matImgs.reserve(_bufImgs.size());
    // const-ref iteration: the original copied each MS_ImageParam per pass.
    for (const auto& _var : _bufImgs)
    {
        cv::Mat image;
        bool bRet = buffer2Mat(_var, image);
        if (!bRet)
        {
            LOG_ERROR("doinfer(), convert buffer to Mat failed. \n");
            return false;
        }
        matImgs.emplace_back(image);
    }
    // Pipeline stages; each returns 0 on success.
    int iRet = this->copyToDevice(matImgs);
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), copy image data from cpu to device failed. \n");
        return false;
    }
    iRet = this->preProcess(matImgs);
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), preprocess image failed. \n");
        return false;
    }
    iRet = this->infer();
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), infer failed. \n");
        return false;
    }
    iRet = this->copyFromDevice(matImgs);
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), copy image data from device to cpu failed. \n");
        return false;
    }
    iRet = this->postProcess(matImgs);
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), postprocess image failed. \n");
        return false;
    }
    iRet = this->getDetectResult(*_detectRes);
    if (iRet != 0)
    {
        LOG_ERROR("doinfer(), get detect result failed. \n");
        return false;
    }
    // Fix: the original returned false here, reporting failure even when every
    // stage above succeeded.
    return true;
}
// Not implemented: the cv::Mat overload always reports failure. Callers must
// use the MS_ImageParam overload above.
bool MF_Yolov8Infer::doTRTInfer(const std::vector<cv::Mat>& _bufImgs, std::vector<trtUtils::MR_Result>* _detectRes, int* _user)
{
return false;
}
// No error state is tracked by this detector; always report an empty message.
std::string MF_Yolov8Infer::getError()
{
    return std::string{};
}
// Reset per-run detection state: zero the device-side object buffer (one count
// slot plus 7 floats per candidate box, per batch image) and clear the
// host-side box lists from the previous run.
void MF_Yolov8Infer::freeMemeory()
{
    const size_t objBufBytes = sizeof(float) * m_param.batch_size * (1 + 7 * m_param.topK);
    checkRuntime(cudaMemset(m_output_objects_device, 0, objBufBytes));
    for (size_t batchIdx = 0; batchIdx < m_param.batch_size; ++batchIdx)
    {
        m_objectss[batchIdx].clear();
    }
}
// Pack every 3-channel frame back-to-back into the device-side staging buffer.
// update 20230302, faster.
// 1. move uint8_to_float in cuda kernel function. For 8*3*1920*1080, cost time 15ms -> 3.9ms
// 2. Todo
int MF_Yolov8Infer::copyToDevice(const std::vector<cv::Mat>& _imgsBatch)
{
    // Bytes per frame; assumes each Mat is contiguous with the configured size.
    const size_t frameBytes = sizeof(unsigned char) * 3 * m_param.mImage.m_height * m_param.mImage.m_width;
    unsigned char* dst = m_input_src_device;
    for (const auto& img : _imgsBatch)
    {
        checkRuntime(cudaMemcpy(dst, img.data, frameBytes, cudaMemcpyHostToDevice));
        dst += frameBytes;
    }
    return 0;
}
// GPU preprocessing pipeline. Stage order is significant — each kernel consumes
// the buffer produced by the previous one:
//   1) letterbox resize into dst_w x dst_h (pad value 114) using m_dst2src
//   2) BGR -> RGB channel swap
//   3) pixel normalization per m_param
//   4) HWC -> CHW layout for the network input binding
// Always returns 0; kernel errors surface through checkRuntime elsewhere.
int MF_Yolov8Infer::preProcess(const std::vector<cv::Mat>& _imgsBatch)
{
resizeDevice(m_param.batch_size, m_input_src_device, m_param.mImage.m_width, m_param.mImage.m_height,
m_input_resize_device, m_param.dst_w, m_param.dst_h, 114, m_dst2src);
bgr2rgbDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h,
m_input_rgb_device, m_param.dst_w, m_param.dst_h);
normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h,
m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param);
hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h,
m_input_hwc_device, m_param.dst_w, m_param.dst_h);
return 0;
}
// Run one synchronous TensorRT execution over the whole batch.
// Binding 0 is the preprocessed CHW input, binding 1 the raw output buffer.
// Returns 0 on success, 1 if executeV2 reports failure.
int MF_Yolov8Infer::infer()
{
    void* bindings[2];
    bindings[0] = m_input_hwc_device;
    bindings[1] = m_output_src_device;
    const bool ok = m_context->executeV2(bindings);
    if (!ok)
    {
        LOG_ERROR("infer failed.\n");
        return 1;
    }
    return 0;
}
// Intentional no-op: postProcess() performs its own device-to-host cudaMemcpy
// of the decoded objects, so nothing needs to be staged back here.
int MF_Yolov8Infer::copyFromDevice(const std::vector<cv::Mat>& _imgsBatch)
{
return 0;
}
/// @brief Decode the raw network output on the device, run NMS, copy the
///        surviving boxes to the host, and map them back to source-image
///        coordinates via the inverse letterbox transform.
/// @return always 0; CUDA errors are reported through checkRuntime.
int MF_Yolov8Infer::postProcess(const std::vector<cv::Mat>& _imgsBatch)
{
    // NOTE(review): "5 + num_class" is a YOLOv5-style row stride (4 box +
    // 1 objectness); YOLOv8 heads typically use 4 + num_class — confirm
    // against decodeDevice's expectations.
    decodeDevice(m_param, m_output_src_device, 5 + m_param.num_class, m_total_objects,
        m_output_area, m_output_objects_device, m_output_objects_width, m_param.topK);
    // nmsv1(nms faster)
    nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1);
    // nmsv2(nms sort)
    //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
    // Host layout per batch slot: [count, box0(7 floats), box1(7 floats), ...].
    checkRuntime(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));
    for (size_t bi = 0; bi < _imgsBatch.size(); bi++)
    {
        // Clamp the decoded count to [0, topK]. Fix: the original compared a
        // signed count against a size_t loop index, so a negative/corrupted
        // count would convert to a huge unsigned value and overrun the buffer.
        int num_boxes = (std::min)((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
        num_boxes = (std::max)(num_boxes, 0);
        for (int i = 0; i < num_boxes; i++)
        {
            // Box record: [x_lt, y_lt, x_rb, y_rb, conf, class, keep_flag].
            float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
            int keep_flag = ptr[6];
            if (keep_flag)
            {
                // Map corners from letterboxed dst space back to source pixels.
                float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
                float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
                float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
                float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
                m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
            }
        }
    }
    return 0;
}
/// @brief Validate the caller-supplied result vector.
/// @return 0 when _result is non-empty, 1 otherwise.
/// NOTE(review): the boxes accumulated in m_objectss are never copied into
/// _result here — presumably a base-class hook populates it; confirm against
/// MF_ObjectDetectBase, otherwise callers receive no detections.
int MF_Yolov8Infer::getDetectResult(std::vector<trtUtils::MR_Result>& _result)
{
    if (_result.empty())   // idiomatic emptiness check instead of size() <= 0
    {
        LOG_INFO("get detect result failed. \n");   // fix: message typo "faild"
        return 1;
    }
    return 0;
}