#include "MF_Yolov8Infer.h" #include "ML_Log.h" namespace fs = std::filesystem; MF_Yolov8Infer::MF_Yolov8Infer(const trtUtils::InitParameter& param) : MF_ObjectDetectBase(param) { } MF_Yolov8Infer::~MF_Yolov8Infer() { checkRuntime(cudaFree(m_output_src_transpose_device)); } bool MF_Yolov8Infer::initEngine(const std::string& _onnxFileName) { // 判断传入的onnx文件是否存在 fs::path onnx_path(_onnxFileName); if (!fs::exists(onnx_path)) { LOG_ERROR("init engine, onnx file does not exist. \n"); return false; } // 替换文件扩展名,将.onnx 替换为 .trt,并判断 trt 模型是否已经存在 // 若本地存在trt模型,则直接加载trt模型并构建engine fs::path extension("trt"); fs::path trt_Path = onnx_path.replace_extension(extension); // std::string trtFileName = trt_Path.string(); if (fs::exists(trt_Path)) { LOG_INFO("trt model has existed.\n"); std::vector engine_data; int iRet = loadTRTModelData(trt_Path.string(), engine_data); if (iRet != 0) { LOG_ERROR("load trt model failed.\n"); return false; } auto runtime = std::unique_ptr(nvinfer1::createInferRuntime(sample::gLogger.getTRTLogger())); if (!runtime) { LOG_ERROR("on the init engine, create infer runtime failed.\n"); return false; } m_engine = std::unique_ptr(runtime->deserializeCudaEngine(engine_data.data(), engine_data.size())); if (!m_engine) { LOG_ERROR("on the init engine, deserialize engine failed.\n"); return false; } m_context = std::unique_ptr(m_engine->createExecutionContext()); if (!m_context) { LOG_ERROR("on the init engine, create excution context failed.\n"); return false; } if (m_param.dynamic_batch) { m_context->setBindingDimensions(0, nvinfer1::Dims4(m_param.batch_size, 3, m_param.dst_h, m_param.dst_w)); } m_output_dims = this->m_context->getBindingDimensions(1); m_total_objects = m_output_dims.d[2]; assert(m_param.batch_size <= m_output_dims.d[0]); m_output_area = 1; for (int i = 1; i < m_output_dims.nbDims; i++) { if (m_output_dims.d[i] != 0) { m_output_area *= m_output_dims.d[i]; } } checkRuntime(cudaMalloc(&m_output_src_device, m_param.batch_size * m_output_area * sizeof(float))); checkRuntime(cudaMalloc(&m_output_src_transpose_device, m_param.batch_size * m_output_area * sizeof(float))); float a = float(m_param.dst_h) / m_param.mImage.m_height; float b = float(m_param.dst_w) / m_param.mImage.m_width; float scale = a < b ? a : b; cv::Mat src2dst = (cv::Mat_(2, 3) << scale, 0.f, (-scale * m_param.mImage.m_width + m_param.dst_w + scale - 1) * 0.5, 0.f, scale, (-scale * m_param.mImage.m_height + m_param.dst_h + scale - 1) * 0.5); cv::Mat dst2src = cv::Mat::zeros(2, 3, CV_32FC1); cv::invertAffineTransform(src2dst, dst2src); m_dst2src.v0 = dst2src.ptr(0)[0]; m_dst2src.v1 = dst2src.ptr(0)[1]; m_dst2src.v2 = dst2src.ptr(0)[2]; m_dst2src.v3 = dst2src.ptr(1)[0]; m_dst2src.v4 = dst2src.ptr(1)[1]; m_dst2src.v5 = dst2src.ptr(1)[2]; return true; } return false; } bool MF_Yolov8Infer::doTRTInfer(const std::vector& _bufImgs, std::vector* _detectRes, int* _user) { std::vector matImgs; for (auto _var : _bufImgs) { cv::Mat image; bool bRet = buffer2Mat(_var, image); if (!bRet) { LOG_ERROR("doinfer(), convert buffer to Mat failed. \n"); return false; } matImgs.emplace_back(image); } int iRet = 0; iRet = this->copyToDevice(matImgs); if (iRet != 0) { LOG_ERROR("doinfer(), copy image data from cpu to device failed. \n"); return false; } iRet = this->preProcess(matImgs); if (iRet != 0) { LOG_ERROR("doinfer(), preprocess image failed. \n"); return false; } iRet = this->infer(); if (iRet != 0) { LOG_ERROR("doinfer(), infer failed. 
\n"); return false; } iRet = this->copyFromDevice(matImgs); if (iRet != 0) { LOG_ERROR("doinfer(), copy image data from device to cpu failed. \n"); return false; } iRet = this->postProcess(matImgs); if (iRet != 0) { LOG_ERROR("doinfer(), postprocess image failed. \n"); return false; } iRet = this->getDetectResult(*_detectRes); if (iRet != 0) { LOG_ERROR("doinfer(), get detect result failed. \n"); return false; } return false; } bool MF_Yolov8Infer::doTRTInfer(const std::vector& _bufImgs, std::vector* _detectRes, int* _user) { return false; } std::string MF_Yolov8Infer::getError() { return ""; } void MF_Yolov8Infer::freeMemeory() { checkRuntime(cudaMemset(m_output_objects_device, 0, sizeof(float) * m_param.batch_size * (1 + 7 * m_param.topK))); for (size_t bi = 0; bi < m_param.batch_size; bi++) { m_objectss[bi].clear(); } } int MF_Yolov8Infer::copyToDevice(const std::vector& _imgsBatch) { // update 20230302, faster. // 1. move uint8_to_float in cuda kernel function. For 8*3*1920*1080, cost time 15ms -> 3.9ms // 2. Todo unsigned char* pi = m_input_src_device; for (size_t i = 0; i < _imgsBatch.size(); i++) { checkRuntime(cudaMemcpy(pi, _imgsBatch[i].data, sizeof(unsigned char) * 3 * m_param.mImage.m_height * m_param.mImage.m_width, cudaMemcpyHostToDevice)); pi += 3 * m_param.mImage.m_height * m_param.mImage.m_width; } return 0; } int MF_Yolov8Infer::preProcess(const std::vector& _imgsBatch) { resizeDevice(m_param.batch_size, m_input_src_device, m_param.mImage.m_width, m_param.mImage.m_height, m_input_resize_device, m_param.dst_w, m_param.dst_h, 114, m_dst2src); bgr2rgbDevice(m_param.batch_size, m_input_resize_device, m_param.dst_w, m_param.dst_h, m_input_rgb_device, m_param.dst_w, m_param.dst_h); normDevice(m_param.batch_size, m_input_rgb_device, m_param.dst_w, m_param.dst_h, m_input_norm_device, m_param.dst_w, m_param.dst_h, m_param); hwc2chwDevice(m_param.batch_size, m_input_norm_device, m_param.dst_w, m_param.dst_h, m_input_hwc_device, m_param.dst_w, m_param.dst_h); return 0; } int MF_Yolov8Infer::infer() { float* bindings[] = { m_input_hwc_device, m_output_src_device }; bool bRet = m_context->executeV2((void**)bindings); if (!bRet) { LOG_ERROR("infer failed.\n"); return 1; } return 0; } int MF_Yolov8Infer::copyFromDevice(const std::vector& _imgsBatch) { return 0; } int MF_Yolov8Infer::postProcess(const std::vector& _imgsBatch) { decodeDevice(m_param, m_output_src_device, 5 + m_param.num_class, m_total_objects, m_output_area, m_output_objects_device, m_output_objects_width, m_param.topK); // nmsv1(nms faster) nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1); // nmsv2(nms sort) //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK, m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device); checkRuntime(cudaMemcpy(m_output_objects_host, m_output_objects_device, m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost)); for (size_t bi = 0; bi < _imgsBatch.size(); bi++) { int num_boxes = (std::min)((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK); for (size_t i = 0; i < num_boxes; i++) { float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1; int keep_flag = ptr[6]; if (keep_flag) { float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2; float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + 
int MF_Yolov8Infer::infer()
{
    float* bindings[] = { m_input_hwc_device, m_output_src_device };
    bool bRet = m_context->executeV2((void**)bindings);
    if (!bRet)
    {
        LOG_ERROR("infer failed.\n");
        return 1;
    }
    return 0;
}

int MF_Yolov8Infer::copyFromDevice(const std::vector<cv::Mat>& _imgsBatch)
{
    return 0;
}

int MF_Yolov8Infer::postProcess(const std::vector<cv::Mat>& _imgsBatch)
{
    // Decode the raw network output into candidate boxes on the device, then run NMS.
    decodeDevice(m_param, m_output_src_device, 5 + m_param.num_class, m_total_objects, m_output_area,
        m_output_objects_device, m_output_objects_width, m_param.topK);
    // nmsv1 (nms faster)
    nmsDeviceV1(m_param, m_output_objects_device, m_output_objects_width, m_param.topK,
        m_param.topK * m_output_objects_width + 1);
    // nmsv2 (nms sort)
    //nmsDeviceV2(m_param, m_output_objects_device, m_output_objects_width, m_param.topK,
    //    m_param.topK * m_output_objects_width + 1, m_output_idx_device, m_output_conf_device);
    checkRuntime(cudaMemcpy(m_output_objects_host, m_output_objects_device,
        m_param.batch_size * sizeof(float) * (1 + 7 * m_param.topK), cudaMemcpyDeviceToHost));

    // Map the kept boxes back to the original image with the dst2src affine transform.
    for (size_t bi = 0; bi < _imgsBatch.size(); bi++)
    {
        int num_boxes = (std::min)((int)(m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1))[0], m_param.topK);
        for (int i = 0; i < num_boxes; i++)
        {
            float* ptr = m_output_objects_host + bi * (m_param.topK * m_output_objects_width + 1) + m_output_objects_width * i + 1;
            int keep_flag = ptr[6];
            if (keep_flag)
            {
                float x_lt = m_dst2src.v0 * ptr[0] + m_dst2src.v1 * ptr[1] + m_dst2src.v2;
                float y_lt = m_dst2src.v3 * ptr[0] + m_dst2src.v4 * ptr[1] + m_dst2src.v5;
                float x_rb = m_dst2src.v0 * ptr[2] + m_dst2src.v1 * ptr[3] + m_dst2src.v2;
                float y_rb = m_dst2src.v3 * ptr[2] + m_dst2src.v4 * ptr[3] + m_dst2src.v5;
                m_objectss[bi].emplace_back(x_lt, y_lt, x_rb, y_rb, ptr[4], (int)ptr[5]);
            }
        }
    }
    return 0;
}

int MF_Yolov8Infer::getDetectResult(std::vector<trtUtils::DetectResult>& _result)
{
    if (_result.empty())
    {
        LOG_INFO("get detect result failed.\n");
        return 1;
    }
    return 0;
}
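// ---------------------------------------------------------------------------
// A minimal usage sketch of this class, assuming the trtUtils::InitParameter fields
// referenced above (batch_size, dst_w/dst_h, mImage, num_class, topK) and the assumed
// trtUtils::ImageBuffer / trtUtils::DetectResult element types. exampleRun, the field
// values, and the model path are purely illustrative.
// ---------------------------------------------------------------------------
static bool exampleRun(const std::vector<trtUtils::ImageBuffer>& _frames)
{
    trtUtils::InitParameter param;
    param.batch_size = 1;
    param.dst_w = 640;               // network input size
    param.dst_h = 640;
    param.mImage.m_width = 1920;     // source image size
    param.mImage.m_height = 1080;
    param.num_class = 80;
    param.topK = 300;

    MF_Yolov8Infer detector(param);
    if (!detector.initEngine("yolov8n.onnx"))   // expects yolov8n.trt next to the .onnx
    {
        return false;
    }

    std::vector<trtUtils::DetectResult> results;
    return detector.doTRTInfer(_frames, &results, nullptr);
}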