|
|
@ -11,7 +11,7 @@ bool __check_cuda_runtime(cudaError_t code, const char* op, const char* file, in
|
|
|
|
return true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__device__ void affine_project_device_kernel(utils::AffineMat* matrix, int x, int y, float* proj_x, float* proj_y)
|
|
|
|
__device__ void affine_project_device_kernel(trtUtils::AffineMat* matrix, int x, int y, float* proj_x, float* proj_y)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
*proj_x = matrix->v0 * x + matrix->v1 * y + matrix->v2;
|
|
|
|
*proj_x = matrix->v0 * x + matrix->v1 * y + matrix->v2;
|
|
|
|
*proj_y = matrix->v3 * x + matrix->v4 * y + matrix->v5;
|
|
|
|
*proj_y = matrix->v3 * x + matrix->v4 * y + matrix->v5;
|
|
|
@ -19,7 +19,7 @@ __device__ void affine_project_device_kernel(utils::AffineMat* matrix, int x, in
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void resize_rgb_padding_device_kernel(float* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
__global__ void resize_rgb_padding_device_kernel(float* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
int batch_size, float padding_value, utils::AffineMat matrix)
|
|
|
|
int batch_size, float padding_value, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
@ -81,7 +81,7 @@ __global__ void resize_rgb_padding_device_kernel(float* src, int src_width, int
|
|
|
|
|
|
|
|
|
|
|
|
__global__ void resize_rgb_padding_device_kernel(unsigned char* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
__global__ void resize_rgb_padding_device_kernel(unsigned char* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
int batch_size, float padding_value, utils::AffineMat matrix)
|
|
|
|
int batch_size, float padding_value, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
@ -143,7 +143,7 @@ __global__ void resize_rgb_padding_device_kernel(unsigned char* src, int src_wid
|
|
|
|
}
|
|
|
|
}
|
|
|
|
__global__ void resize_rgb_without_padding_device_kernel(float* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
__global__ void resize_rgb_without_padding_device_kernel(float* src, int src_width, int src_height, int src_area, int src_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area, int dst_volume,
|
|
|
|
int batch_size, utils::AffineMat matrix)
|
|
|
|
int batch_size, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
@ -207,7 +207,7 @@ __global__ void resize_rgb_without_padding_device_kernel(float* src, int src_wid
|
|
|
|
__global__ void resize_gray_without_padding_device_kernel(
|
|
|
|
__global__ void resize_gray_without_padding_device_kernel(
|
|
|
|
float* src, int src_width, int src_height, int src_area,
|
|
|
|
float* src, int src_width, int src_height, int src_area,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area,
|
|
|
|
float* dst, int dst_width, int dst_height, int dst_area,
|
|
|
|
int batch_size, utils::AffineMat matrix)
|
|
|
|
int batch_size, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dx = blockDim.x * blockIdx.x + threadIdx.x;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
|
int dy = blockDim.y * blockIdx.y + threadIdx.y;
|
|
|
@ -344,7 +344,7 @@ __global__ void hwc2chw_device_kernel(float* src, float* dst,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
float* dst, int dstWidth, int dstHeight, float paddingValue, utils::AffineMat matrix)
|
|
|
|
float* dst, int dstWidth, int dstHeight, float paddingValue, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
|
|
@ -363,7 +363,7 @@ void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void resizeDevice(const int& batchSize, unsigned char* src, int srcWidth, int srcHeight,
|
|
|
|
void resizeDevice(const int& batchSize, unsigned char* src, int srcWidth, int srcHeight,
|
|
|
|
float* dst, int dstWidth, int dstHeight, float paddingValue, utils::AffineMat matrix)
|
|
|
|
float* dst, int dstWidth, int dstHeight, float paddingValue, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
@ -380,7 +380,7 @@ void resizeDevice(const int& batchSize, unsigned char* src, int srcWidth, int sr
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
float* dst, int dstWidth, int dstHeight, utils::ColorMode mode, utils::AffineMat matrix)
|
|
|
|
float* dst, int dstWidth, int dstHeight, trtUtils::ColorMode mode, trtUtils::AffineMat matrix)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight + BLOCK_SIZE - 1) / BLOCK_SIZE, (batchSize + BLOCK_SIZE - 1) / BLOCK_SIZE);
|
|
|
@ -392,13 +392,13 @@ void resizeDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
|
|
|
|
|
|
|
|
switch (mode)
|
|
|
|
switch (mode)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
case utils::ColorMode::RGB:
|
|
|
|
case trtUtils::ColorMode::RGB:
|
|
|
|
resize_rgb_without_padding_device_kernel << < grid_size, block_size, 0, nullptr >> > (
|
|
|
|
resize_rgb_without_padding_device_kernel << < grid_size, block_size, 0, nullptr >> > (
|
|
|
|
src, srcWidth, srcHeight, src_area, src_volume,
|
|
|
|
src, srcWidth, srcHeight, src_area, src_volume,
|
|
|
|
dst, dstWidth, dstHeight, dst_area, dst_volume,
|
|
|
|
dst, dstWidth, dstHeight, dst_area, dst_volume,
|
|
|
|
batchSize, matrix);
|
|
|
|
batchSize, matrix);
|
|
|
|
return;
|
|
|
|
return;
|
|
|
|
case utils::ColorMode::GRAY:
|
|
|
|
case trtUtils::ColorMode::GRAY:
|
|
|
|
resize_gray_without_padding_device_kernel << < grid_size, block_size, 0, nullptr >> > (
|
|
|
|
resize_gray_without_padding_device_kernel << < grid_size, block_size, 0, nullptr >> > (
|
|
|
|
src, srcWidth, srcHeight, src_area,
|
|
|
|
src, srcWidth, srcHeight, src_area,
|
|
|
|
dst, dstWidth, dstHeight, dst_area, batchSize, matrix);
|
|
|
|
dst, dstWidth, dstHeight, dst_area, batchSize, matrix);
|
|
|
@ -423,7 +423,7 @@ void bgr2rgbDevice(const int& batchSize, float* src,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void normDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
void normDevice(const int& batchSize, float* src, int srcWidth, int srcHeight,
|
|
|
|
float* dst, int dstWidth, int dstHeight, utils::InitParameter param)
|
|
|
|
float* dst, int dstWidth, int dstHeight, trtUtils::InitParameter param)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((dstWidth * dstHeight * 3 + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
|
dim3 grid_size((dstWidth * dstHeight * 3 + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
@ -622,7 +622,7 @@ __global__ void nms_sort_kernel(int topK, int batch_size, float iou_thresh,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
|
|
|
|
void decodeDevice(trtUtils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea, float* dst, int dstWidth, int dstHeight)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
|
dim3 grid_size((srcHeight + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
@ -636,7 +636,7 @@ void decodeDevice(utils::InitParameter param, float* src, int srcWidth, int srcH
|
|
|
|
dst, dstWidth, dstHeight, dstArea);
|
|
|
|
dst, dstWidth, dstHeight, dstArea);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea)
|
|
|
|
void nmsDeviceV1(trtUtils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 grid_size((param.topK + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
|
dim3 grid_size((param.topK + BLOCK_SIZE - 1) / BLOCK_SIZE,
|
|
|
@ -647,7 +647,7 @@ void nmsDeviceV1(utils::InitParameter param, float* src, int srcWidth, int srcHe
|
|
|
|
src, srcWidth, srcHeight, srcArea);
|
|
|
|
src, srcWidth, srcHeight, srcArea);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void nmsDeviceV2(utils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea,
|
|
|
|
void nmsDeviceV2(trtUtils::InitParameter param, float* src, int srcWidth, int srcHeight, int srcArea,
|
|
|
|
int* idx, float* conf)
|
|
|
|
int* idx, float* conf)
|
|
|
|
{
|
|
|
|
{
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|
dim3 block_size(BLOCK_SIZE, BLOCK_SIZE);
|
|
|
|