Front-view stitching: CUDA operator implementation

main
wangchongwu 5 months ago
parent 7a9c689e89
commit f389ec6211

@ -1,8 +1,10 @@
cmake_minimum_required(VERSION 3.15.0)
project(stitch VERSION 0.1.0 LANGUAGES C CXX)
project(stitch VERSION 0.1.0)
set(CMAKE_CXX_STANDARD 17)
SET(ArithStitchDir stitch)
IF(WIN32)

@ -5,6 +5,9 @@ set(build_time ${COMPILE_TIME})
#
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/Version.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/Version.h)
enable_language(CUDA)
if(MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
add_compile_options(/wd4996)
@ -35,14 +38,18 @@ find_package(CUDA REQUIRED)
set(CMAKE_CUDA_STANDARD 11)
include_directories(${CUDA_INCLUDE_DIRS})
# cuda
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/MD")
# CUDA
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -D__CUDA_NO_HALF_SUPPORT")
set(ArithSrcDIR_MAIN "src") #
# Use the Common (utils) sources as well
file(GLOB libsrcs ${ArithSrcDIR_MAIN}/*.cpp ${ArithSrcDIR_MAIN}/*.c ${ArithSrcDIR_MAIN}/*.h ${ArithSrcDIR_MAIN}/*.hpp)
file(GLOB libsrcs ${ArithSrcDIR_MAIN}/mapKernel.cu ${ArithSrcDIR_MAIN}/*.cpp ${ArithSrcDIR_MAIN}/*.c ${ArithSrcDIR_MAIN}/*.h ${ArithSrcDIR_MAIN}/*.hpp)
file(GLOB CommonSrc ${ArithSrcDIR_MAIN}/utils/*.cpp ${ArithSrcDIR_MAIN}/utils/*.c ${ArithSrcDIR_MAIN}/utils/*.h ${ArithSrcDIR_MAIN}/utils/*.hpp)
add_library(${LIB_STITCH} SHARED ${libsrcs} ${CommonSrc}) #
cuda_add_library(${LIB_STITCH} SHARED ${libsrcs} ${CommonSrc}) #
#
target_include_directories(${LIB_STITCH} PUBLIC
@ -56,6 +63,7 @@ target_link_libraries(${LIB_STITCH}
OpenMP::OpenMP_CXX
${OpenCV_LIBS}
${CERES_LIBRARIES}
${CUDA_LIBRARIES}
)
# # # gcc0

@ -0,0 +1,61 @@
#include "Arith_FrontProj.h"
#include "Arith_Utils.h"
#include "StitchStruct.h"
#include "Arith_GeoSolver.h"
Pole getPoleFromImgWithH(cv::Mat H, cv::Point2f pt, float dep)
{
// Project to ground coordinates; this step can reuse the earlier optimization result
cv::Point2f grdPt = warpPointWithH(H, pt);
// Fill in the depth and convert to the commonly used NUE (north-up-east) coordinate frame
PointXYZ grdPtXYZ = { 0 };
grdPtXYZ.X = grdPt.y;
grdPtXYZ.Y = -dep;
grdPtXYZ.Z = grdPt.x;
// Convert the ground point to polar coordinates
Pole pole = getPoleFromXYZ(grdPtXYZ);
return pole;
}
cv::Point2f getImgPosFromPole(cv::Mat H_inv, Pole _pole, float dep)
{
PointXYZ virPt = getXYZFromPole(_pole);
// Project the virtual point onto the ground
float ratio = -dep / virPt.Y;
PointXYZ realPt = { 0 };
realPt.X = virPt.X * ratio;
realPt.Y = virPt.Y * ratio;
realPt.Z = virPt.Z * ratio;
// Convert to the east-north-down ground frame
PointXYZ realPtGeo = { 0 };
realPtGeo.X = realPt.Z;
realPtGeo.Y = realPt.X;
realPtGeo.Z = -realPt.Y;
// Project back to the image plane
cv::Point2f px = warpPointWithH(H_inv, cv::Point2f(realPtGeo.X, realPtGeo.Y));
return px;
}
Pole getPoleFromFPan(cv::Point2f pt, FPanInfo _panPara)
{
Pole _pole = { 0 };
_pole.beta = _panPara.center.fAz + (pt.x - _panPara.m_pan_width / 2) * _panPara.fAglRes;
_pole.alpha = _panPara.center.fPt + (_panPara.m_pan_height / 2 - pt.y) * _panPara.fAglRes;
return _pole;
}
cv::Point2f getFPanFromPole(Pole _pole, FPanInfo _panPara)
{
cv::Point2f pt = { 0 };
pt.x = DEGLIM(_pole.beta - _panPara.center.fAz) / _panPara.fAglRes + _panPara.m_pan_width / 2;
pt.y = DEGLIM(_panPara.center.fPt - _pole.alpha) / _panPara.fAglRes + _panPara.m_pan_height / 2;
return pt;
}
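For orientation, a minimal round-trip sketch of this projection model (a sketch only, not part of the commit; it assumes the host-side getXYZFromPole clamps small distances the same way the device copy in mapKernel.cu does, and uses hypothetical pixel values):

#include "Arith_FrontProj.h"
// Hedged usage sketch of the front-view projection helpers above.
void roundTripExample(const cv::Mat& H, const FPanInfo& panPara, float dep)
{
    cv::Point2f px(640.f, 360.f);                        // hypothetical source pixel
    Pole pole = getPoleFromImgWithH(H, px, dep);         // image -> origin-centered polar
    cv::Point2f panPt = getFPanFromPole(pole, panPara);  // polar -> panorama pixel (panPara.fAglRes is the angular step per pixel)
    Pole poleBack = getPoleFromFPan(panPt, panPara);     // panorama pixel -> polar
    cv::Point2f pxBack = getImgPosFromPole(H.inv(), poleBack, dep); // polar -> back into the frame
    // pxBack should reproduce px up to floating-point error.
}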

@ -0,0 +1,11 @@
#include "Arith_CoordModule.h"
#include "Arith_SysStruct.h"
#include "opencv2/opencv.hpp"
#include "StitchStruct.h"
// Front-view scan projection model
Pole getPoleFromImgWithH(cv::Mat H, cv::Point2f pt, float dep);
cv::Point2f getImgPosFromPole(cv::Mat H_inv, Pole _pole, float dep);
Pole getPoleFromFPan(cv::Point2f pt, FPanInfo _panPara);
cv::Point2f getFPanFromPole(Pole _pole, FPanInfo _panPara);

@ -3,9 +3,10 @@
#include "Arith_Utils.h"
#include "Arith_CoordModule.h"
#include "Arith_FeaMatch.h"
#include "Arith_FrontProj.h"
#include <opencv2/opencv.hpp>
#include <omp.h>
#include <cuda_runtime.h>
using namespace std;
using namespace cv;
@ -34,8 +35,6 @@ FrontStitch::~FrontStitch()
FPanInfo FrontStitch::Init(FrameInfo info, float AzRange, float PtRange)
{
_panPara.fAglRes = 0.02;
@ -81,10 +80,10 @@ BYTE8 FrontStitch::GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo info)
Pole leftBottom_map = getPoleFromImgWithH(H, cv::Point2f(0,img.u32Height), info.nEvHeight);
// Convert to panorama image coordinates
cv::Point2f p_leftTop_map = getFPanFromPole(leftTop_map);
cv::Point2f p_rightTop_map = getFPanFromPole(rightTop_map);
cv::Point2f p_rightBottom_map = getFPanFromPole(rightBottom_map);
cv::Point2f p_leftBottom_map = getFPanFromPole(leftBottom_map);
cv::Point2f p_leftTop_map = getFPanFromPole(leftTop_map,_panPara);
cv::Point2f p_rightTop_map = getFPanFromPole(rightTop_map, _panPara);
cv::Point2f p_rightBottom_map = getFPanFromPole(rightBottom_map, _panPara);
cv::Point2f p_leftBottom_map = getFPanFromPole(leftBottom_map, _panPara);
// Compute the covered range on the panorama
int right = max(max(max(p_leftTop_map.x, p_rightTop_map.x), p_rightBottom_map.x), p_leftBottom_map.x);
@ -96,30 +95,24 @@ BYTE8 FrontStitch::GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo info)
int yRnage = bottom - top;
// Back-map to pixel coordinates
int valid_top = std::max(0, top);
int valid_bottom = std::min(_panImage.cols, bottom);
int valid_left = std::max(0, left);
int valid_right = std::min(_panImage.rows, right);
#if 1
#pragma omp parallel for
for (int i = valid_top; i < valid_bottom; i++)
{
for (int j = valid_left; j < valid_right; j++)
{
// Convert to pixel coordinates
cv::Point2f p_img = getImgPosFromPole(H, getPoleFromFPan(Point2f(j, i)), dep);
if (p_img.x >= 200 && p_img.y >= 200 && p_img.x < img.u32Width-200 && p_img.y < img.u32Height-200)
{
auto rgbVal = Interpolation_RGB24(rgb.data, rgb.cols, rgb.rows, p_img.x, p_img.y);
_panImage.data[3 * (i * _panImage.rows + j) + 0] = rgbVal.R;
_panImage.data[3 * (i * _panImage.rows + j) + 1] = rgbVal.G;
_panImage.data[3 * (i * _panImage.rows + j) + 2] = rgbVal.B;
}
}
}
#endif
MINMAXRECT32S RANGES = { 0 };
// Clamp the update region to the panorama bounds (rows bound y, cols bound x)
RANGES.minY = std::max(0, top);
RANGES.maxY = std::min(_panImage.rows, bottom);
RANGES.minX = std::max(0, left);
RANGES.maxX = std::min(_panImage.cols, right);
cv::Rect2d roi(RANGES.minX, RANGES.minY, RANGES.maxX - RANGES.minX, RANGES.maxY- RANGES.minY);
cv::Mat H_inv = H.inv();
//UpdatePan_CPU(rgb, _panImage, roi, H_inv, _panPara, dep);
UpdatePan_CUDA(rgb, _panImage, roi, H_inv, _panPara, dep);
return 0;
}
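A minimal calling sketch for this path (hypothetical values; it assumes FrontStitch can be constructed directly, which this diff does not show):

// Hedged usage sketch: initialize once, then geo-stitch each incoming frame.
FrontStitch stitcher;
FrameInfo info = {};                  // per-frame pose/attitude; nEvHeight is the height above ground
info.nEvHeight = 500;                 // hypothetical value, used as the depth passed to the projection
FPanInfo panPara = stitcher.Init(info, /*AzRange*/ 60.f, /*PtRange*/ 30.f);
GD_VIDEO_FRAME_S frame = {};          // RGB frame; u32Width/u32Height and data filled by the caller
stitcher.GeoStitch(frame, info);
GD_VIDEO_FRAME_S pan = stitcher.ExportPanAddr();   // panorama buffer for display or saving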
@ -159,60 +152,27 @@ GD_VIDEO_FRAME_S FrontStitch::ExportPanAddr()
return pan_out;
}
Pole FrontStitch::getPoleFromImgWithH(cv::Mat H, cv::Point2f pt, float dep)
{
// Project to ground coordinates; this step can reuse the earlier optimization result
cv::Point2f grdPt = warpPointWithH(H, pt);
// Fill in the depth and convert to the commonly used NUE (north-up-east) coordinate frame
PointXYZ grdPtXYZ = { 0 };
grdPtXYZ.X = grdPt.y;
grdPtXYZ.Y = -dep;
grdPtXYZ.Z = grdPt.x;
// Convert the ground point to polar coordinates
Pole pole = getPoleFromXYZ(grdPtXYZ);
return pole;
}
cv::Point2f FrontStitch::getImgPosFromPole(cv::Mat H, Pole _pole, float dep)
void UpdatePan_CPU(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat H_inv, FPanInfo _panPara, float dep)
{
PointXYZ virPt = getXYZFromPole(_pole);
// Project the virtual point onto the ground
float ratio = -dep / virPt.Y;
PointXYZ realPt = { 0 };
realPt.X = virPt.X * ratio;
realPt.Y = virPt.Y * ratio;
realPt.Z = virPt.Z * ratio;
// Convert to the east-north-down ground frame
PointXYZ realPtGeo = { 0 };
realPtGeo.X = realPt.Z;
realPtGeo.Y = realPt.X;
realPtGeo.Z = -realPt.Y;
// Project back to the image plane
cv::Point2f px = warpPointWithH(H.inv(), cv::Point2f(realPtGeo.X, realPtGeo.Y));
return px;
cv::Mat subImg = pan(roi);
// Iterate over the whole sub-image
#pragma omp parallel for
for (int i = 0; i < (int)roi.height; i++)
{
for (int j = 0; j < (int)roi.width; j++)
{
cv::Point2f p_img = getImgPosFromPole(H_inv, getPoleFromFPan(Point2f(j + roi.x, i + roi.y), _panPara), dep);
if (p_img.x >= 200 && p_img.y >= 200 && p_img.x < rgbFrame.cols - 200 && p_img.y < rgbFrame.rows - 200)
{
auto rgbVal = Interpolation_RGB24(rgbFrame.data, rgbFrame.cols, rgbFrame.rows, p_img.x, p_img.y);
uchar* row = subImg.ptr<uchar>(i);
row[3 * j + 0] = rgbVal.R;
row[3 * j + 1] = rgbVal.G;
row[3 * j + 2] = rgbVal.B;
}
}
}
}
Pole FrontStitch::getPoleFromFPan(cv::Point2f pt)
{
Pole _pole = { 0 };
_pole.beta = _panPara.center.fAz + (pt.x - _panPara.m_pan_width / 2) * _panPara.fAglRes;
_pole.alpha = _panPara.center.fPt + (_panPara.m_pan_height / 2 - pt.y) * _panPara.fAglRes;
return _pole;
}
cv::Point2f FrontStitch::getFPanFromPole(Pole _pole)
{
cv::Point2f pt = { 0 };
pt.x = DEGLIM(_pole.beta - _panPara.center.fAz) / _panPara.fAglRes + _panPara.m_pan_width / 2;
pt.y = DEGLIM(_panPara.center.fPt - _pole.alpha) / _panPara.fAglRes + _panPara.m_pan_height / 2;
return pt;
}
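UpdatePan_CPU is kept as a reference path for the CUDA kernel; a quick consistency check between the two could look like the sketch below (the function name and use of cv::norm are assumptions; note the CPU path keeps a 200-pixel margin around the frame border, so differences are expected near the edges):

// Hedged sketch: compare the CPU and CUDA panorama updates on identical inputs.
void comparePanUpdates(cv::Mat rgbFrame, const cv::Mat& pan, cv::Rect2d roi,
                       cv::Mat H_inv, FPanInfo panPara, float dep)
{
    cv::Mat panCpu = pan.clone();
    cv::Mat panCuda = pan.clone();
    UpdatePan_CPU(rgbFrame, panCpu, roi, H_inv, panPara, dep);
    UpdatePan_CUDA(rgbFrame, panCuda, roi, H_inv, panPara, dep);
    // Largest absolute per-channel difference over the whole panorama.
    double maxDiff = cv::norm(panCpu, panCuda, cv::NORM_INF);
    std::cout << "max CPU/CUDA difference: " << maxDiff << std::endl;
}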

@ -14,6 +14,8 @@
#define _FRONTSTITCH_H
#include "Arith_GeoSolver.h"
#include <cuda_runtime.h>
#include "opencv2/opencv.hpp"
class FrontStitch:public API_FrontStitch
{
@ -37,20 +39,9 @@ public:
GD_VIDEO_FRAME_S ExportPanAddr();
private:
GeoSolver* _GeoSolver;
// Project an image point to origin-centered polar coordinates using H. Depth is required because H discards it.
Pole getPoleFromImgWithH(cv::Mat H, cv::Point2f pt, float dep);
// Convert origin-centered polar coordinates back to image coordinates
cv::Point2f getImgPosFromPole(cv::Mat H, Pole _pole, float dep);
// Panorama image coordinates to origin-centered polar coordinates
Pole getPoleFromFPan(cv::Point2f pt);
// Origin-centered polar coordinates to panorama image coordinates
cv::Point2f getFPanFromPole(Pole _pole);
GeoSolver* _GeoSolver;
private:
FPanInfo _panPara; // panorama configuration
@ -59,5 +50,11 @@ private:
};
void UpdatePan_CPU(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep);
void UpdatePan_CUDA(cv::Mat rgbaFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep);
#endif

@ -306,6 +306,8 @@ cv::Point2f warpPointWithH(cv::Mat H, cv::Point2f srcPt)
return cv::Point2f(warpedX,warpedY);
}
vector<cv::Point2f> warpRectWithH(cv::Mat H,cv::Size size)
{
vector<cv::Point2f> _res;

@ -94,6 +94,7 @@ double computeQuadrilateralIOU(const vector<cv::Point2f>& quad1, const vector<cv
// Warp a point with homography H
cv::Point2f warpPointWithH(cv::Mat H,cv::Point2f srcPt);
// Warp an image rectangle with H, returning the corners in clockwise order
vector<cv::Point2f> warpRectWithH(cv::Mat H,cv::Size size);

@ -0,0 +1,272 @@
#include <cuda_runtime.h>
#include "Arith_FrontProj.h"
#include "Arith_Utils.h"
#include "Arith_SysStruct.h"
__device__ Pole device_getPoleFromFPan(POINT32S pt, FPanInfo _panPara)
{
Pole _pole = { 0 };
_pole.beta = _panPara.center.fAz + (pt.x - _panPara.m_pan_width / 2) * _panPara.fAglRes;
_pole.alpha = _panPara.center.fPt + (_panPara.m_pan_height / 2 - pt.y) * _panPara.fAglRes;
return _pole;
}
__device__ PointXYZ device_getXYZFromPole(Pole targetPole)
{
// When the range is unknown, coordinates cannot be converted exactly between frames with different origins.
// But the origin offsets between frames on the same platform are usually far smaller than the target range, so the error is small at long range; its magnitude can be estimated from the angle (linear offset / range) in radians.
// E.g. with a 1 m offset and a 3000 m target the conversion error is (1/1000 - 1/3000) rad, roughly 0.04°.
// The pod's passive-location mode allows a minimum height of 10 m.
if (targetPole.distance < 10)
{
targetPole.distance = 10000;
}
PointXYZ result;
double aa, bb, cc;
// Spherical to Cartesian: X = d*cos(alpha)*cos(beta), Y = d*sin(alpha), Z = d*cos(alpha)*sin(beta)
bb = sin(RADIAN(targetPole.alpha)) * targetPole.distance;
double ac = sqrt(targetPole.distance * targetPole.distance - bb * bb);
aa = cos(RADIAN(targetPole.beta)) * ac;
cc = sin(RADIAN(targetPole.beta)) * ac;
result.X = aa;
result.Y = bb;
result.Z = cc;
return result;
}
__device__ POINT32S device_getImgPosFromPole(double* H_inv, Pole _pole, float dep)
{
PointXYZ virPt = device_getXYZFromPole(_pole);
// Project the virtual point onto the ground
float ratio = -dep / virPt.Y;
PointXYZ realPt = { 0 };
realPt.X = virPt.X * ratio;
realPt.Y = virPt.Y * ratio;
realPt.Z = virPt.Z * ratio;
// Convert to the east-north-down ground frame
PointXYZ realPtGeo = { 0 };
realPtGeo.X = realPt.Z;
realPtGeo.Y = realPt.X;
realPtGeo.Z = -realPt.Y;
// Project back to the image plane (row-major 3x3 H_inv, same mapping as warpPointWithH on the host)
POINT32S dstP = { 0 };
dstP.x = (H_inv[0] * realPtGeo.X + H_inv[1] * realPtGeo.Y + H_inv[2]) / (H_inv[6] * realPtGeo.X + H_inv[7] * realPtGeo.Y + H_inv[8]);
dstP.y = (H_inv[3] * realPtGeo.X + H_inv[4] * realPtGeo.Y + H_inv[5]) / (H_inv[6] * realPtGeo.X + H_inv[7] * realPtGeo.Y + H_inv[8]);
return dstP;
}
__device__ PXL_VAL device_Interpolation_RGB24(UBYTE8* pSrcImage, int iWidth, int iHeight, float x, float y)
{
PXL_VAL VAL = { 0 };
int nStride = iWidth * 3;
// Coordinates of the four nearest pixels: (i1, j1), (i2, j1), (i1, j2), (i2, j2)
int i1, i2;
int j1, j2;
// Values of the four nearest pixels
UBYTE8 f1[3], f2[3], f3[3], f4[3];
// Compute the coordinates of the four nearest pixels
i1 = MAX((int)x, 0);
i2 = MIN(i1 + 1, iWidth - 1);
j1 = MAX((int)y, 0);
j2 = MIN(j1 + 1, iHeight - 1);
float t, s; //t = x - [x], s = y- [y];
t = x - i1;
s = y - j1;
// Weighted contributions of the four nearest pixels
float uv0, uv1, uv2, uv3; // weights sum to 1: uv0 + uv1 + uv2 + uv3 = 1
uv0 = (1 - s) * (1 - t);
uv1 = (1 - s) * t;
uv2 = (1 - t) * s;
uv3 = s * t;
// Handle the border cases separately
if (x - iWidth + 1 > 0)
{
// The sample point lies on the right image border
if (y - iHeight + 1 < 0)
{
// Degenerate case: return the nearest pixel value directly
VAL.R = *(pSrcImage + nStride * j1 + i1 * 3);
VAL.G = *(pSrcImage + nStride * j1 + i1 * 3 + 1);
VAL.B = *(pSrcImage + nStride * j1 + i1 * 3 + 2);
return VAL;
}
else
{
// On the right/left image border but not the last point; a single interpolation along y is enough
f1[0] = *(pSrcImage + nStride * j1 + i1 * 3);
f1[1] = *(pSrcImage + nStride * j1 + i1 * 3 + 1);
f1[2] = *(pSrcImage + nStride * j1 + i1 * 3 + 2);
f3[0] = *(pSrcImage + nStride * j2 + i1 * 3 + 0);
f3[1] = *(pSrcImage + nStride * j2 + i1 * 3 + 1);
f3[2] = *(pSrcImage + nStride * j2 + i1 * 3 + 2);
VAL.R = f1[0] + s * (f3[0] - f1[0]);
VAL.G = f1[1] + s * (f3[1] - f1[1]);
VAL.B = f1[2] + s * (f3[2] - f1[2]);
// Return the interpolated value
return VAL;
}
}
else if (y - iHeight + 1 > 0)
{
// The point lies on the bottom image border and is not the last point; a single interpolation along x is enough
//f1 = *(pSrcImage + iWidth * j1 + i1);
//f2 = *(pSrcImage + iWidth * j1 + i2);
f1[0] = *(pSrcImage + nStride * j1 + i1 * 3);
f1[1] = *(pSrcImage + nStride * j1 + i1 * 3 + 1);
f1[2] = *(pSrcImage + nStride * j1 + i1 * 3 + 2);
f2[0] = *(pSrcImage + nStride * j1 + i2 * 3 + 0);
f2[1] = *(pSrcImage + nStride * j1 + i2 * 3 + 1);
f2[2] = *(pSrcImage + nStride * j1 + i2 * 3 + 2);
VAL.R = f1[0] + t * (f2[0] - f1[0]);
VAL.G = f1[1] + t * (f2[1] - f1[1]);
VAL.B = f1[2] + t * (f2[2] - f1[2]);
// Return the interpolated value
return VAL;
}
else
{
// Fetch the four nearest pixel values
f1[0] = *(pSrcImage + nStride * j1 + i1 * 3);
f1[1] = *(pSrcImage + nStride * j1 + i1 * 3 + 1);
f1[2] = *(pSrcImage + nStride * j1 + i1 * 3 + 2);
f2[0] = *(pSrcImage + nStride * j1 + i2 * 3);
f2[1] = *(pSrcImage + nStride * j1 + i2 * 3 + 1);
f2[2] = *(pSrcImage + nStride * j1 + i2 * 3 + 2);
f3[0] = *(pSrcImage + nStride * j2 + i1 * 3);
f3[1] = *(pSrcImage + nStride * j2 + i1 * 3 + 1);
f3[2] = *(pSrcImage + nStride * j2 + i1 * 3 + 2);
f4[0] = *(pSrcImage + nStride * j2 + i2 * 3);
f4[1] = *(pSrcImage + nStride * j2 + i2 * 3 + 1);
f4[2] = *(pSrcImage + nStride * j2 + i2 * 3 + 2);
VAL.R = uv0 * f1[0] + uv1 * f2[0] + uv2 * f3[0] + uv3 * f4[0];
VAL.G = uv0 * f1[1] + uv1 * f2[1] + uv2 * f3[1] + uv3 * f4[1];
VAL.B = uv0 * f1[2] + uv1 * f2[2] + uv2 * f3[2] + uv3 * f4[2];
return VAL;
}
return VAL;
}
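For intuition about the weights above: at a hypothetical sample point (x, y) = (10.25, 20.75), t = 0.25 and s = 0.75, giving uv0 = 0.1875, uv1 = 0.0625, uv2 = 0.5625, uv3 = 0.1875, which sum to 1. A tiny host-side check (illustrative only, not part of the commit):

#include <cassert>
#include <cmath>
// Illustrative check of the bilinear weights used by device_Interpolation_RGB24.
void checkBilinearWeights()
{
    float x = 10.25f, y = 20.75f;      // hypothetical sample position
    float t = x - (int)x;              // 0.25
    float s = y - (int)y;              // 0.75
    float uv0 = (1 - s) * (1 - t);     // 0.1875
    float uv1 = (1 - s) * t;           // 0.0625
    float uv2 = (1 - t) * s;           // 0.5625
    float uv3 = s * t;                 // 0.1875
    assert(std::fabs(uv0 + uv1 + uv2 + uv3 - 1.0f) < 1e-6f);
}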
__global__ void projKernel(unsigned char* framePtr, SIZE32S size, unsigned char* panSubPtr,
RECT32S roi, double* h_inv, FPanInfo _panPara, float dep, unsigned char* mask)
{
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
// Guard against threads outside the ROI (the grid is rounded up to a multiple of the block size)
if (j >= roi.w || i >= roi.h)
{
return;
}
POINT32S p_img = device_getImgPosFromPole(h_inv, device_getPoleFromFPan(POINT32S{ j + roi.x, i + roi.y }, _panPara), dep);
if (p_img.x >= 0 && p_img.y >= 0 && p_img.x < size.w && p_img.y < size.h)
{
auto rgbVal = device_Interpolation_RGB24(framePtr, size.w, size.h, p_img.x, p_img.y);
panSubPtr[(i * roi.w + j) * 3 + 0] = rgbVal.R;
panSubPtr[(i * roi.w + j) * 3 + 1] = rgbVal.G;
panSubPtr[(i * roi.w + j) * 3 + 2] = rgbVal.B;
mask[i * roi.w + j] = 255;
}
}
void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep)
{
unsigned char* cuda_Frame, * cuda_Pan, *cuda_pan_mask;
double* cuda_H_inv_data;
int pan_sub_w = roi.width;
int pan_sub_h = roi.height;
cudaMalloc((void**)&cuda_Frame, rgbFrame.cols * rgbFrame.rows * 3); // RGB frame
cudaMalloc((void**)&cuda_Pan, pan_sub_w * pan_sub_h * 3); // RGB panorama sub-image
cudaMalloc((void**)&cuda_H_inv_data, 9 * sizeof(double)); // 3x3 inverse homography
cudaMalloc((void**)&cuda_pan_mask, pan_sub_w * pan_sub_h * 1); // grayscale validity mask
// Copy the frame data and inverse homography to the device
cudaMemcpy(cuda_Frame, rgbFrame.data, rgbFrame.cols * rgbFrame.rows * 3, cudaMemcpyHostToDevice);
cudaMemcpy(cuda_H_inv_data, (double*)h_inv.data, 9 * sizeof(double), cudaMemcpyHostToDevice);
// Set the thread-block and grid sizes
dim3 blockSize(32, 32);
dim3 gridSize((pan_sub_w + blockSize.x - 1) / blockSize.x, (pan_sub_h + blockSize.y - 1) / blockSize.y);
SIZE32S FrameSize = { rgbFrame.cols, rgbFrame.rows };
RECT32S RoiRect = { roi.x,roi.y,roi.width,roi.height };
cudaMemset(cuda_Pan, 0, roi.width * roi.height * 3);
cudaMemset(cuda_pan_mask, 0, roi.width * roi.height);
// Launch the CUDA kernel
projKernel << <gridSize, blockSize >> > (cuda_Frame, FrameSize, cuda_Pan, RoiRect, cuda_H_inv_data, _panPara, dep, cuda_pan_mask);
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
printf("CUDA error: %s\n", cudaGetErrorString(err));
}
//cudaDeviceSynchronize();
// Copy the result back to the host
cv::Mat pan_sub = pan(roi);
cv::Mat pan_sub_clone = cv::Mat::zeros(pan_sub.size(), CV_8UC3);
cv::Mat pan_sub_mask = cv::Mat::zeros(pan_sub.size(), CV_8UC1);
cudaMemcpy(pan_sub_clone.data, cuda_Pan, pan_sub_w * pan_sub_h * 3, cudaMemcpyDeviceToHost);
cudaMemcpy(pan_sub_mask.data, cuda_pan_mask, pan_sub_w * pan_sub_h * 1, cudaMemcpyDeviceToHost);
pan_sub_clone.copyTo(pan_sub, pan_sub_mask);
/* cv::Mat dst;
cv::resize(pan_sub_clone, dst, cv::Size(pan_sub_w / 4, pan_sub_h / 4));
imshow("pan_sub_clone", dst);
cv::waitKey(1);*/
// Free device memory
cudaFree(cuda_Frame);
cudaFree(cuda_Pan);
cudaFree(cuda_H_inv_data);
cudaFree(cuda_pan_mask);
}
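Only the kernel launch above is error-checked; the cudaMalloc/cudaMemcpy calls are not. A small wrapper (a sketch, not part of the commit) would extend the same pattern to every runtime call:

#include <cstdio>
#include <cuda_runtime.h>
// Hedged sketch: report any CUDA runtime error with file/line context.
#define CUDA_CHECK(call)                                          \
    do {                                                          \
        cudaError_t e = (call);                                   \
        if (e != cudaSuccess) {                                   \
            printf("CUDA error: %s at %s:%d\n",                   \
                   cudaGetErrorString(e), __FILE__, __LINE__);    \
        }                                                         \
    } while (0)
// Usage: CUDA_CHECK(cudaMalloc((void**)&cuda_Frame, rgbFrame.cols * rgbFrame.rows * 3));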

@ -1,229 +0,0 @@
#include "FileCache.h"
#include <fstream>
#include <sstream>
#include <iomanip>
#include <stdexcept>
#include "StitchStruct.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#endif
#include "openssl/sha.h"
namespace fs = std::filesystem;
FileCache::FileCache(const std::string& cache_dir, size_t max_memory_items)
: cache_dir_(cache_dir), max_memory_items_(max_memory_items)
{
fs::create_directories(cache_dir_);
}
FileCache::~FileCache()
{
// Clean up memory-mapped files
for (auto& [key, mapped_file] : mapped_files_)
{
unmap_file(&mapped_file, sizeof(FrameCache));
}
}
std::string FileCache::hash_key(const KeyType& key) const
{
// Convert key to a string representation
std::string key_str = std::to_string(key);
// Compute SHA-256 hash
unsigned char hash[SHA256_DIGEST_LENGTH];
SHA256_CTX sha256;
SHA256_Init(&sha256);
SHA256_Update(&sha256, key_str.c_str(), key_str.size());
SHA256_Final(hash, &sha256);
// Convert hash to a hex string
std::ostringstream hex_stream;
for (int i = 0; i < SHA256_DIGEST_LENGTH; i++) {
hex_stream << std::hex << std::setw(2) << std::setfill('0') << (int)hash[i];
}
return hex_stream.str();
}
std::string FileCache::get_file_path(const KeyType& key) const
{
std::string hashed = hash_key(key);
return cache_dir_ + "/" + hashed.substr(2) + ".cache";
}
bool FileCache::get(KeyType key,FrameCache* pData)
{
{
std::lock_guard<std::mutex> lock(cache_mutex_);
if (memory_cache_.count(key))
{
memcpy(pData,&memory_cache_[key],sizeof(FrameCache));
std::cout << "get from memory_cache_:" << pData->_para.nFrmID << std::endl;
return true;
}
if (mapped_files_.count(key))
{
memcpy(pData,&mapped_files_[key],sizeof(FrameCache));
std::cout << "get from mapped_files_:" << pData->_para.nFrmID << std::endl;
return true;
}
}
auto file_path = get_file_path(key);
return load_from_disk(file_path,key,pData);
}
void FileCache::set(KeyType key, FrameCache* pData)
{
std::lock_guard<std::mutex> lock(cache_mutex_);
memcpy(&memory_cache_[key],pData,sizeof(FrameCache));
std::cout << "set memory_cache:" << memory_cache_[key]._para.nFrmID <<std::endl;
// Evict if necessary
if (memory_cache_.size() > max_memory_items_)
{
memory_cache_.erase(memory_cache_.begin());
}
// Async save to disk
auto file_path = get_file_path(key);
//save_to_disk(file_path, pData);
async_tasks_.push_back(std::async(std::launch::async, [this, file_path, pData]() {
save_to_disk(file_path, pData);
}));
}
bool FileCache::save_to_disk(const std::string& file_path, FrameCache* data)
{
std::ofstream out(file_path, std::ios::binary);
if (!out) return false;
auto nsize = sizeof(FrameCache);
//std::cout << "set file:" << data->_para.nFrmID <<std::endl;
out.write(reinterpret_cast<const char*>(data), sizeof(FrameCache));
return true;
}
bool FileCache::load_from_disk(const std::string& file_path,const KeyType& key,FrameCache* pData)
{
if (!fs::exists(file_path))
{
return false;
}
const char* str = file_path.c_str();
size_t file_size = fs::file_size(file_path);
void* mapped_data = map_file(file_path, file_size);
if (!mapped_data)
{
return false;
}
// Store the mapped file for later use
std::lock_guard<std::mutex> lock(cache_mutex_);
FrameCache* ptr = &mapped_files_[key];
// Keep a copy in the mapped-files cache
memcpy(ptr,mapped_data,sizeof(FrameCache));
// Copy out to the caller
memcpy(pData,mapped_data,sizeof(FrameCache));
std::cout << "get from raw_files_:" << pData->_para.nFrmID << std::endl;
return true;
}
void FileCache::clear()
{
std::lock_guard<std::mutex> lock(cache_mutex_);
memory_cache_.clear();
for (const auto& [key, mapped_file] : mapped_files_)
{
unmap_file((void*)&mapped_file, sizeof(FrameCache));
}
mapped_files_.clear();
for (const auto& entry : fs::directory_iterator(cache_dir_))
{
fs::remove_all(entry.path());
}
}
SINT32 FileCache::get_mem_size()
{
return memory_cache_.size();
}
SINT32 FileCache::get_file_size()
{
return mapped_files_.size();
}
// Platform-specific memory mapping
void* FileCache::map_file(const std::string& file_path, size_t size)
{
#ifdef _WIN32
HANDLE file = CreateFileA(file_path.c_str(), GENERIC_READ, FILE_SHARE_READ, nullptr,
OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
if (file == INVALID_HANDLE_VALUE) return nullptr;
HANDLE mapping = CreateFileMapping(file, nullptr, PAGE_READONLY, 0, 0, nullptr);
if (!mapping)
{
CloseHandle(file);
return nullptr;
}
void* data = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, size);
CloseHandle(mapping);
CloseHandle(file);
return data;
#else
int fd = open(file_path.c_str(), O_RDONLY);
if (fd == -1) return nullptr;
void* data = mmap(nullptr, size, PROT_READ, MAP_PRIVATE, fd, 0);
close(fd);
if (data == MAP_FAILED) {
return nullptr;
}
return data;
#endif
}
void FileCache::unmap_file(void* data, size_t size)
{
#ifdef _WIN32
UnmapViewOfFile(data);
#else
munmap(data, size);
#endif
}

@ -1,56 +0,0 @@
#pragma once
#include <string>
#include <unordered_map>
#include <mutex>
#include <future>
#include <filesystem>
#include <optional>
#include <vector>
#include <cstdint>
#include "StitchStruct.h"
typedef int KeyType;
class FileCache
{
public:
FileCache(const std::string& cache_dir = "./tile_cache", size_t max_memory_items = 200);
~FileCache();
bool get(KeyType key,FrameCache* pData);
void set(KeyType key, FrameCache* pData);
void clear();
SINT32 get_mem_size();
SINT32 get_file_size();
private:
std::string get_file_path(const KeyType& key) const;
std::string hash_key(const KeyType& key) const;
bool save_to_disk(const std::string& file_path, FrameCache* data);
bool load_from_disk(const std::string& file_path,const KeyType& key,FrameCache* pData);
std::string cache_dir_;
size_t max_memory_items_;
// Memory cache
mutable std::mutex cache_mutex_;
std::unordered_map<KeyType, FrameCache> memory_cache_;
std::unordered_map<KeyType, FrameCache> mapped_files_;
// Async tasks
std::vector<std::future<void>> async_tasks_;
// Platform-specific memory mapping
void* map_file(const std::string& file_path, size_t size);
void unmap_file(void* data, size_t size);
};