优化命名,cuda使用静态显存

main
wangchongwu 5 months ago
parent f389ec6211
commit 345cfd12cf

@ -4,7 +4,6 @@ project(stitch VERSION 0.1.0)
set(CMAKE_CXX_STANDARD 17)
SET(ArithStitchDir stitch)
IF(WIN32)

@ -0,0 +1,137 @@
import numpy as np
def inv_form(R, t):
    """
    Compute the inverse of the transform ``y = R @ x + t``.

    Parameters:
        R: square, invertible matrix.
        t: translation vector (any shape; it is flattened to a column).

    Returns:
        (R1, t1) where ``R1`` is the matrix inverse of ``R`` and ``t1`` is the
        1-D vector ``-(R1 @ t)``.
    """
    inverse_matrix = np.linalg.inv(R)
    column = t.reshape(-1, 1)
    # Multiply as a column vector, flatten back to 1-D, then negate.
    inverse_shift = np.negative((inverse_matrix @ column).flatten())
    return inverse_matrix, inverse_shift
def warp_point(point, R, t):
    """
    Map a 2-D point through a 3x3 matrix and then translate it.

    The point is lifted to homogeneous coordinates by appending 1, multiplied
    by ``R``, divided by its third component, and finally ``t`` is added.

    Parameters:
        point: 2-element point.
        R: 3x3 matrix applied to the homogeneous point.
        t: 3-element offset added after normalisation.

    Returns:
        The first two components of the transformed point.
    """
    homogeneous = np.concatenate((np.ravel(point), [1]))
    projected = R @ homogeneous
    # Normalise so that the homogeneous coordinate equals 1 before translating.
    normalised = projected / projected[2]
    return (normalised + t)[:2]
def get_M_G2I(dH, fYaw, fPitch, fRoll, fAz, fPt, imgW, imgH, fd):
    """
    Build the 3x3 matrix that maps ground coordinates to image coordinates.

    The result is the product, from left to right, of the intrinsic matrix,
    the two servo rotations (``fPt`` about x, ``fAz`` about z), the three
    attitude rotations (roll about y, pitch about x, yaw about z) and the
    depth scaling ``diag(1, 1, dH)``.

    Parameters:
        dH: depth value placed in the last diagonal entry of the depth matrix.
        fYaw, fPitch, fRoll: attitude angles in degrees.
        fAz, fPt: servo angles in degrees.
        imgW, imgH: image width and height; half of each is the principal point.
        fd: focal length used on the diagonal of the intrinsic matrix
            (the y entry is negated).

    Returns:
        The 3x3 ground-to-image matrix.
    """
    def _trig(deg):
        # Cosine and sine of an angle given in degrees.
        rad = np.deg2rad(deg)
        return np.cos(rad), np.sin(rad)

    def _about_z(deg):
        c, s = _trig(deg)
        return np.array([
            [c, -s, 0],
            [s, c, 0],
            [0, 0, 1]
        ])

    def _about_x(deg):
        c, s = _trig(deg)
        return np.array([
            [1, 0, 0],
            [0, c, -s],
            [0, s, c]
        ])

    def _about_y(deg):
        c, s = _trig(deg)
        return np.array([
            [c, 0, s],
            [0, 1, 0],
            [-s, 0, c]
        ])

    # Depth matrix
    depth = np.array([
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, dH]
    ])
    # Intrinsic matrix
    intrinsics = np.array([
        [fd, 0, imgW/2],
        [0, -fd, imgH/2],
        [0, 0, 1]
    ])

    # Accumulate strictly left to right:
    # intrinsics, servo (fPt, fAz), attitude (roll, pitch, yaw), depth.
    M = intrinsics
    for factor in (_about_x(fPt), _about_z(fAz),
                   _about_y(fRoll), _about_x(fPitch), _about_z(fYaw),
                   depth):
        M = M @ factor
    return M
def main():
    """
    Demo driver.

    Builds the ground-to-image matrix for one recorded pose, round-trips the
    image centre through the image-to-ground mapping, and then plots the
    ground footprint of the full image for a sweep of servo angles.
    """
    # Camera information
    imgW = 1280
    imgH = 1024
    f = 48
    dsize = 7.5
    fd = f/dsize*1000
    # Pod (servo) parameters
    fAz = 44.1876869
    fPt = 18.3241043
    # Attitude parameters
    fYaw = 165.08250427246094
    fPitch = 4.2472858428955078
    fRoll = -0.37968909740447998
    # Depth
    dH = 2920

    # Transform matrices for the recorded pose
    M_G2I = get_M_G2I(dH, fYaw, fPitch, fRoll, fAz, fPt, imgW, imgH, fd)
    M_I2G = np.linalg.inv(M_G2I)
    t = np.array([200, 1000, 1])
    point = np.array([640, 512])

    # Forward mapping: image point -> translated ground point.
    npoint = warp_point(point, M_I2G, t)
    # warp_point adds the translation AFTER the projective normalisation, so
    # the inverse must first remove the translation and then apply the inverse
    # matrix with no further offset; pxpoint then reproduces `point`.
    R1, _ = inv_form(M_I2G, t)
    pxpoint = warp_point(npoint - t[:2], R1, np.zeros(3))

    # Plot the field-of-view footprint
    import matplotlib.pyplot as plt
    plt.figure()
    # Homogeneous pixel coordinates of the four image corners.
    corners = np.array([[0, 0, 1], [imgW, 0, 1], [imgW, imgH, 1], [0, imgH, 1]])
    # Separate loop names so the sweep does not overwrite the measured
    # servo angles fAz / fPt above.
    for az in range(44, 61, 5):
        for pt in range(18, 46, 10):
            M_G2I = get_M_G2I(dH, fYaw, fPitch, fRoll, az, pt, imgW, imgH, fd)
            M_I2G = np.linalg.inv(M_G2I)
            # Project the four corners onto the ground plane
            transformed_points = []
            for p in corners:
                p_transformed = M_I2G @ p
                p_transformed = p_transformed / p_transformed[2]
                transformed_points.append(p_transformed)
            # Repeat the first point to close the polygon
            transformed_points.append(transformed_points[0])
            # Convert to a numpy array for plotting
            points_array = np.array(transformed_points)
            plt.plot(points_array[:, 0], points_array[:, 1])
    plt.gca().set_aspect('equal')
    plt.grid(True)
    plt.show()


if __name__ == "__main__":
    main()

@ -365,7 +365,7 @@ void ProcessFrontVL(string filePath)
SINT32 nVLFrameSize = 1.5 * IMAGE_WIDTH_VL * IMAGE_HEIGHT_VL + IMAGE_WIDTH_VL * PARA_IR_LINE;
_fseeki64(file, nVLFrameSize * 2200, SEEK_SET);
int i = 0;
while (!feof(file))
@ -440,7 +440,7 @@ void ProcessFrontVL(string filePath)
tm.start();
// 基于外参的快拼
stitcher->GeoStitch(frame, info);
stitcher->PoleStitch(frame, info);
tm.stop();
@ -450,7 +450,7 @@ void ProcessFrontVL(string filePath)
}
cv::Mat res;
cv::resize(mat_pan, res, cv::Size(pan.u32Width / 8, pan.u32Height / 8));
cv::resize(mat_pan, res, cv::Size(pan.u32Width / 2, pan.u32Height / 2));
imshow("pan_opt", res);
waitKey(1);
@ -459,7 +459,7 @@ void ProcessFrontVL(string filePath)
cv::Mat res;
cv::resize(mat_pan, res, cv::Size(pan.u32Width / 8, pan.u32Height / 8));
cv::resize(mat_pan, res, cv::Size(pan.u32Width / 2, pan.u32Height / 2));
imshow("pan_opt", res);
waitKey(0);
@ -471,12 +471,12 @@ void ProcessFrontVL(string filePath)
int main(int, char**)
{
//ProcessIR();
//ProcessVL("H:/vl_1920_1080_para40_y8/22.video","22");
ProcessVL("H:/vl_1920_1080_para40_y8/22.video","22");
//ProcessVL("H:/vl_1920_1080_para40_y8/20241219152643_1.video", "20241219152643_1");
//ProcessVL("H:/vl_1920_1080_para40_y8/20241219152917_4.video", "20241219152917_4");
//ProcessVL("H:/vl_1920_1080_para40_y8/20241219153515_10.video", "20241219153515_10");
//
//ProcessVL("H:/vl_1920_1080_para40_y8/1.video", "1");
ProcessFrontVL("H:/vl_1920_1080_para40_y8/1.video");
//ProcessFrontVL("H:/vl_1920_1080_para40_y8/1.video");
}

@ -5,8 +5,19 @@ set(build_time ${COMPILE_TIME})
#
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/src/Version.h.in ${CMAKE_CURRENT_SOURCE_DIR}/src/Version.h)
# cmakecuda
enable_language(CUDA)
set(CUDA_ARCHITECTURES "50;52;60;70;75;80")
# cuda
# Forward the MSVC runtime-library switch to the host compiler invoked by nvcc,
# selected from CMAKE_BUILD_TYPE.
if(WIN32)
    if(CMAKE_BUILD_TYPE STREQUAL "Debug")
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/MDd")
    elseif(CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo")
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/MD")
    elseif(CMAKE_BUILD_TYPE STREQUAL "Release")
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/MD")
    endif()
# The closing argument must repeat the opening condition; the original
# endif(MSVC) did not match if(WIN32), so the argument is dropped.
endif()
if(MSVC)
add_definitions(-D_CRT_SECURE_NO_WARNINGS)
@ -19,6 +30,8 @@ IF(WIN32)
ELSE(WIN32)
ENDIF(WIN32)
find_package(Ceres REQUIRED)
include_directories(${CERES_INCLUDE_DIRS})
@ -34,22 +47,26 @@ link_directories(${OpenCV_LIBS_DIR})
include_directories(${OpenCV_INCLUDE_DIRS})
find_package(CUDA REQUIRED)
set(CMAKE_CUDA_STANDARD 11)
include_directories(${CUDA_INCLUDE_DIRS})
# cuda
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/MD")
# CUDA
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -D__CUDA_NO_HALF_SUPPORT")
set(ArithSrcDIR_MAIN "src") #
# 使Common
file(GLOB libsrcs ${ArithSrcDIR_MAIN}/mapKernel.cu ${ArithSrcDIR_MAIN}/*.cpp ${ArithSrcDIR_MAIN}/*.c ${ArithSrcDIR_MAIN}/*.h ${ArithSrcDIR_MAIN}/*.hpp)
file(GLOB CommonSrc ${ArithSrcDIR_MAIN}/utils/*.cpp ${ArithSrcDIR_MAIN}/utils/*.c ${ArithSrcDIR_MAIN}/utils/*.h ${ArithSrcDIR_MAIN}/utils/*.hpp)
message("-------------------")
message(STATUS ${CMAKE_BUILD_TYPE})
if(WIN32)
# CUDA
#set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler -D__CUDA_NO_HALF_SUPPORT")
cuda_add_library(${LIB_STITCH} SHARED ${libsrcs} ${CommonSrc}) #
else()
add_library(${LIB_STITCH} SHARED ${libsrcs} ${CommonSrc}) #
endif()
cuda_add_library(${LIB_STITCH} SHARED ${libsrcs} ${CommonSrc}) #
#
target_include_directories(${LIB_STITCH} PUBLIC
@ -66,12 +83,4 @@ target_link_libraries(${LIB_STITCH}
${CUDA_LIBRARIES}
)
# # # gcc0
# if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# #
# add_definitions(-fvisibility=hidden)
# #
# target_compile_options(${LIB_STITCH} PRIVATE -Werror -Wreturn-type)
# endif()
set(LIBRARY_OUTPUT_PATH ${CMAKE_SOURCE_DIR}/Bin) #

@ -30,8 +30,8 @@ public:
// 初始化拼接
virtual FPanInfo Init(FrameInfo info, float AzRange, float PtRange) = 0;
// 几何校正快拼
virtual BYTE8 GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo para) = 0;
// 极坐标快拼
virtual BYTE8 PoleStitch(GD_VIDEO_FRAME_S img, FrameInfo para) = 0;
// 获取全景图
virtual GD_VIDEO_FRAME_S ExportPanAddr() = 0;

@ -1,9 +1,21 @@
#pragma once
#include "Arith_CoordModule.h"
#include "Arith_SysStruct.h"
#include "opencv2/opencv.hpp"
#include "StitchStruct.h"
// 前视扫描投影变换模型
//
// CUDA device-memory resources.
// Every pointer defaults to nullptr so that an instance whose buffers were
// never allocated can still be handed to cudaFree() safely: cudaFree(nullptr)
// performs no operation, whereas freeing an uninitialized pointer is an error.
struct cuda_Mem
{
    unsigned char* global_cuda_Frame = nullptr;    // input frame; FrontStitch::Init allocates width*height*3 bytes (RGB)
    unsigned char* global_cuda_Pan = nullptr;      // panorama; FrontStitch::Init allocates pan_width*pan_height*3 bytes (RGB)
    unsigned char* global_cuda_pan_mask = nullptr; // panorama mask; FrontStitch::Init allocates pan_width*pan_height bytes (gray)
    double* global_cuda_H_inv_data = nullptr;      // FrontStitch::Init allocates 9 doubles
};
Pole getPoleFromImgWithH(cv::Mat H, cv::Point2f pt, float dep);
cv::Point2f getImgPosFromPole(cv::Mat H_inv, Pole _pole, float dep);

@ -29,12 +29,13 @@ FrontStitch::FrontStitch(SINT32 nWidth, SINT32 nHeight)
// Destructor: releases the four device buffers held in _cuda_mem
// (frame, panorama, inverse-H data, panorama mask), in that order.
FrontStitch::~FrontStitch()
{
    void* deviceBuffers[] = {
        _cuda_mem.global_cuda_Frame,
        _cuda_mem.global_cuda_Pan,
        _cuda_mem.global_cuda_H_inv_data,
        _cuda_mem.global_cuda_pan_mask
    };

    for (void* buffer : deviceBuffers)
    {
        cudaFree(buffer);
    }
}
FPanInfo FrontStitch::Init(FrameInfo info, float AzRange, float PtRange)
{
_panPara.fAglRes = 0.02;
@ -60,10 +61,17 @@ FPanInfo FrontStitch::Init(FrameInfo info, float AzRange, float PtRange)
_panImage = cv::Mat::zeros(_panPara.m_pan_height, _panPara.m_pan_width, CV_8UC3);
cudaMalloc((void**)&_cuda_mem.global_cuda_Frame, info.nWidth * info.nHeight * 3);//rgb
cudaMalloc((void**)&_cuda_mem.global_cuda_Pan, _panPara.m_pan_width * _panPara.m_pan_height * 3);//rgb
cudaMalloc((void**)&_cuda_mem.global_cuda_H_inv_data, 9 * sizeof(double));//
cudaMalloc((void**)&_cuda_mem.global_cuda_pan_mask, _panPara.m_pan_width * _panPara.m_pan_height * 1);//gray
return _panPara;
}
BYTE8 FrontStitch::GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo info)
BYTE8 FrontStitch::PoleStitch(GD_VIDEO_FRAME_S img, FrameInfo info)
{
cv::Mat H = _GeoSolver->findHomography(info);
float dep = info.nEvHeight;
@ -111,7 +119,7 @@ BYTE8 FrontStitch::GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo info)
//UpdatePan_CPU(rgb, _panImage, roi, H_inv, _panPara, dep);
UpdatePan_CUDA(rgb, _panImage, roi, H_inv, _panPara, dep);
UpdatePan_CUDA(rgb, _panImage, roi, H_inv, _panPara, dep, _cuda_mem);
return 0;
@ -173,6 +181,7 @@ void UpdatePan_CPU(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat H_inv,
}
}
}
}

@ -16,6 +16,9 @@
#include "Arith_GeoSolver.h"
#include <cuda_runtime.h>
#include "opencv2/opencv.hpp"
#include "Arith_FrontProj.h"
class FrontStitch:public API_FrontStitch
{
@ -26,8 +29,8 @@ public:
// 初始化拼接
FPanInfo Init(FrameInfo info, float AzRange, float PtRange);
// 几何校正快拼
BYTE8 GeoStitch(GD_VIDEO_FRAME_S img, FrameInfo para);
// 极坐标快拼
BYTE8 PoleStitch(GD_VIDEO_FRAME_S img, FrameInfo para);
// 缓存接收帧
SINT32 ReceiveFrame(GD_VIDEO_FRAME_S img, FrameInfo para);
@ -47,12 +50,16 @@ private:
FPanInfo _panPara;//全景配置
cv::Mat _panImage;
};
// device mem
private:
cuda_Mem _cuda_mem;
};
void UpdatePan_CPU(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep);
void UpdatePan_CUDA(cv::Mat rgbaFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep);
void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep, cuda_Mem cuda_resor);

@ -1,4 +1,4 @@
#include "googleTile.h"
#include "GoogleTile.h"
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <iostream>

@ -192,21 +192,32 @@ __global__ void projKernel(unsigned char* framePtr, SIZE32S size, unsigned char*
int i = blockIdx.y * blockDim.y + threadIdx.y;
POINT32S p_img = device_getImgPosFromPole(h_inv, device_getPoleFromFPan(POINT32S{ j + roi.x, i + roi.y }, _panPara), dep);
if (p_img.x >= 0 && p_img.y >= 0 && p_img.x < size.w && p_img.y < size.h)
if (j == 0 && i == 200)
{
printf("size:%d,%d\n", size.w, size.h);
auto rgbVal = device_Interpolation_RGB24(framePtr, size.w, size.h, p_img.x, p_img.y);
panSubPtr[(i * roi.w + j) * 3 + 0] = rgbVal.R;
panSubPtr[(i * roi.w + j) * 3 + 1] = rgbVal.G;
panSubPtr[(i * roi.w + j) * 3 + 2] = rgbVal.B;
mask[i * roi.w + j] = 255;
printf("%d:%d-%d,RGB:%d-%d-%d\n", j, p_img.x, p_img.y, rgbVal.R, rgbVal.G, rgbVal.B);
}
if (p_img.x >= 10 && p_img.y >= 10 && p_img.x < size.w - 10 && p_img.y < size.h -10)
{
auto rgbVal = device_Interpolation_RGB24(framePtr, size.w, size.h, p_img.x, p_img.y);
panSubPtr[(i * (roi.w) + j) * 3 + 0] = rgbVal.R;
panSubPtr[(i * (roi.w) + j) * 3 + 1] = rgbVal.G;
panSubPtr[(i * (roi.w) + j) * 3 + 2] = rgbVal.B;
mask[i * roi.w + j] = 255;
}
}
void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep)
#define DYNAMIC_MEM
void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv, FPanInfo _panPara, float dep, cuda_Mem cuda_res)
{
unsigned char* cuda_Frame, * cuda_Pan, *cuda_pan_mask;
@ -214,18 +225,23 @@ void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv
int pan_sub_w = roi.width;
int pan_sub_h = roi.height;
#ifdef DYNAMIC_MEM
cudaMalloc((void**)&cuda_Frame, rgbFrame.cols * rgbFrame.rows * 3);//rgb
cudaMalloc((void**)&cuda_Pan, pan_sub_w * pan_sub_h * 3);//rgb
cudaMalloc((void**)&cuda_H_inv_data, 9 * sizeof(double));//
cudaMalloc((void**)&cuda_pan_mask, pan_sub_w * pan_sub_h * 1);//gray
#else
cuda_Frame = cuda_res.global_cuda_Frame;
cuda_Pan = cuda_res.global_cuda_Pan;
cuda_H_inv_data = cuda_res.global_cuda_H_inv_data;
cuda_pan_mask = cuda_res.global_cuda_pan_mask;
#endif
// 拷贝帧数据到设备
cudaMemcpy(cuda_Frame, rgbFrame.data, rgbFrame.cols * rgbFrame.rows * 3, cudaMemcpyHostToDevice);
cudaMemcpy(cuda_H_inv_data, (double*)h_inv.data,9 * sizeof(double), cudaMemcpyHostToDevice);
// 定义线程块和网格大小
dim3 blockSize(32, 32);
dim3 blockSize(8, 8);
dim3 gridSize((pan_sub_w + blockSize.x - 1) / blockSize.x, (pan_sub_h + blockSize.y - 1) / blockSize.y);
SIZE32S FrameSize = { rgbFrame.cols, rgbFrame.rows };
@ -243,7 +259,7 @@ void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv
if (err != cudaSuccess) {
printf("CUDA error: %s\n", cudaGetErrorString(err));
}
//cudaDeviceSynchronize();
cudaDeviceSynchronize();
// 拷贝结果回主机
cv::Mat pan_sub = pan(roi);
@ -256,17 +272,20 @@ void UpdatePan_CUDA(cv::Mat rgbFrame, cv::Mat pan, cv::Rect2d roi, cv::Mat h_inv
pan_sub_clone.copyTo(pan_sub, pan_sub_mask);
/* cv::Mat dst;
cv::Mat dst;
cv::resize(pan_sub_clone, dst, cv::Size(pan_sub_w / 4, pan_sub_h / 4));
imshow("pan_sub_clone", dst);
cv::waitKey(1);*/
cv::waitKey(0);
#ifdef DYNAMIC_MEM
// 释放设备内存
cudaFree(cuda_Frame);
cudaFree(cuda_Pan);
cudaFree(cuda_H_inv_data);
cudaFree(cuda_pan_mask);
#endif
}

Loading…
Cancel
Save