You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

362 lines
9.6 KiB

#include <string.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iostream>
#include <random>
#include <sstream>
#include <string>
#include <vector>

#include "opencv2/opencv.hpp"

#include "API_FrontStitch.h"
#include "API_UnderStitch.h"
#include "PlatformDefine.h"
// One record parsed from a DJI SRT telemetry file (one entry per video frame).
struct FrameData {
int frameNumber; // sequential subtitle index from the SRT file
std::string timeRange; // e.g. "00:00:00,000 --> 00:00:00,033"
int frameCnt; // value of the "FrameCnt" field on the third record line
std::string timestamp; // raw timestamp token following FrameCnt
float focal_len = 0; // focal length as written in the SRT (35mm-equivalent — TODO confirm)
float dzoom_ratio = 0; // digital zoom ratio
double latitude = 0; // aircraft latitude, degrees
double longitude = 0; // aircraft longitude, degrees
float rel_alt = 0; // altitude relative to takeoff point, meters
float abs_alt = 0; // absolute altitude, meters
float gb_yaw = 0; // gimbal yaw, degrees
float gb_pitch = 0; // gimbal pitch, degrees
float gb_roll = 0; // gimbal roll, degrees
// Derived parameters (filled in by inferCameraParams_*)
float real_focal_mm = 0; // physical focal length, mm
float pixel_size_um = 0; // sensor pixel pitch, micrometers
};
// Extract the float value that follows `key` in `line` (token ends at the
// next space or end of line, e.g. "focal_len: 24.00 dzoom...").
// Returns 0 when the key is absent or the token is not a valid number.
float extractValueAfter(const std::string& line, const std::string& key) {
    size_t pos = line.find(key);
    if (pos == std::string::npos) return 0;
    pos += key.length();
    size_t end = line.find(' ', pos);
    if (end == std::string::npos) end = line.length();
    try {
        return std::stof(line.substr(pos, end - pos));
    } catch (...) {
        // Malformed numeric token: fall back to the same 0 sentinel used for
        // a missing key instead of letting std::stof's exception abort the
        // whole SRT parse.
        return 0;
    }
}
// Double-precision variant of extractValueAfter, for latitude/longitude where
// float precision is insufficient. Returns 0 when the key is absent or the
// token is not a valid number.
double extractDoubleAfter(const std::string& line, const std::string& key) {
    size_t pos = line.find(key);
    if (pos == std::string::npos) return 0;
    pos += key.length();
    size_t end = line.find(' ', pos);
    if (end == std::string::npos) end = line.length();
    try {
        return std::stod(line.substr(pos, end - pos));
    } catch (...) {
        // Keep the "missing key" 0 sentinel on a malformed token rather than
        // propagating std::stod's exception.
        return 0;
    }
}
// Derive the physical focal length (mm) and pixel pitch (um) for a DJI H20
// payload from the camera suffix embedded in the file name (_W/_Z/_T).
void inferCameraParams_H20(FrameData& frame, const std::string& filename)
{
    // Diagonal of a 35mm full-frame sensor, in mm.
    const float kFullFrameDiag = 43.3f;

    float sensorDiag;
    if (filename.find("_W") != std::string::npos) {
        // Wide camera.
        sensorDiag = 9.4f;
        frame.pixel_size_um = 1.98f;
    } else if (filename.find("_Z") != std::string::npos) {
        // Zoom camera.
        sensorDiag = 7.7f;
        frame.pixel_size_um = 2.4f;
    } else if (filename.find("_T") != std::string::npos) {
        // Thermal camera.
        sensorDiag = 10.9f;
        frame.pixel_size_um = 12.0f;
    } else {
        // Unknown suffix: assume the wide camera.
        sensorDiag = 9.4f;
        frame.pixel_size_um = 1.98f;
    }

    // Scale the 35mm-equivalent focal length down to the physical sensor size.
    frame.real_focal_mm = frame.focal_len * (sensorDiag / kFullFrameDiag);
}
// Derive the physical focal length (mm) and pixel pitch (um) for a DJI H30
// payload from the camera suffix embedded in the file name (_W/_Z/_T).
// NOTE(review): only the wide (_W) and thermal (_T) constants are known here;
// the zoom (_Z) and unknown-suffix paths leave real_focal_mm/pixel_size_um at
// their 0 defaults, which downstream code will use as-is — TODO confirm.
void inferCameraParams_H30(FrameData& frame, const std::string& filename)
{
    if (filename.find("_W") != std::string::npos) {
        // Wide camera: fixed lens, known pixel pitch.
        frame.real_focal_mm = 6.72;
        frame.pixel_size_um = 2.4;
    }
    else if (filename.find("_Z") != std::string::npos) {
        // Zoom camera: constants not yet determined; fields remain 0.
    }
    else if (filename.find("_T") != std::string::npos) {
        // Thermal camera.
        frame.real_focal_mm = 24;
        frame.pixel_size_um = 12;
    }
    // else: unknown suffix, fields remain 0 (see NOTE above).
}
// Parse a DJI SRT telemetry file into one FrameData per 5-line subtitle
// record (frame number, time range, FrameCnt/timestamp, metadata, blank
// separator). Truncated or malformed records are skipped instead of being
// parsed from stale buffers or aborting the whole parse.
std::vector<FrameData> parseDJISRT(const std::string& filename) {
    std::vector<FrameData> frames;
    std::ifstream file(filename);
    if (!file.is_open()) {
        std::cerr << "Error: Could not open file " << filename << std::endl;
        return frames;
    }
    std::string lines[5];
    while (std::getline(file, lines[0])) { // Frame number line
        // Read the remaining 4 lines of the record. The original `break`
        // here only exited this for-loop, so a truncated tail record was
        // still parsed from leftover buffer contents.
        bool complete = true;
        for (int i = 1; i < 5; ++i) {
            if (!std::getline(file, lines[i])) {
                complete = false;
                break;
            }
        }
        if (!complete) break;
        FrameData frame;
        // Line 1: frame number. Skip records whose index is not numeric
        // (std::stoi would otherwise throw and terminate the program).
        try {
            frame.frameNumber = std::stoi(lines[0]);
        } catch (...) {
            continue;
        }
        // Line 2: time range (00:00:00,000 --> 00:00:00,033)
        frame.timeRange = lines[1];
        // Line 3: FrameCnt and timestamp
        std::istringstream iss(lines[2]);
        std::string temp;
        iss >> temp >> frame.frameCnt >> frame.timestamp;
        // Line 4: metadata (direct field extraction)
        const std::string& meta = lines[3];
        frame.focal_len = extractValueAfter(meta, "focal_len: ");
        frame.dzoom_ratio = extractValueAfter(meta, "dzoom_ratio: ");
        frame.latitude = extractDoubleAfter(meta, "latitude: ");
        frame.longitude = extractDoubleAfter(meta, "longitude: ");
        frame.rel_alt = extractValueAfter(meta, "rel_alt: ");
        frame.abs_alt = extractValueAfter(meta, "abs_alt: ");
        frame.gb_yaw = extractValueAfter(meta, "gb_yaw: ");
        frame.gb_pitch = extractValueAfter(meta, "gb_pitch: ");
        frame.gb_roll = extractValueAfter(meta, "gb_roll: ");
        // Fill in derived camera intrinsics from the filename suffix.
        inferCameraParams_H30(frame, filename);
        frames.push_back(frame);
        // lines[4] is the blank separator line; nothing to parse.
    }
    file.close();
    return frames;
}
// Stitch a sequence of DJI clips into one georeferenced panorama, writing a
// 1/8-scale preview video of the growing panorama to disk.
// videoPathList and srtPathList are parallel: srtPathList[i] holds the SRT
// telemetry for videoPathList[i].
void ProcDJVideo(vector<std::string> videoPathList, vector<std::string> srtPathList)
{
    auto stitcher = API_UnderStitch::Create();
    stitcher->SetOutput("DJI", "D:/google_tiles");
    GD_VIDEO_FRAME_S frame = { 0 }; // input frame descriptor handed to the stitcher
    cv::Mat mat_pan; // live view of the stitcher's panorama buffer
    // Probe the first clip for geometry; all clips are assumed to share it.
    cv::VideoCapture cap(videoPathList[0]);
    int width = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_WIDTH));
    int height = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_HEIGHT));
    int frame_count = static_cast<int>(cap.get(cv::CAP_PROP_FRAME_COUNT));
    // Halve 4K-class sources to keep the stitch fast.
    int nDownSample = 1;
    if (width > 3000)
    {
        nDownSample = 2;
    }
    std::vector<FrameData> srt_init = parseDJISRT(srtPathList[0]);
    if (srt_init.empty())
    {
        // Guard: the original dereferenced srt_init[0] unconditionally and
        // crashed on a missing or unparseable SRT file.
        std::cerr << "Error: no telemetry parsed from " << srtPathList[0] << std::endl;
        cap.release();
        return;
    }
    // Seed the stitcher with the first record's pose and intrinsics.
    FrameInfo info = { 0 };
    info.nFrmID = 0;
    info.camInfo.nFocus = srt_init[0].real_focal_mm;
    info.camInfo.fPixelSize = srt_init[0].pixel_size_um * nDownSample;
    info.craft.stAtt.fYaw = srt_init[0].gb_yaw;
    info.craft.stAtt.fPitch = srt_init[0].gb_pitch + 90; // gimbal pitch -> platform convention
    info.craft.stAtt.fRoll = srt_init[0].gb_roll;
    info.craft.stPos.B = srt_init[0].latitude;
    info.craft.stPos.L = srt_init[0].longitude;
    info.craft.stPos.H = srt_init[0].abs_alt;
    info.nEvHeight = srt_init[0].abs_alt;
    info.servoInfo.fServoAz = 0;
    info.servoInfo.fServoPt = 0;
    info.nWidth = width / nDownSample;
    info.nHeight = height / nDownSample;
    // Initialize the panorama
    stitcher->Init(info);
    mat_pan = stitcher->ExportPanMat();
    cap.release();
    cv::VideoWriter output;
    output.open("D:/DJ_stitchVL.mp4", cv::VideoWriter::fourcc('H', '2', '6', '4'), 5, cv::Size(mat_pan.cols / 8, mat_pan.rows / 8), true);
    for (size_t vid = 0; vid < videoPathList.size(); vid++)
    {
        printf("Proc %s\n", videoPathList[vid].c_str());
        cv::VideoCapture cap(videoPathList[vid]);
        std::vector<FrameData> srt = parseDJISRT(srtPathList[vid]);
        if (srt.empty())
        {
            std::cerr << "Warning: no telemetry for " << srtPathList[vid] << ", clip skipped" << std::endl;
            continue;
        }
        int frmID = 0;
        while (true)
        {
            cv::Mat mat;
            // Read a new frame
            cap >> mat;
            // Check if frame is empty (end of video)
            if (mat.empty()) {
                std::cout << "End of video\n";
                cap.release();
                break;
            }
            frmID++;
            // Frame N (1-based) pairs with SRT record N-1 (0-based). The
            // original indexed srt[frmID], skipping record 0 and reading one
            // past the end on the last frame; clamp in case the video holds
            // more frames than the SRT file.
            size_t rec = std::min(static_cast<size_t>(frmID - 1), srt.size() - 1);
            cv::Mat mat_ds2;
            cv::resize(mat, mat_ds2, cv::Size(width / nDownSample, height / nDownSample));
            FrameInfo info = { 0 };
            info.nFrmID = frmID;
            info.camInfo.nFocus = srt[rec].real_focal_mm;
            info.camInfo.fPixelSize = srt[rec].pixel_size_um * nDownSample;
            info.craft.stAtt.fYaw = srt[rec].gb_yaw;
            info.craft.stAtt.fPitch = srt[rec].gb_pitch + 90;
            info.craft.stAtt.fRoll = srt[rec].gb_roll;
            info.craft.stPos.B = srt[rec].latitude;
            info.craft.stPos.L = srt[rec].longitude;
            info.craft.stPos.H = srt[rec].abs_alt;
            info.nEvHeight = srt[rec].abs_alt;
            info.servoInfo.fServoAz = 0;
            info.servoInfo.fServoPt = 0;
            info.nWidth = mat_ds2.cols;
            info.nHeight = mat_ds2.rows;
            frame.enPixelFormat = GD_PIXEL_FORMAT_RGB_PACKED;
            frame.u32Width = mat_ds2.cols;
            frame.u32Height = mat_ds2.rows;
            frame.u64VirAddr[0] = mat_ds2.data;
            // Stitch only every 10th decoded frame.
            if (frmID % 10 != 0)
            {
                continue;
            }
            int progress = float(frmID) / frame_count * 100;
            std::cout << progress << "% " << info.craft.stPos.B << " " << info.craft.stPos.L << " " << info.craft.stPos.H << " "
                << info.craft.stAtt.fYaw << " " << info.craft.stAtt.fPitch << " " << info.craft.stAtt.fRoll << " "
                << info.servoInfo.fServoAz << " " << info.servoInfo.fServoPt
                << std::endl;
            cv::TickMeter tm;
            tm.start();
            // Fast stitch driven by the exterior-orientation telemetry.
            stitcher->Run(frame, info);
            tm.stop();
            cv::Mat pan_rgb, pan_rgb_ds;
            cv::cvtColor(mat_pan, pan_rgb, cv::COLOR_BGRA2BGR);
            cv::resize(pan_rgb, pan_rgb_ds, cv::Size(pan_rgb.cols / 8, pan_rgb.rows / 8));
            output.write(pan_rgb_ds);
            imshow("pan_rgb", pan_rgb_ds);
            if (cv::waitKey(1) == 27)
            {
                break;
            }
        }
        cv::TickMeter tm;
        tm.start();
        // Optimize and export the panorama accumulated for this clip.
        stitcher->OptAndOutCurrPan();
        stitcher->Stop();
        tm.stop();
        std::cout << "time opt:" << tm.getTimeMilli() << std::endl;
    }
    // Release the preview writer once, after all clips: the original released
    // it inside the loop, so every clip after the first wrote into a closed
    // VideoWriter.
    output.release();
}
using std::string;

// Entry point: assemble the clip/telemetry path lists and run the stitcher.
int main()
{
    vector<std::string> videoPathList;
    vector<std::string> srtPathList;
    const string folder = "F:/DJI_202504181507_016/";

    // Register one clip: the MP4 and its SRT telemetry share a basename.
    auto addClip = [&](const string& baseName) {
        videoPathList.push_back(folder + baseName + ".MP4");
        srtPathList.push_back(folder + baseName + ".srt");
    };

    //addClip("DJI_20250418152649_0005_W");
    //addClip("DJI_20250418153043_0006_T");
    addClip("DJI_20250418152649_0005_T");

    ProcDJVideo(videoPathList, srtPathList);
    return 0;
}