// StitchVideo/Detector/src/YOLO.cpp

#include "YOLO.h"

#include <algorithm>
#include <chrono>
#include <cmath>
#include <fstream>
#include <iostream>
#include <thread>
using namespace std;
using namespace cv;


#undef max
#undef min

/**
 * @brief Builds a YOLOv5 detector: loads the network, selects the inference
 *        backend and reads the class-name list.
 *
 * @param modelpath   Path of the model file handed to cv::dnn::readNet.
 * @param namesConfig Path of a text file holding one class name per line.
 * @param para        Detector parameters; para.bUseCuda selects the CUDA
 *                    backend/target, otherwise the OpenCV CPU backend is used.
 *
 * Exceptions raised while loading the model are not caught here and propagate
 * to the caller. A class-names file that cannot be opened is reported on
 * std::cerr and leaves the class list empty.
 */
yolo5::yolo5(string modelpath, string namesConfig, YoloParam para)
{
	// Load the model.
	m_YoloNet = cv::dnn::readNet(modelpath);
	NetPath = modelpath;
	if (para.bUseCuda)
	{
		std::cout << "Attempty to use CUDA\n";
		m_YoloNet.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
		m_YoloNet.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
	}
	else
	{
		std::cout << "Running on CPU\n";
		m_YoloNet.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
		m_YoloNet.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
	}

	// Class names, one per line.
	g_YoloClassName.clear();
	std::ifstream ifs(namesConfig);
	if (!ifs.is_open())
	{
		// detect() sizes its class-score scan from this list, so a missing
		// file must not go unnoticed.
		std::cerr << "Failed to open class names file: " << namesConfig << '\n';
	}
	std::string line;
	while (getline(ifs, line))
	{
		// A CRLF file read on a non-Windows platform leaves '\r' at the end
		// of every line; strip it so the names stay clean.
		if (!line.empty() && line.back() == '\r')
		{
			line.pop_back();
		}
		g_YoloClassName.push_back(line);
	}

	// Detector parameters.
	m_para = para;
}


/**
 * @brief Pads an image to a square and scales it to INPUT_WIDTH x INPUT_WIDTH.
 *
 * The source is copied into the top-left corner of a black CV_8UC3 canvas
 * whose side equals the longer source edge, so the aspect ratio of the
 * content is preserved when the canvas is resized.
 *
 * @param source Image to pad and resize.
 * @return The resized square image.
 */
cv::Mat yolo5::keepRatio(const cv::Mat &source)
{
    const int srcWidth = source.cols;
    const int srcHeight = source.rows;

    // Side of the square canvas: the longer of the two source edges.
    const int side = std::max(srcWidth, srcHeight);
    const cv::Rect srcRegion(0, 0, srcWidth, srcHeight);

    // Black square canvas with the source placed in its top-left corner.
    cv::Mat canvas = cv::Mat::zeros(side, side, CV_8UC3);
    source.copyTo(canvas(srcRegion));

    // Scale the padded square to the target size (INPUT_WIDTH, INPUT_WIDTH).
    const cv::Size targetSize(INPUT_WIDTH, INPUT_WIDTH);
    cv::Mat scaled;
    cv::resize(canvas, scaled, targetSize);
    return scaled;
}

/**
 * @brief Runs the YOLOv5 network on one image and stores the surviving
 *        detections in m_targetArray.
 *
 * The image is padded to a square by keepRatio(), converted to a blob of
 * INPUT_WIDTH x INPUT_HEIGHT and pushed through the network. The first output
 * is decoded as rows of [cx, cy, w, h, objectness, class scores...]; rows are
 * filtered by m_para.conf_threshold (objectness) and m_para.score_threshold
 * (best class score) and then de-duplicated with non-maximum suppression.
 *
 * @param image Input image; an empty image yields zero detections.
 * @return Number of detections stored in m_targetArray.
 */
int yolo5::detect(Mat& image)
{
    // Clear the target queue first so an early return never exposes stale results.
    m_targetArray.clear();

    if (image.empty())
    {
        return 0;
    }

    const int srcImgWidth = image.cols;
    const int srcImgHeight = image.rows;

    cv::Mat input_image = keepRatio(image);

    cv::Mat blob;
    cv::dnn::blobFromImage(input_image, blob, double(1./255.), cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
    m_YoloNet.setInput(blob);
    std::vector<cv::Mat> outputs;
    auto OutLayersNames = m_YoloNet.getUnconnectedOutLayersNames();
    m_YoloNet.forward(outputs, OutLayersNames);
    if (outputs.empty() || outputs[0].empty())
    {
        return 0;
    }

    // keepRatio() pads to a square whose side is the LONGER image edge, and
    // that square is what gets mapped onto INPUT_WIDTH x INPUT_HEIGHT. Both
    // factors therefore derive from the longer edge; using the width alone
    // mis-scales every box on portrait images.
    const int paddedSide = std::max(srcImgWidth, srcImgHeight);
    const float x_factor = paddedSide / static_cast<float>(INPUT_WIDTH);
    const float y_factor = paddedSide / static_cast<float>(INPUT_HEIGHT);

    // Take the row layout from the tensor itself instead of hard-coded
    // constants, so a model with a different input size or class count cannot
    // make the loop read past the end of the buffer.
    const cv::Mat& prediction = outputs[0];
    const int dimensions = prediction.size[prediction.dims - 1];
    if (dimensions <= 5)
    {
        std::cerr << "yolo5::detect: network output has no class-score columns\n";
        return 0;
    }
    const int rows = static_cast<int>(prediction.total() / dimensions);

    // Never scan more class scores than one row actually holds.
    const int num_classes = std::min(static_cast<int>(g_YoloClassName.size()), dimensions - 5);
    if (num_classes <= 0)
    {
        return 0;
    }

    float *data = reinterpret_cast<float *>(prediction.data);

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i, data += dimensions) {

        const float confidence = data[4];
        if (confidence < m_para.conf_threshold)
        {
            continue;
        }

        // Wrap the class scores of this row (no copy) and find the best class.
        float *classes_scores = data + 5;
        cv::Mat scores(1, num_classes, CV_32FC1, classes_scores);
        cv::Point class_id;
        double max_class_score;
        cv::minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
        if (max_class_score <= m_para.score_threshold)
        {
            continue;
        }

        confidences.push_back(confidence);
        class_ids.push_back(class_id.x);

        // Centre/size in network coordinates -> top-left/size in source pixels.
        const float x = data[0];
        const float y = data[1];
        const float w = data[2];
        const float h = data[3];
        const int left = int((x - 0.5 * w) * x_factor);
        const int top = int((y - 0.5 * h) * y_factor);
        const int width = int(w * x_factor);
        const int height = int(h * y_factor);
        boxes.push_back(cv::Rect(left, top, width, height));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, m_para.score_threshold, m_para.nms_threshold, nms_result);
    for (const int idx : nms_result) {
        Detection result;
        // Stored class IDs are shifted by one; per the original note, 0 is the
        // initial (unset) target class ID.
        result.class_id = class_ids[idx] + 1;
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        m_targetArray.push_back(result);
    }

    // Sort by confidence (currently disabled).
    // std::sort(m_targetArray.begin(), m_targetArray.end(), [](const Detection& a, const Detection& b) {
    //     return a.confidence > b.confidence;
    // });

    return int(m_targetArray.size());
}


/**
 * @brief Builds a YOLOX detector: loads the network, selects the inference
 *        backend and reads the class-name list.
 *
 * @param modelpath   Path of the model file handed to cv::dnn::readNet.
 * @param namesConfig Path of a text file holding one class name per line.
 * @param para        Detector parameters; para.bUseCuda selects the CUDA
 *                    backend/target, otherwise the OpenCV CPU backend is used.
 *
 * Any std::exception raised during set-up is written to std::cerr and
 * swallowed, leaving the object with whatever was initialised before the
 * failure. A class-names file that cannot be opened is reported on std::cerr
 * and leaves the class list empty (num_class == 0).
 */
yolox::yolox(std::string modelpath, std::string namesConfig, YoloParam para)
{
	try
	{
		// Load the model.
		m_YoloNet = cv::dnn::readNet(modelpath);
		NetPath = modelpath;
		if (para.bUseCuda)
		{
			std::cout << "Attempty to use CUDA\n";
			m_YoloNet.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
			m_YoloNet.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
		}
		else
		{
			std::cout << "Running on CPU\n";
			m_YoloNet.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
			m_YoloNet.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
		}

		// Class names, one per line.
		g_YoloClassName.clear();
		std::ifstream ifs(namesConfig);
		if (!ifs.is_open())
		{
			// num_class is derived from this list, so a missing file must
			// not go unnoticed.
			std::cerr << "Failed to open class names file: " << namesConfig << '\n';
		}
		std::string line;
		while (getline(ifs, line))
		{
			// A CRLF file read on a non-Windows platform leaves '\r' at the
			// end of every line; strip it so the names stay clean.
			if (!line.empty() && line.back() == '\r')
			{
				line.pop_back();
			}
			g_YoloClassName.push_back(line);
		}
		num_class = int(g_YoloClassName.size());

		// Detector parameters.
		m_para = para;
	}
	catch(const std::exception& e)
	{
		std::cerr << e.what() << '\n';
	}
}

/**
 * @brief Letterboxes an image into the network input size.
 *
 * The source is scaled by the largest ratio that still fits inside the
 * network input (input_shape[0] = height, input_shape[1] = width) and copied
 * to the top-left corner of a canvas filled with the value 114.
 *
 * @param srcimg Source image.
 * @param scale  Receives the resize ratio applied to srcimg; may be null.
 * @return CV_8UC3 canvas of input_shape[0] rows by input_shape[1] columns.
 */
Mat yolox::resize_image(Mat srcimg, float* scale)
{
	const float r = float(std::min(this->input_shape[1] / (srcimg.cols*1.0), this->input_shape[0] / (srcimg.rows*1.0)));
	if (scale != nullptr)
	{
		*scale = r;
	}

	const int unpad_w = int(r * srcimg.cols);
	const int unpad_h = int(r * srcimg.rows);
	Mat re(unpad_h, unpad_w, CV_8UC3);
	cv::resize(srcimg, re, re.size());

	// cv::Mat takes (rows, cols), i.e. (height, width). The arguments used to
	// be swapped, which only worked for square inputs and made the ROI below
	// fall outside the canvas otherwise.
	Mat out(this->input_shape[0], this->input_shape[1], CV_8UC3, Scalar(114, 114, 114));
	re.copyTo(out(Rect(0, 0, re.cols, re.rows)));
	return out;
}

/**
 * @brief Converts a BGR image in place to normalised 32-bit float RGB.
 *
 * Every channel value v becomes ((v / 255) - mean[c]) / std[c], where c is
 * the channel index after the BGR -> RGB swap.
 *
 * @param img Image to convert; replaced by the normalised float image.
 */
void yolox::normalize(Mat& img)
{
	cvtColor(img, img, cv::COLOR_BGR2RGB);
	img.convertTo(img, CV_32F);

	for (int y = 0; y < img.rows; ++y)
	{
		// Walk the row one 3-channel pixel at a time.
		float* px = img.ptr<float>(y);
		for (int x = 0; x < img.cols; ++x, px += 3)
		{
			for (int ch = 0; ch < 3; ++ch)
			{
				px[ch] = float((px[ch] / 255.0f - this->mean[ch]) / this->std[ch]);
			}
		}
	}
}

/**
 * @brief Returns the index of the highest class score.
 *
 * Only scores strictly greater than zero can win; if none is, index 0 is
 * returned. On ties the first occurrence is kept.
 *
 * @param scores Array of at least num_class class scores.
 * @return Index of the best-scoring class.
 */
int yolox::get_max_class(float* scores)
{
	int best_id = 0;
	float best_score = 0;

	int idx = 0;
	while (idx < this->num_class)
	{
		const float candidate = scores[idx];
		if (candidate > best_score)
		{
			best_score = candidate;
			best_id = idx;
		}
		++idx;
	}
	return best_id;
}

/**
 * @brief Runs the YOLOX network on one image and stores the surviving
 *        detections in m_targetArray.
 *
 * The image is letterboxed by resize_image(), normalised by normalize() and
 * pushed through the network. The first output is decoded grid cell by grid
 * cell for the three stride levels; each proposal row is laid out as
 * [x offset, y offset, log w, log h, objectness, class scores...]. Only the
 * best class of a proposal is kept, scored as objectness * class score.
 * Candidates above m_para.conf_threshold go through non-maximum suppression,
 * are mapped back to source pixels and clipped to the image.
 *
 * @param srcimg Input image; an empty image yields zero detections.
 * @return Number of detections stored in m_targetArray.
 */
int yolox::detect(Mat& srcimg)
{
	// Clear the target queue first so an early return never exposes stale results.
	m_targetArray.clear();

	// resize_image() divides by the image size, so bail out on an empty frame.
	if (srcimg.empty())
	{
		return 0;
	}

	float scale = 1.0f;
	Mat dstimg = this->resize_image(srcimg, &scale);
	this->normalize(dstimg);
	Mat blob = cv::dnn::blobFromImage(dstimg);

	m_YoloNet.setInput(blob);
	vector<Mat> outs;
	m_YoloNet.forward(outs, m_YoloNet.getUnconnectedOutLayersNames());
	if (outs.empty() || outs[0].empty())
	{
		return 0;
	}

	// Flatten a 3-D output into one proposal per row.
	if (outs[0].dims == 3)
	{
		const int num_proposal = outs[0].size[1];
		outs[0] = outs[0].reshape(0, num_proposal);
	}
	const Mat& proposals = outs[0];
	if (proposals.cols <= 5)
	{
		std::cerr << "yolox::detect: network output has no class-score columns\n";
		return 0;
	}

	// Generate proposals, decode outputs.
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	int row_ind = 0;
	for (int n = 0; n < 3; n++)   // one pass per stride level
	{
		const int num_grid_x = (int)(this->input_shape[1] / this->stride[n]);
		const int num_grid_y = (int)(this->input_shape[0] / this->stride[n]);
		for (int i = 0; i < num_grid_y; i++)
		{
			for (int j = 0; j < num_grid_x; j++)
			{
				// Read every field of this proposal from the same tensor row.
				// The box/objectness pointer used to advance by a stride taken
				// from the class-name count, which drifts away from the class
				// scores whenever that count differs from the tensor width.
				const Mat proposal = proposals.row(row_ind);
				const float* pdata = proposal.ptr<float>(0);
				const float box_score = pdata[4];

				// Value and location of the best class score.
				const Mat scores = proposal.colRange(5, proposal.cols);
				Point classIdPoint;
				double max_class_score;
				minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
				const int class_idx = classIdPoint.x;

				const float cls_score = pdata[5 + class_idx];
				const float box_prob = box_score * cls_score;
				if (box_prob > m_para.conf_threshold)
				{
					// Grid offsets and log sizes -> box in network-input pixels.
					const float x_center = (pdata[0] + j) * this->stride[n];
					const float y_center = (pdata[1] + i) * this->stride[n];
					const float w = std::exp(pdata[2]) * this->stride[n];
					const float h = std::exp(pdata[3]) * this->stride[n];
					const float x0 = x_center - w * 0.5f;
					const float y0 = y_center - h * 0.5f;

					classIds.push_back(class_idx);
					confidences.push_back(box_prob);
					boxes.push_back(Rect(int(x0), int(y0), (int)(w), (int)(h)));
				}

				row_ind++;
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	cv::dnn::NMSBoxes(boxes, confidences, this->m_para.conf_threshold, this->m_para.nms_threshold, indices);
	for (size_t k = 0; k < indices.size(); ++k)
	{
		const int idx = indices[k];
		const Rect box = boxes[idx];
		// Undo the letterbox scale to get back to source-image pixels.
		float x0 = (box.x) / scale;
		float y0 = (box.y) / scale;
		float x1 = (box.x + box.width) / scale;
		float y1 = (box.y + box.height) / scale;

		// Clip to the image.
		x0 = std::max(std::min(x0, (float)(srcimg.cols - 1)), 0.f);
		y0 = std::max(std::min(y0, (float)(srcimg.rows - 1)), 0.f);
		x1 = std::max(std::min(x1, (float)(srcimg.cols - 1)), 0.f);
		y1 = std::max(std::min(y1, (float)(srcimg.rows - 1)), 0.f);

		Detection result;
		// Stored class IDs are shifted by one; per the original note, 0 is the
		// initial (unset) target class ID.
		result.class_id = classIds[idx] + 1;
		result.confidence = confidences[idx];
		result.box = cv::Rect(Point(int(x0), int(y0)), Point(int(x1), int(y1)));
		m_targetArray.push_back(result);

		// Debug drawing (disabled):
		// rectangle(srcimg, Point(x0, y0), Point(x1, y1), Scalar(0, 0, 255), 2);
		// //Get the label for the class name and its confidence
		// string label = format("%.2f", confidences[idx]);
		// label = this->classes[classIds[idx]] + ":" + label;
		// //Display the label at the top of the bounding box
		// int baseLine;
		// Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
		// y0 = std::max(y0, (float)labelSize.height);
		// putText(srcimg, label, Point(x0, y0), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
	}

	return int(m_targetArray.size());
}


/**
 * @brief Blocks the calling thread for at least @p ms milliseconds.
 *
 * The previous Windows branch passed ms/1000.0 to Sleep(), which already
 * takes milliseconds, so it slept a thousand times too short. One portable
 * standard-library call now gives the same behaviour on every platform.
 *
 * @param ms Delay in milliseconds; zero or negative values return at once.
 */
void SLEEP(int ms)
{
	// A negative delay used to wrap around to a huge unsigned value.
	if (ms <= 0)
	{
		return;
	}
	std::this_thread::sleep_for(std::chrono::milliseconds(ms));
}