|
|
#include "YOLO.h"
|
|
|
#include <fstream>
|
|
|
#include <cmath>
|
|
|
using namespace std;
|
|
|
using namespace cv;
|
|
|
|
|
|
|
|
|
#undef max
|
|
|
#undef min
|
|
|
|
|
|
/**
 * Builds a YOLOv5 detector.
 *
 * @param modelpath    Path of the network file handed to cv::dnn::readNet.
 * @param namesConfig  Text file with one class name per line.
 * @param para         Runtime parameters (CUDA switch, thresholds); stored in m_para.
 *
 * No exception handling is done here, so any exception raised while loading
 * the model propagates to the caller.
 */
yolo5::yolo5(string modelpath, string namesConfig, YoloParam para)
{
    // Load the model and remember where it came from.
    m_YoloNet = cv::dnn::readNet(modelpath);
    NetPath = modelpath;

    // Select the inference backend/target according to the CUDA switch.
    std::cout << (para.bUseCuda ? "Attempty to use CUDA\n" : "Running on CPU\n");
    m_YoloNet.setPreferableBackend(para.bUseCuda ? cv::dnn::DNN_BACKEND_CUDA
                                                 : cv::dnn::DNN_BACKEND_OPENCV);
    m_YoloNet.setPreferableTarget(para.bUseCuda ? cv::dnn::DNN_TARGET_CUDA
                                                : cv::dnn::DNN_TARGET_CPU);

    // Rebuild the class-name table: one entry per line of the names file.
    g_YoloClassName.clear();
    std::ifstream namesFile(namesConfig);
    for (std::string className; std::getline(namesFile, className); )
    {
        g_YoloClassName.push_back(className);
    }

    // Keep the detection parameters for later calls.
    m_para = para;
}
|
|
|
|
|
|
|
|
|
/**
 * Pads the image to a square and resizes it to the network input size.
 *
 * The source is copied into the top-left corner of a black CV_8UC3 square
 * whose side is the larger of the source width and height, then the square
 * is resized to INPUT_WIDTH x INPUT_WIDTH.
 *
 * @param source  Image to pad and resize.
 * @return The resized square image.
 */
cv::Mat yolo5::keepRatio(const cv::Mat &source)
{
    const int width = source.cols;
    const int height = source.rows;
    const int side = (width < height) ? height : width;  // longer edge

    // Black square canvas with the source placed at the top-left corner.
    cv::Mat canvas = cv::Mat::zeros(cv::Size(side, side), CV_8UC3);
    cv::Mat topLeft = canvas(cv::Rect(0, 0, width, height));
    source.copyTo(topLeft);

    // Scale the square to the target size (INPUT_WIDTH, INPUT_WIDTH).
    cv::Mat resized;
    cv::resize(canvas, resized, cv::Size(INPUT_WIDTH, INPUT_WIDTH));
    return resized;
}
|
|
|
|
|
|
/**
 * Runs the YOLOv5 network on an image and stores the detections that survive
 * non-maximum suppression in m_targetArray.
 *
 * The image is first padded to a square by keepRatio(), so predicted boxes are
 * mapped back with a single scale derived from the longer image edge. Each
 * output row is read as [cx, cy, w, h, confidence, class scores...].
 *
 * @param image  Input image.
 * @return Number of detections written to m_targetArray (0 when the network
 *         produces no usable output).
 */
int yolo5::detect(Mat& image)
{
    const int srcImgWidth = image.cols;
    const int srcImgHeight = image.rows;

    cv::Mat input_image = keepRatio(image);

    cv::Mat blob;
    cv::dnn::blobFromImage(input_image, blob, double(1./255.), cv::Size(INPUT_WIDTH, INPUT_HEIGHT), cv::Scalar(), true, false);
    m_YoloNet.setInput(blob);

    std::vector<cv::Mat> outputs;
    m_YoloNet.forward(outputs, m_YoloNet.getUnconnectedOutLayersNames());

    // Clear the target queue before any early return so stale results never leak out.
    m_targetArray.clear();

    if (outputs.empty() || outputs[0].empty() || outputs[0].dims < 2)
    {
        return 0;
    }

    // Take the row layout from the tensor itself instead of hard-coded constants:
    // the last axis is the per-proposal record, everything before it counts proposals.
    const int dimensions = outputs[0].size[outputs[0].dims - 1];
    if (dimensions <= 5)
    {
        return 0;
    }
    const int rows = int(outputs[0].total() / dimensions);

    // Never read class scores past the end of a row, even if the names file
    // lists more classes than the model outputs.
    const int namedClasses = int(g_YoloClassName.size());
    const int numClasses = (namedClasses < dimensions - 5) ? namedClasses : (dimensions - 5);
    if (numClasses <= 0)
    {
        return 0;
    }

    // keepRatio() pads to a square whose side is the longer image edge, so that
    // edge (not always the width) is what network coordinates scale back to.
    const int paddedSide = (srcImgWidth > srcImgHeight) ? srcImgWidth : srcImgHeight;
    const float x_factor = paddedSide / static_cast<float>(INPUT_WIDTH);
    const float y_factor = paddedSide / static_cast<float>(INPUT_HEIGHT);

    float *data = (float *)outputs[0].data;

    std::vector<int> class_ids;
    std::vector<float> confidences;
    std::vector<cv::Rect> boxes;

    for (int i = 0; i < rows; ++i, data += dimensions)
    {
        const float confidence = data[4];
        if (confidence < m_para.conf_threshold)
        {
            continue;
        }

        // Best class for this proposal.
        cv::Mat scores(1, numClasses, CV_32FC1, data + 5);
        cv::Point class_id;
        double max_class_score;
        minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
        if (max_class_score <= m_para.score_threshold)
        {
            continue;
        }

        confidences.push_back(confidence);
        class_ids.push_back(class_id.x);

        // Convert centre/size in network coordinates to a top-left box in image coordinates.
        const float x = data[0];
        const float y = data[1];
        const float w = data[2];
        const float h = data[3];
        const int left = int((x - 0.5 * w) * x_factor);
        const int top = int((y - 0.5 * h) * y_factor);
        const int width = int(w * x_factor);
        const int height = int(h * y_factor);
        boxes.push_back(cv::Rect(left, top, width, height));
    }

    std::vector<int> nms_result;
    cv::dnn::NMSBoxes(boxes, confidences, m_para.score_threshold, m_para.nms_threshold, nms_result);

    for (const int idx : nms_result)
    {
        Detection result;
        // Reported class ids are shifted by one; the original note says a target's
        // class id is initialised to 0, so 0 is presumably reserved for "unset".
        result.class_id = class_ids[idx] + 1;
        result.confidence = confidences[idx];
        result.box = boxes[idx];
        m_targetArray.push_back(result);
    }

    // No additional sorting by confidence is applied to m_targetArray.
    return int(m_targetArray.size());
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Builds a YOLOX detector.
 *
 * @param modelpath    Path of the network file handed to cv::dnn::readNet.
 * @param namesConfig  Text file with one class name per line.
 * @param para         Runtime parameters (CUDA switch, thresholds); stored in m_para.
 *
 * Any std::exception thrown during setup is caught and its message is written
 * to std::cerr; initialisation steps after the throwing statement are skipped.
 */
yolox::yolox(std::string modelpath, std::string namesConfig, YoloParam para)
{
    try
    {
        // Load the model and remember where it came from.
        m_YoloNet = cv::dnn::readNet(modelpath);
        NetPath = modelpath;

        // Select the inference backend/target according to the CUDA switch.
        std::cout << (para.bUseCuda ? "Attempty to use CUDA\n" : "Running on CPU\n");
        m_YoloNet.setPreferableBackend(para.bUseCuda ? cv::dnn::DNN_BACKEND_CUDA
                                                     : cv::dnn::DNN_BACKEND_OPENCV);
        m_YoloNet.setPreferableTarget(para.bUseCuda ? cv::dnn::DNN_TARGET_CUDA
                                                    : cv::dnn::DNN_TARGET_CPU);

        // Rebuild the class-name table: one entry per line of the names file.
        g_YoloClassName.clear();
        std::ifstream namesFile(namesConfig);
        for (std::string className; std::getline(namesFile, className); )
        {
            g_YoloClassName.push_back(className);
        }
        num_class = int(g_YoloClassName.size());

        // Keep the detection parameters for later calls.
        m_para = para;
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << '\n';
    }
}
|
|
|
|
|
|
/**
 * Resizes an image to fit the network input while keeping its aspect ratio,
 * padding the remainder with grey (114, 114, 114).
 *
 * input_shape is used as {height, width}: index 1 is compared against the
 * image columns and index 0 against the image rows.
 *
 * @param srcimg  Source image.
 * @param scale   Output: the resize ratio applied to srcimg (written through
 *                the pointer; must not be null).
 * @return A CV_8UC3 canvas of input_shape[0] rows by input_shape[1] columns
 *         with the resized image in its top-left corner.
 */
Mat yolox::resize_image(Mat srcimg, float* scale)
{
    // Largest ratio at which the whole image still fits inside the network input.
    const float r = float(std::min(this->input_shape[1] / (srcimg.cols * 1.0),
                                   this->input_shape[0] / (srcimg.rows * 1.0)));
    *scale = r;

    const int unpad_w = int(r * srcimg.cols);
    const int unpad_h = int(r * srcimg.rows);
    Mat re(unpad_h, unpad_w, CV_8UC3);
    resize(srcimg, re, re.size());

    // cv::Mat takes (rows, cols) = (height, width). The canvas used to be built
    // with the two swapped, which only worked for square network inputs.
    Mat out(this->input_shape[0], this->input_shape[1], CV_8UC3, Scalar(114, 114, 114));
    re.copyTo(out(Rect(0, 0, re.cols, re.rows)));
    return out;
}
|
|
|
|
|
|
/**
 * Normalises an image in place for the network.
 *
 * The image is converted from BGR to RGB and to 32-bit float, then every
 * channel value v becomes (v / 255 - mean[c]) / std[c].
 *
 * @param img  Image to normalise; modified in place. The per-pixel loop steps
 *             three floats at a time, i.e. it treats the image as 3-channel.
 */
void yolox::normalize(Mat& img)
{
    cvtColor(img, img, cv::COLOR_BGR2RGB);
    img.convertTo(img, CV_32F);

    for (int y = 0; y < img.rows; ++y)
    {
        float* px = img.ptr<float>(y);  // first pixel of row y
        for (int x = 0; x < img.cols; ++x, px += 3)
        {
            for (int c = 0; c < 3; ++c)
            {
                px[c] = float((px[c] / 255.0f - this->mean[c]) / this->std[c]);
            }
        }
    }
}
|
|
|
|
|
|
int yolox::get_max_class(float* scores)
|
|
|
{
|
|
|
float max_class_socre = 0, class_socre = 0;
|
|
|
int max_class_id = 0, c = 0;
|
|
|
for (c = 0; c < this->num_class; c++) //// get max socre
|
|
|
{
|
|
|
if (scores[c] > max_class_socre)
|
|
|
{
|
|
|
max_class_socre = scores[c];
|
|
|
max_class_id = c;
|
|
|
}
|
|
|
}
|
|
|
return max_class_id;
|
|
|
}
|
|
|
|
|
|
/**
 * Runs the YOLOX network on an image and stores the detections that survive
 * non-maximum suppression in m_targetArray.
 *
 * The image is resized by resize_image() and normalised by normalize(). The
 * raw output is decoded grid by grid for the three strides in `stride`:
 * each proposal row is read as [dx, dy, log w, log h, box score, class scores...].
 * Boxes are finally divided by the resize ratio and clipped to the image.
 *
 * @param srcimg  Input image.
 * @return Number of detections written to m_targetArray (0 when the network
 *         produces no usable output).
 */
int yolox::detect(Mat& srcimg)
{
    float scale = 1.0f;
    Mat dstimg = this->resize_image(srcimg, &scale);
    this->normalize(dstimg);
    Mat blob = cv::dnn::blobFromImage(dstimg);

    m_YoloNet.setInput(blob);
    vector<Mat> outs;
    m_YoloNet.forward(outs, m_YoloNet.getUnconnectedOutLayersNames());

    // Clear the target queue before any early return so stale results never leak out.
    m_targetArray.clear();

    if (outs.empty() || outs[0].empty())
    {
        return 0;
    }

    // Flatten a 3-D output into a 2-D matrix with one row per proposal.
    if (outs[0].dims == 3)
    {
        const int num_proposal = outs[0].size[1];
        outs[0] = outs[0].reshape(0, num_proposal);
    }
    const Mat& proposals = outs[0];
    if (proposals.dims != 2 || proposals.cols <= 5)
    {
        return 0;
    }

    // Generate proposals and decode the outputs.
    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;

    // Walk the rows through the matrix itself so the record stride always matches
    // the tensor, and stop at its last row even if the grids would expect more.
    int row_ind = 0;
    for (int n = 0; n < 3 && row_ind < proposals.rows; n++)  // one pass per stride level
    {
        const int num_grid_x = (int)(this->input_shape[1] / this->stride[n]);
        const int num_grid_y = (int)(this->input_shape[0] / this->stride[n]);
        for (int i = 0; i < num_grid_y && row_ind < proposals.rows; i++)
        {
            for (int j = 0; j < num_grid_x && row_ind < proposals.rows; j++, row_ind++)
            {
                const float* pdata = proposals.ptr<float>(row_ind);
                const float box_score = pdata[4];

                // Get the value and location of the maximum class score.
                Mat scores = proposals.row(row_ind).colRange(5, proposals.cols);
                Point classIdPoint;
                double max_class_score;
                minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
                const int class_idx = classIdPoint.x;

                const float cls_score = pdata[5 + class_idx];
                const float box_prob = box_score * cls_score;
                if (box_prob > m_para.conf_threshold)
                {
                    // Offsets are relative to the grid cell; sizes are log-encoded.
                    const float x_center = (pdata[0] + j) * this->stride[n];
                    const float y_center = (pdata[1] + i) * this->stride[n];
                    const float w = exp(pdata[2]) * this->stride[n];
                    const float h = exp(pdata[3]) * this->stride[n];
                    const float x0 = x_center - w * 0.5f;
                    const float y0 = y_center - h * 0.5f;

                    classIds.push_back(class_idx);
                    confidences.push_back(box_prob);
                    boxes.push_back(Rect(int(x0), int(y0), (int)(w), (int)(h)));
                }
            }
        }
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes
    // with lower confidences.
    vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, this->m_para.conf_threshold, this->m_para.nms_threshold, indices);

    const float max_x = (float)(srcimg.cols - 1);
    const float max_y = (float)(srcimg.rows - 1);
    for (const int idx : indices)
    {
        const Rect box = boxes[idx];

        // Undo the resize ratio to get coordinates in the original image.
        float x0 = (box.x) / scale;
        float y0 = (box.y) / scale;
        float x1 = (box.x + box.width) / scale;
        float y1 = (box.y + box.height) / scale;

        // Clip to the image bounds.
        x0 = std::max(std::min(x0, max_x), 0.f);
        y0 = std::max(std::min(y0, max_y), 0.f);
        x1 = std::max(std::min(x1, max_x), 0.f);
        y1 = std::max(std::min(y1, max_y), 0.f);

        Detection result;
        // Reported class ids are shifted by one; the original note says a target's
        // class id is initialised to 0, so 0 is presumably reserved for "unset".
        result.class_id = classIds[idx] + 1;
        result.confidence = confidences[idx];
        result.box = cv::Rect(Point(int(x0), int(y0)), Point(int(x1), int(y1)));
        m_targetArray.push_back(result);
    }

    return int(m_targetArray.size());
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Suspends the calling thread for roughly `ms` milliseconds.
 *
 * @param ms  Delay in milliseconds; negative values are treated as 0.
 */
void SLEEP(int ms)
{
    // A negative delay would wrap to a huge unsigned value in the calls below.
    if (ms < 0)
    {
        ms = 0;
    }
#ifdef _WIN32
    // Sleep() already takes milliseconds, so the value is passed through unscaled.
    Sleep(static_cast<DWORD>(ms));
#else
    // usleep() takes microseconds.
    usleep(static_cast<unsigned int>(ms) * 1000u);
#endif
}
|
|
|
|