You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

406 lines
18 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include <opencv2/imgproc.hpp>
#include "use_mnn.hpp"
#include <thread>
using namespace std;
using namespace GUD::ALG;
/**
 * Build the forward-pass wrapper: optionally pin the process to a big CPU
 * core, load both MNN models (cls + reg), then allocate the host-side
 * input/output tensors whose shapes were cached by net_init().
 * @param conf  tracker configuration: thread count, precision mode,
 *              device type and the two .mnn model paths.
 */
ForwardNet::ForwardNet(const gud_siamrpn_config_t conf)
{
#if (1 == D_CPU_BIGCORE)
    // Bind this process to core 3 (3559 big core) so inference is not
    // migrated to a slower core by the scheduler.
    cpu_set_t mask; // set of CPUs this process may run on
    CPU_ZERO(&mask);
    CPU_SET(3, &mask); // 3559 core 3
    if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
    {
        printf("Set CPU affinity failure, ERROR:%s\n", strerror(errno));
        exit(-1);
    }
    else
    {
        printf("Set CPU affinity Success, Just run ForwardNet Class\n");
    }
#endif
    numThread_ = conf.thread_num;
    prec_type_ = conf.precision_type;
    deviceType_ = conf.device_type;
    t1_ = steady_clock::now();
    net_init(0, conf.mnnModel_0); // classification branch
    net_init(1, conf.mnnModel_1); // regression branch
    t2_ = steady_clock::now();
    time_span_ = duration_cast<duration<double>>(t2_ - t1_);
    printf("#MNN Model Load time is %f ms.\n", 1000 * time_span_.count());
    // Outputs are treated as flattened [N, C]: force trailing dims to 1.
    clsOut_dims_[2] = 1;
    clsOut_dims_[3] = 1;
    regOut_dims_[2] = 1;
    regOut_dims_[3] = 1;
    updataCKDone_ = 0;
    updataRKDone_ = 0;
    /** create host-side inputs & output tensors for the two networks **/
    clsMapTensor_ = MNN::Tensor::create<float>(clsMap_dims_, NULL, MNN::Tensor::CAFFE);
    clsKernelTensor_ = MNN::Tensor::create<float>(ck_dims_, NULL, MNN::Tensor::CAFFE);
    regMapTensor_ = MNN::Tensor::create<float>(regMap_dims_, NULL, MNN::Tensor::CAFFE);
    regKernelTensor_ = MNN::Tensor::create<float>(rk_dims_, NULL, MNN::Tensor::CAFFE);
    clsOutTensor_ = MNN::Tensor::create<float>(clsOut_dims_, NULL, MNN::Tensor::CAFFE);
    regOutTensor_ = MNN::Tensor::create<float>(regOut_dims_, NULL, MNN::Tensor::CAFFE);
#if (DEBUG_TMP == 1)
    printf ("Create Tensor clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
    printf ("Create Tensor RegOut_dims_[%d %d %d %d]\n",regOut_dims_[0],regOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
#endif
}
// Release both inference sessions (0 = classification net, 1 = regression net).
// NOTE(review): the double-header session (model_idx 2) is never released here
// when USE_DOUBLE_HEADER is enabled -- confirm whether that leak is intended.
ForwardNet::~ForwardNet() {
net_deinit(0);
net_deinit(1);
}
int ForwardNet::net_init(int model_idx,const char* model_path)
{
//numThread_ = 4;
printf ("Start Loading Net:[%d] (%s)#\n",model_idx,model_path);
if (model_idx == 0)
{
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
if (deviceType_ == 0){
config.type = MNN_FORWARD_CPU;
}
else if (deviceType_ == 1){
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
}
config.numThread = numThread_;
net_cls_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
if (prec_type_ == 0) {
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
}
else if (prec_type_ == 1) {
bnconfig.precision = MNN::BackendConfig::Precision_High;
} else if (prec_type_ == 2) {
bnconfig.precision = MNN::BackendConfig::Precision_Low;
}
else if (prec_type_ == 3) {
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
}
config.backendConfig = &bnconfig;
MNNconfig_cls_ = config;
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
ck_dims_ = inputs[clsKernelName]->shape();
clsMap_dims_ = inputs[clsMapName]->shape();
auto Routput0 = net->getSessionOutput(session, clsOutputName.data()); // signal output
clsOut_dims_ = Routput0->shape();
session_cls_ = session;
/** set inputs cls tensor **/
inputClsTensor = net_cls_->getSessionInputAll(session_cls_);
#if (DEBUG_TMP == 1)
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("RefSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision,bnconfig.power);
#endif
printf ("#MNN NetInit Load clsModel:{%s} Sucess; Thread_num:[%d] Device:[%d] #\n",model_path, numThread_,config.type);
}
else if (model_idx == 1)
{
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
if (deviceType_ == 0){
config.type = MNN_FORWARD_CPU;
}
else if (deviceType_ == 1){
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
}
config.numThread = numThread_;
net_reg_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
if (prec_type_ == 0) {
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
}
else if (prec_type_ == 1) {
bnconfig.precision = MNN::BackendConfig::Precision_High;
} else if (prec_type_ == 2) {
bnconfig.precision = MNN::BackendConfig::Precision_Low;
}
else if (prec_type_ == 3) {
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
}
config.backendConfig = &bnconfig;
MNNconfig_reg_ = config;
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
rk_dims_ = inputs[regKernelNane]->shape();
regMap_dims_ = inputs[regMapName]->shape();
auto Routput1 = net->getSessionOutput(session, regOutputName.data()); // signal output
regOut_dims_ = Routput1->shape();
session_reg_ = session;
/** set inputs reg tensor **/
inputRegTensor = net_reg_->getSessionInputAll(session_reg_);
#if (DEBUG_TMP == 1)
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
#endif
printf ("#MNN NetInit Load regModel:{%s} Sucess; Thread_num:[%d] Device:[%d]#\n",model_path, numThread_, config.type);
}
#if (USE_DOUBLE_HEADER == 1)
else if (model_idx == 2)
{
/** net input & output names **/
std::string clsKernelInput = "clskernel";
std::string clsMapInput = "x";
std::string clsOutput = "57";
std::string regKernelInput = "regkernel";
std::string regMapInput = "y";
std::string regOutput = "62";
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU MNN_FORWARD_OPENCL
config.numThread = numThread_;
if (config.type == MNN_FORWARD_OPENCL)
config.mode = MNN_GPU_TUNING_FAST;
net_double_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
bnconfig.precision = MNN::BackendConfig::Precision_Low; // Precision_Normal = 0, Precision_High, Precision_Low Precision_Low_BF16
config.backendConfig = &bnconfig;
MNNconfig_double_ = config;
//net->setSessionMode(MNN::Interpreter::Session_Input_User);
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
ck_dims_ = inputs[clsKernelInput]->shape();
clsMap_dims_ = inputs[clsMapInput]->shape();
rk_dims_ = inputs[regKernelInput]->shape();
regMap_dims_ = inputs[regMapInput]->shape();
auto Coutput1 = net->getSessionOutput(session, regOutput.data()); // signal output
clsOut_dims_ = Coutput1->shape();
auto Routput1 = net->getSessionOutput(session, regOutput.data()); // signal output
regOut_dims_ = Routput1->shape();
session_double_ = session;
/** set inputs reg tensor **/
input4Tensors = net_double_->getSessionInputAll(session_double_);
input4Tensors[clsKernelInput] = clsKernelTensor_;
input4Tensors[clsMapInput] = clsMapTensor_;
input4Tensors[regKernelInput] = regKernelTensor_;
input4Tensors[regMapInput] = regMapTensor_;
// net_double_->resizeSession(session_double_);
#if (DEBUG_TMP == 1)
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
#endif
printf ("#MNN NetInit Load DoubleHeaderModel:{%s} Sucess!; Thread_num:[%d] Device:[%d] #\n",
model_path, numThread_, config.type);
}
#endif
initialized_ = true;
printf("*** Loading Model sucess!!! ***\n");
return 0;
}
// Release the inference session owned by the given model index
// (0 = classification net, 1 = regression net; other values are no-ops).
int ForwardNet::net_deinit(int model_idx)
{
    switch (model_idx) {
    case 0:
        net_cls_->releaseSession(session_cls_);
        break;
    case 1:
        net_reg_->releaseSession(session_reg_);
        break;
    default:
        break;
    }
    return 0;
}
/**
 * Run one forward pass of the classification head.
 * @param cmap_data    host buffer with the search-feature map; ignored when
 *                     dataReady is true (map tensor already filled, e.g. by
 *                     updateTwoMaps()).
 * @param coutput_data host buffer receiving shape[0]*shape[1] output floats.
 * @param dataReady    skip the cmap_data -> clsMapTensor_ copy when true.
 * @return 0 on success.
 */
int ForwardNet::clsMnnForward(const float *cmap_data, float* coutput_data, bool dataReady)
{
    t1_ = steady_clock::now();
    if (!dataReady) {
        // Stage the feature map into the host-side input tensor.
        auto clsM_data = clsMapTensor_->host<float>();
        auto clsM_size = static_cast<int>(clsMapTensor_->elementSize());
        ::memcpy(clsM_data, cmap_data, sizeof(float) * clsM_size);
    }
    // Upload kernel + map into the session's input tensors.
    auto clsKernelInput = inputClsTensor[clsKernelName];
    auto clsMapInput = inputClsTensor[clsMapName];
    clsKernelInput->copyFromHostTensor(clsKernelTensor_);
    clsMapInput->copyFromHostTensor(clsMapTensor_);
    /**run network **/
    net_cls_->runSession(session_cls_);
    /**get output data **/
    auto outTensor = net_cls_->getSessionOutput(session_cls_, clsOutputName.data());
    auto outshape = outTensor->shape();
    // Output is flattened [N, C]; trailing dims were forced to 1 at init.
    int outClsSize = outshape[0] * outshape[1];
    outTensor->copyToHostTensor(clsOutTensor_);
    ::memcpy(coutput_data, clsOutTensor_->host<float>(), sizeof(float) * outClsSize);
    t2_ = steady_clock::now();
#if (DEBUG_TMP == 1)
    time_span_ = duration_cast<duration<double>>(t2_ - t1_);
    printf("#Run cls getSessionOutput time is %f ms.\n", 1000 * time_span_.count());
#endif
    return 0;
}
/**
 * Run one forward pass of the regression head.
 * @param rmap_data    host buffer with the search-feature map; ignored when
 *                     dataReady is true (map tensor already filled, e.g. by
 *                     updateTwoMaps()).
 * @param routput_data host buffer receiving shape[0]*shape[1] output floats.
 * @param dataReady    skip the rmap_data -> regMapTensor_ copy when true.
 * @return 0 on success.
 */
int ForwardNet::regMnnForward(const float *rmap_data, float* routput_data, bool dataReady)
{
    t1_ = steady_clock::now();
    if (!dataReady) {
        // Stage the feature map into the host-side input tensor.
        auto regM_data = regMapTensor_->host<float>();
        auto regM_size = static_cast<int>(regMapTensor_->elementSize());
        ::memcpy(regM_data, rmap_data, sizeof(float) * regM_size);
    }
    // Upload kernel + map into the session's input tensors.
    auto regKernelInput = inputRegTensor[regKernelNane];
    auto regMapInput = inputRegTensor[regMapName];
    regKernelInput->copyFromHostTensor(regKernelTensor_);
    regMapInput->copyFromHostTensor(regMapTensor_);
    /**run network **/
    net_reg_->runSession(session_reg_);
    /**get output data **/
    auto outTensor = net_reg_->getSessionOutput(session_reg_, regOutputName.data());
    auto outshape = outTensor->shape();
    // Output is flattened [N, C]; trailing dims were forced to 1 at init.
    int outRegSize = outshape[0] * outshape[1];
    outTensor->copyToHostTensor(regOutTensor_);
    ::memcpy(routput_data, regOutTensor_->host<float>(), sizeof(float) * outRegSize);
    t2_ = steady_clock::now();
#if (DEBUG_TMP == 1)
    time_span_ = duration_cast<duration<double>>(t2_ - t1_);
    printf("#Run reg getSessionOutput time is %f ms.\n", 1000 * time_span_.count());
#endif
    return 0;
}
/**
 * Copy fresh template (exemplar) kernels into both host-side kernel
 * tensors and mark them as updated.
 * @param ckernel_data source buffer for the classification kernel; must
 *                     hold at least the product of clsKernelTensor_'s dims.
 * @param rkernel_data source buffer for the regression kernel; must hold
 *                     at least the product of regKernelTensor_'s dims.
 * @return 0 on success.
 */
int ForwardNet::updateTwoKernels(const float *ckernel_data, const float *rkernel_data)
{
    auto clsK_data = clsKernelTensor_->host<float>();
    auto regK_data = regKernelTensor_->host<float>();
    // Sizes derived from the 4-D kernel shapes (the original elementSize()
    // reads were dead stores, immediately overwritten by these products).
    auto cshape = clsKernelTensor_->shape();
    auto rshape = regKernelTensor_->shape();
    const int clsK_size = cshape[0] * cshape[1] * cshape[2] * cshape[3];
    const int regK_size = rshape[0] * rshape[1] * rshape[2] * rshape[3];
    t1_ = steady_clock::now();
    ::memcpy(clsK_data, ckernel_data, sizeof(float) * clsK_size);
    ::memcpy(regK_data, rkernel_data, sizeof(float) * regK_size);
    t2_ = steady_clock::now();
    time_span_ = duration_cast<duration<double>>(t2_ - t1_);
    updataRKDone_ = 1;
    updataCKDone_ = 1;
    return 0;
}
/**
 * Copy fresh search-feature maps into both host-side map tensors and mark
 * them as updated (so the forwards can skip their own copies via dataReady).
 * @param cm_data source buffer for the classification map; must hold at
 *                least the product of clsMapTensor_'s dims.
 * @param rm_data source buffer for the regression map; must hold at least
 *                the product of regMapTensor_'s dims.
 * @return 0 on success.
 */
int ForwardNet::updateTwoMaps(const float *cm_data, const float *rm_data)
{
    auto cMapData = clsMapTensor_->host<float>();
    auto rMapData = regMapTensor_->host<float>();
    // Sizes derived from the 4-D map shapes (the unused elementSize()
    // locals in the original were removed).
    auto cmshape = clsMapTensor_->shape();
    auto rmshape = regMapTensor_->shape();
    const int clsM_size = cmshape[0] * cmshape[1] * cmshape[2] * cmshape[3];
    const int regM_size = rmshape[0] * rmshape[1] * rmshape[2] * rmshape[3];
    t1_ = steady_clock::now();
    ::memcpy(cMapData, cm_data, sizeof(float) * clsM_size);
    ::memcpy(rMapData, rm_data, sizeof(float) * regM_size);
    t2_ = steady_clock::now();
    time_span_ = duration_cast<duration<double>>(t2_ - t1_);
    updataRMDone_ = 1;
    updataCMDone_ = 1;
    return 0;
}
// Debug helper: print tensor metadata followed by the first `len` float
// values, inserting a numbered line break after every `diff` elements.
void ForwardNet::showTensor(MNN::Tensor *TensorIn, int len, int diff)
{
    const auto dims = TensorIn->shape();
    const float *vals = TensorIn->host<float>();
    const int byteSize = static_cast<int>(TensorIn->size());
    printf ("\n***** Tensor shape:[%d %d %d %d], size:%d ******\n", dims[0], dims[1], dims[2], dims[3], byteSize);
    printf ("DimensionType: %d dimensions:%d elementSize:%d\n", TensorIn->getDimensionType(), TensorIn->dimensions(), TensorIn->elementSize());
    int blockNo = 1;
    for (int idx = 0; idx < len; ++idx) {
        printf ("%.3f ", vals[idx]);
        if (idx != 0 && idx % diff == 0) {
            printf("\n#[%d]#\n", blockNo);
            ++blockNo;
        }
    }
    printf ("\n***** END ******\n");
}