// MNN-based forward networks (classification / regression heads) for the SiamRPN tracker.
#include <opencv2/imgproc.hpp>
#include "use_mnn.hpp"
#include <thread>
using namespace std;
using namespace GUD::ALG;
/// Construct the forward-network wrapper: optionally pin the process to a big
/// CPU core, load the classification (model 0) and regression (model 1) MNN
/// models, then allocate the host-side input/output tensors used for I/O
/// staging. Tensors created here are caller-owned and must be freed in the
/// destructor.
/// @param conf  tracker configuration (thread count, precision, device, model paths)
ForwardNet::ForwardNet(const gud_siamrpn_config_t conf)
{
#if (1 == D_CPU_BIGCORE)
    cpu_set_t mask;  // set of CPU cores this process will be bound to
    CPU_ZERO(&mask); // initialize the set to empty
    // Bind this process to core 3 (big core on the 3559 SoC)
    CPU_SET(3, &mask); // 3559 core 3
    if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
    {
        printf("Set CPU affinity failure, ERROR:%s\n", strerror(errno));
        exit(-1);
    }
    else
    {
        printf("Set CPU affinity Success, Just run ForwardNet Class\n");
    }
#endif
    numThread_ = conf.thread_num;
    prec_type_ = conf.precision_type;
    deviceType_ = conf.device_type;
    t1_ = steady_clock::now();
    net_init(0, conf.mnnModel_0);
    net_init(1, conf.mnnModel_1);
    t2_ = steady_clock::now();
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    printf("#MNN Model Load time is %f ms.\n", 1000 * time_span_.count());
    // Collapse the spatial dims of the output tensors to 1x1: the nets emit
    // flattened score/regression vectors in dims [0] and [1].
    clsOut_dims_[2] = 1;
    clsOut_dims_[3] = 1;
    regOut_dims_[2] = 1;
    regOut_dims_[3] = 1;
    updataCKDone_ = 0;
    updataRKDone_ = 0;
    /** create host input & output tensors for the two networks (dims were
     *  filled in by net_init from the loaded models) **/
    clsMapTensor_ = MNN::Tensor::create<float>(clsMap_dims_, NULL, MNN::Tensor::CAFFE);
    clsKernelTensor_ = MNN::Tensor::create<float>(ck_dims_, NULL, MNN::Tensor::CAFFE);
    regMapTensor_ = MNN::Tensor::create<float>(regMap_dims_, NULL, MNN::Tensor::CAFFE);
    regKernelTensor_ = MNN::Tensor::create<float>(rk_dims_, NULL, MNN::Tensor::CAFFE);
    clsOutTensor_ = MNN::Tensor::create<float>(clsOut_dims_, NULL, MNN::Tensor::CAFFE);
    regOutTensor_ = MNN::Tensor::create<float>(regOut_dims_, NULL, MNN::Tensor::CAFFE);
#if (DEBUG_TMP == 1)
    printf ("Create Tensor clsOut_dims_[%d %d %d %d]\n", clsOut_dims_[0], clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
    printf ("Create Tensor RegOut_dims_[%d %d %d %d]\n", regOut_dims_[0], regOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
#endif
}
/// Release the two MNN sessions and free the host tensors allocated in the
/// constructor. MNN::Tensor::create returns a caller-owned pointer, so the
/// original code leaked all six tensors.
ForwardNet::~ForwardNet() {
    net_deinit(0);
    net_deinit(1);
    delete clsMapTensor_;     clsMapTensor_ = nullptr;
    delete clsKernelTensor_;  clsKernelTensor_ = nullptr;
    delete regMapTensor_;     regMapTensor_ = nullptr;
    delete regKernelTensor_;  regKernelTensor_ = nullptr;
    delete clsOutTensor_;     clsOutTensor_ = nullptr;
    delete regOutTensor_;     regOutTensor_ = nullptr;
}
int ForwardNet::net_init(int model_idx,const char* model_path)
{
//numThread_ = 4;
printf ("Start Loading Net:[%d] (%s)#\n",model_idx,model_path);
if (model_idx == 0)
{
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
if (deviceType_ == 0){
config.type = MNN_FORWARD_CPU;
}
else if (deviceType_ == 1){
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
}
config.numThread = numThread_;
net_cls_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
if (prec_type_ == 0) {
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
}
else if (prec_type_ == 1) {
bnconfig.precision = MNN::BackendConfig::Precision_High;
} else if (prec_type_ == 2) {
bnconfig.precision = MNN::BackendConfig::Precision_Low;
}
else if (prec_type_ == 3) {
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
}
config.backendConfig = &bnconfig;
MNNconfig_cls_ = config;
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
ck_dims_ = inputs[clsKernelName]->shape();
clsMap_dims_ = inputs[clsMapName]->shape();
auto Routput0 = net->getSessionOutput(session, clsOutputName.data()); // signal output
clsOut_dims_ = Routput0->shape();
session_cls_ = session;
/** set inputs cls tensor **/
inputClsTensor = net_cls_->getSessionInputAll(session_cls_);
#if (DEBUG_TMP == 1)
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("RefSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision,bnconfig.power);
#endif
printf ("#MNN NetInit Load clsModel:{%s} Sucess; Thread_num:[%d] Device:[%d] #\n",model_path, numThread_,config.type);
}
else if (model_idx == 1)
{
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
if (deviceType_ == 0){
config.type = MNN_FORWARD_CPU;
}
else if (deviceType_ == 1){
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
}
config.numThread = numThread_;
net_reg_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
if (prec_type_ == 0) {
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
}
else if (prec_type_ == 1) {
bnconfig.precision = MNN::BackendConfig::Precision_High;
} else if (prec_type_ == 2) {
bnconfig.precision = MNN::BackendConfig::Precision_Low;
}
else if (prec_type_ == 3) {
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
}
config.backendConfig = &bnconfig;
MNNconfig_reg_ = config;
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
rk_dims_ = inputs[regKernelNane]->shape();
regMap_dims_ = inputs[regMapName]->shape();
auto Routput1 = net->getSessionOutput(session, regOutputName.data()); // signal output
regOut_dims_ = Routput1->shape();
session_reg_ = session;
/** set inputs reg tensor **/
inputRegTensor = net_reg_->getSessionInputAll(session_reg_);
#if (DEBUG_TMP == 1)
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
#endif
printf ("#MNN NetInit Load regModel:{%s} Sucess; Thread_num:[%d] Device:[%d]#\n",model_path, numThread_, config.type);
}
#if (USE_DOUBLE_HEADER == 1)
else if (model_idx == 2)
{
/** net input & output names **/
std::string clsKernelInput = "clskernel";
std::string clsMapInput = "x";
std::string clsOutput = "57";
std::string regKernelInput = "regkernel";
std::string regMapInput = "y";
std::string regOutput = "62";
/** create network session **/
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
MNN::ScheduleConfig config;
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU MNN_FORWARD_OPENCL
config.numThread = numThread_;
if (config.type == MNN_FORWARD_OPENCL)
config.mode = MNN_GPU_TUNING_FAST;
net_double_ = net;
MNN::BackendConfig bnconfig;
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
//bnconfig.power = MNN::BackendConfig::Power_Low;
bnconfig.precision = MNN::BackendConfig::Precision_Low; // Precision_Normal = 0, Precision_High, Precision_Low Precision_Low_BF16
config.backendConfig = &bnconfig;
MNNconfig_double_ = config;
//net->setSessionMode(MNN::Interpreter::Session_Input_User);
auto session = net->createSession(config);
/** get inputs & outputs shape **/
auto inputs = net->getSessionInputAll(session); // multi inputs
ck_dims_ = inputs[clsKernelInput]->shape();
clsMap_dims_ = inputs[clsMapInput]->shape();
rk_dims_ = inputs[regKernelInput]->shape();
regMap_dims_ = inputs[regMapInput]->shape();
auto Coutput1 = net->getSessionOutput(session, regOutput.data()); // signal output
clsOut_dims_ = Coutput1->shape();
auto Routput1 = net->getSessionOutput(session, regOutput.data()); // signal output
regOut_dims_ = Routput1->shape();
session_double_ = session;
/** set inputs reg tensor **/
input4Tensors = net_double_->getSessionInputAll(session_double_);
input4Tensors[clsKernelInput] = clsKernelTensor_;
input4Tensors[clsMapInput] = clsMapTensor_;
input4Tensors[regKernelInput] = regKernelTensor_;
input4Tensors[regMapInput] = regMapTensor_;
// net_double_->resizeSession(session_double_);
#if (DEBUG_TMP == 1)
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
float memoryUsage = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
float flops = 0.0f;
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
int backendType[2];
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
#endif
printf ("#MNN NetInit Load DoubleHeaderModel:{%s} Sucess!; Thread_num:[%d] Device:[%d] #\n",
model_path, numThread_, config.type);
}
#endif
initialized_ = true;
printf("*** Loading Model sucess!!! ***\n");
return 0;
}
/// Release the MNN session for the given model index.
/// @param model_idx 0 = classification net, 1 = regression net
/// @return 0 always
int ForwardNet::net_deinit(int model_idx)
{
    // Guard against a never-initialized (null) interpreter so deinit is safe
    // even if construction failed part-way.
    if (model_idx == 0 && net_cls_)
        net_cls_->releaseSession(session_cls_);
    else if (model_idx == 1 && net_reg_)
        net_reg_->releaseSession(session_reg_);
    return 0;
}
/// Run the classification head: stage the search map (unless already staged),
/// copy kernel + map into the session inputs, run the net, and copy the
/// flattened scores into coutput_data.
/// @param cmap_data    host buffer with the cls search-feature map (ignored when dataReady)
/// @param coutput_data output buffer; receives outshape[0]*outshape[1] floats
/// @param dataReady    true if clsMapTensor_ was already filled (e.g. via updateTwoMaps)
/// @return 0 always
int ForwardNet::clsMnnForward(const float *cmap_data, float* coutput_data, bool dataReady)
{
    t1_ = steady_clock::now();
    if (!dataReady) {
        auto clsM_data = clsMapTensor_->host<float>();
        auto clsM_size = static_cast<int>(clsMapTensor_->elementSize());
        ::memcpy(clsM_data, cmap_data, sizeof(float) * clsM_size);
    }
    auto clsKernelInput = inputClsTensor[clsKernelName];
    auto clsMapInput = inputClsTensor[clsMapName];
    clsKernelInput->copyFromHostTensor(clsKernelTensor_);
    clsMapInput->copyFromHostTensor(clsMapTensor_);
    /** run network **/
    net_cls_->runSession(session_cls_);
    /** get output data **/
    auto outTensor = net_cls_->getSessionOutput(session_cls_, clsOutputName.data());
    auto outshape = outTensor->shape();
    // Only the first two dims carry data (spatial dims are 1); removed unused elementSize local.
    int outClsSize = outshape[0] * outshape[1];
    outTensor->copyToHostTensor(clsOutTensor_);
    //printf ("######### showTensor(clsOutTensor_) ##############\n");
    //showTensor(clsOutTensor_,outClsSize,5*19*19);
    ::memcpy(coutput_data, clsOutTensor_->host<float>(), sizeof(float) * outClsSize);
    t2_ = steady_clock::now();
#if (DEBUG_TMP == 1)
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    printf("#Run cls getSessionOutput time is %f ms.\n", 1000 * time_span_.count());
#endif
    return 0;
}
/// Run the regression head: stage the search map (unless already staged),
/// copy kernel + map into the session inputs, run the net, and copy the
/// flattened regression output into routput_data.
/// @param rmap_data    host buffer with the reg search-feature map (ignored when dataReady)
/// @param routput_data output buffer; receives outshape[0]*outshape[1] floats
/// @param dataReady    true if regMapTensor_ was already filled (e.g. via updateTwoMaps)
/// @return 0 always
int ForwardNet::regMnnForward(const float *rmap_data, float* routput_data, bool dataReady)
{
    t1_ = steady_clock::now();
    if (!dataReady) {
        auto regM_data = regMapTensor_->host<float>();
        auto regM_size = static_cast<int>(regMapTensor_->elementSize());
        ::memcpy(regM_data, rmap_data, sizeof(float) * regM_size);
    }
    auto regKernelInput = inputRegTensor[regKernelNane];
    auto regMapInput = inputRegTensor[regMapName];
    regKernelInput->copyFromHostTensor(regKernelTensor_);
    regMapInput->copyFromHostTensor(regMapTensor_);
    /** run network **/
    net_reg_->runSession(session_reg_);
    /** get output data **/
    auto outTensor = net_reg_->getSessionOutput(session_reg_, regOutputName.data());
    auto outshape = outTensor->shape();
    // Only the first two dims carry data (spatial dims are 1); removed unused elementSize local.
    int outRegSize = outshape[0] * outshape[1];
    outTensor->copyToHostTensor(regOutTensor_);
    //printf ("######### showTensor(regOutTensor_) ##############\n");
    //showTensor(regOutTensor_,outRegSize,5*19*19);
    ::memcpy(routput_data, regOutTensor_->host<float>(), sizeof(float) * outRegSize);
    t2_ = steady_clock::now();
#if (DEBUG_TMP == 1)
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    printf("#Run reg getSessionOutput time is %f ms.\n", 1000 * time_span_.count());
#endif
    return 0;
}
/// Copy new template (kernel) features into the host kernel tensors for both
/// heads and mark them updated.
/// @param ckernel_data cls kernel data, N*C*H*W floats per clsKernelTensor_'s shape
/// @param rkernel_data reg kernel data, N*C*H*W floats per regKernelTensor_'s shape
/// @return 0 always
int ForwardNet::updateTwoKernels(const float *ckernel_data, const float *rkernel_data)
{
    auto clsK_data = clsKernelTensor_->host<float>();
    auto regK_data = regKernelTensor_->host<float>();
    // Sizes come from the tensor shapes; the original elementSize() calls were
    // dead stores (immediately overwritten) and have been removed.
    auto cshape = clsKernelTensor_->shape();
    auto rshape = regKernelTensor_->shape();
    const int clsK_size = cshape[0] * cshape[1] * cshape[2] * cshape[3];
    const int regK_size = rshape[0] * rshape[1] * rshape[2] * rshape[3];
    t1_ = steady_clock::now();
    ::memcpy(clsK_data, ckernel_data, sizeof(float) * clsK_size);
    ::memcpy(regK_data, rkernel_data, sizeof(float) * regK_size);
    t2_ = steady_clock::now();
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    updataRKDone_ = 1;
    updataCKDone_ = 1;
    // printf("#Run updateTwoKernels time is %f ms.\n",1000 * time_span_.count());
    // printf ("######### showTensor(clsKernelTensor_) ##############\n");
    // showTensor(clsKernelTensor_,10*256*4*4,256*4*4);
    // printf ("######### showTensor(regKernelTensor_) ##############\n");
    // showTensor(regKernelTensor_,20*256*4*4,256*4*4);
    return 0;
}
/// Copy new search-region feature maps into the host map tensors for both
/// heads and mark them updated.
/// @param cm_data cls map data, N*C*H*W floats per clsMapTensor_'s shape
/// @param rm_data reg map data, N*C*H*W floats per regMapTensor_'s shape
/// @return 0 always
int ForwardNet::updateTwoMaps(const float *cm_data, const float *rm_data)
{
    auto cMapData = clsMapTensor_->host<float>();
    auto rMapData = regMapTensor_->host<float>();
    // Sizes come from the tensor shapes; removed the unused elementSize() locals.
    auto cmshape = clsMapTensor_->shape();
    auto rmshape = regMapTensor_->shape();
    const int clsM_size = cmshape[0] * cmshape[1] * cmshape[2] * cmshape[3];
    const int regM_size = rmshape[0] * rmshape[1] * rmshape[2] * rmshape[3];
    t1_ = steady_clock::now();
    ::memcpy(cMapData, cm_data, sizeof(float) * clsM_size);
    ::memcpy(rMapData, rm_data, sizeof(float) * regM_size);
    t2_ = steady_clock::now();
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    //printf("#Run updateTwoMaps time is %f ms.\n",1000 * time_span_.count());
    updataRMDone_ = 1;
    updataCMDone_ = 1;
    return 0;
}
/// Debug helper: dump a tensor's shape/size metadata, then print the first
/// `len` float values, inserting a numbered line break after every `diff`
/// elements.
/// @param TensorIn tensor whose host<float> buffer is printed
/// @param len      number of leading elements to print
/// @param diff     elements per printed row
void ForwardNet::showTensor(MNN::Tensor *TensorIn, int len, int diff)
{
    const auto shape = TensorIn->shape();
    const float *values = TensorIn->host<float>();
    const int byteSize = static_cast<int>(TensorIn->size());
    printf ("\n***** Tensor shape:[%d %d %d %d], size:%d ******\n",
            shape[0], shape[1], shape[2], shape[3], byteSize);
    printf ("DimensionType: %d dimensions:%d elementSize:%d\n",
            TensorIn->getDimensionType(), TensorIn->dimensions(), TensorIn->elementSize());
    int rowNo = 1;
    for (int idx = 0; idx < len; ++idx)
    {
        printf ("%.3f ", values[idx]);
        if (idx != 0 && idx % diff == 0)
        {
            printf ("\n#[%d]#\n", rowNo);
            ++rowNo;
        }
    }
    printf ("\n***** END ******\n");
}