|
|
|
|
|
#include <opencv2/imgproc.hpp>
|
|
|
|
|
|
#include "use_mnn.hpp"
|
|
|
|
|
|
#include <thread>
|
|
|
|
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
using namespace GUD::ALG;
|
|
|
|
|
|
|
|
|
|
|
|
ForwardNet::ForwardNet(const gud_siamrpn_config_t conf)
{
#if (1 == D_CPU_BIGCORE)
    // Pin this process to CPU core 3 (the big core on the 3559 platform) so
    // the inference workload is not migrated onto a slow core by the scheduler.
    cpu_set_t mask; // set of CPUs this process is allowed to run on

    CPU_ZERO(&mask);   // start from an empty CPU set
    CPU_SET(3, &mask); // 3559 core 3

    if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
    {
        printf("Set CPU affinity failue, ERROR:%s\n", strerror(errno));
        exit(-1);
    }
    else
    {
        printf("Set CPU affinity Sucess, Just run ForwardNet Class\n");
    }
#endif

    // Cache runtime options supplied by the caller's configuration.
    numThread_  = conf.thread_num;
    prec_type_  = conf.precision_type;
    deviceType_ = conf.device_type;

    // Load both sub-networks (cls head, reg head) and report the load time.
    t1_ = steady_clock::now();
    net_init(0, conf.mnnModel_0);
    net_init(1, conf.mnnModel_1);
    t2_ = steady_clock::now();
    time_span_ = duration_cast < duration < double >> (t2_ - t1_);
    printf("#MNN Model Load time is %f ms.\n",1000 * time_span_.count());

    // net_init() filled the output dims from the model; force the spatial
    // dims to 1x1 before the host-side output tensors are created below.
    clsOut_dims_[2] = 1;
    clsOut_dims_[3] = 1;
    regOut_dims_[2] = 1;
    regOut_dims_[3] = 1;

    updataCKDone_ = 0;
    updataRKDone_ = 0;

    /** create inputs & output tensor for two networks **/
    clsMapTensor_    = MNN::Tensor::create<float>(clsMap_dims_, NULL, MNN::Tensor::CAFFE);
    clsKernelTensor_ = MNN::Tensor::create<float>(ck_dims_, NULL, MNN::Tensor::CAFFE);
    regMapTensor_    = MNN::Tensor::create<float>(regMap_dims_, NULL, MNN::Tensor::CAFFE);
    regKernelTensor_ = MNN::Tensor::create<float>(rk_dims_, NULL, MNN::Tensor::CAFFE);
    clsOutTensor_    = MNN::Tensor::create<float>(clsOut_dims_, NULL, MNN::Tensor::CAFFE);
    regOutTensor_    = MNN::Tensor::create<float>(regOut_dims_, NULL, MNN::Tensor::CAFFE);

#if (DEBUG_TMP == 1)
    printf ("Create Tensor clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
    printf ("Create Tensor RegOut_dims_[%d %d %d %d]\n",regOut_dims_[0],regOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
ForwardNet::~ForwardNet() {
    // Release both MNN sessions (0: cls head, 1: reg head).
    for (int idx = 0; idx <= 1; ++idx) {
        net_deinit(idx);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::net_init(int model_idx,const char* model_path)
|
|
|
|
|
|
{
|
|
|
|
|
|
//numThread_ = 4;
|
|
|
|
|
|
printf ("Start Loading Net:[%d] (%s)#\n",model_idx,model_path);
|
|
|
|
|
|
if (model_idx == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
/** create network session **/
|
|
|
|
|
|
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
|
|
|
|
|
|
MNN::ScheduleConfig config;
|
|
|
|
|
|
if (deviceType_ == 0){
|
|
|
|
|
|
config.type = MNN_FORWARD_CPU;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (deviceType_ == 1){
|
|
|
|
|
|
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
config.numThread = numThread_;
|
|
|
|
|
|
net_cls_ = net;
|
|
|
|
|
|
MNN::BackendConfig bnconfig;
|
|
|
|
|
|
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
|
|
|
|
|
|
//bnconfig.power = MNN::BackendConfig::Power_Low;
|
|
|
|
|
|
if (prec_type_ == 0) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (prec_type_ == 1) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_High;
|
|
|
|
|
|
} else if (prec_type_ == 2) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Low;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (prec_type_ == 3) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
|
|
|
|
|
|
}
|
|
|
|
|
|
config.backendConfig = &bnconfig;
|
|
|
|
|
|
MNNconfig_cls_ = config;
|
|
|
|
|
|
auto session = net->createSession(config);
|
|
|
|
|
|
|
|
|
|
|
|
/** get inputs & outputs shape **/
|
|
|
|
|
|
auto inputs = net->getSessionInputAll(session); // multi inputs
|
|
|
|
|
|
ck_dims_ = inputs[clsKernelName]->shape();
|
|
|
|
|
|
clsMap_dims_ = inputs[clsMapName]->shape();
|
|
|
|
|
|
auto Routput0 = net->getSessionOutput(session, clsOutputName.data()); // signal output
|
|
|
|
|
|
clsOut_dims_ = Routput0->shape();
|
|
|
|
|
|
session_cls_ = session;
|
|
|
|
|
|
|
|
|
|
|
|
/** set inputs cls tensor **/
|
|
|
|
|
|
inputClsTensor = net_cls_->getSessionInputAll(session_cls_);
|
|
|
|
|
|
|
|
|
|
|
|
#if (DEBUG_TMP == 1)
|
|
|
|
|
|
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
|
|
|
|
|
|
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
|
|
|
|
|
|
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
|
|
|
|
|
|
|
|
|
|
|
|
float memoryUsage = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
|
|
|
|
|
|
float flops = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
|
|
|
|
|
|
int backendType[2];
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
|
|
|
|
|
|
MNN_PRINT("RefSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
|
|
|
|
|
|
memoryUsage, flops, backendType[0], bnconfig.precision,bnconfig.power);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
printf ("#MNN NetInit Load clsModel:{%s} Sucess; Thread_num:[%d] Device:[%d] #\n",model_path, numThread_,config.type);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
else if (model_idx == 1)
|
|
|
|
|
|
{
|
|
|
|
|
|
/** create network session **/
|
|
|
|
|
|
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
|
|
|
|
|
|
MNN::ScheduleConfig config;
|
|
|
|
|
|
if (deviceType_ == 0){
|
|
|
|
|
|
config.type = MNN_FORWARD_CPU;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (deviceType_ == 1){
|
|
|
|
|
|
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU==0; MNN_FORWARD_OPENCL==3
|
|
|
|
|
|
}
|
|
|
|
|
|
config.numThread = numThread_;
|
|
|
|
|
|
net_reg_ = net;
|
|
|
|
|
|
MNN::BackendConfig bnconfig;
|
|
|
|
|
|
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
|
|
|
|
|
|
//bnconfig.power = MNN::BackendConfig::Power_Low;
|
|
|
|
|
|
if (prec_type_ == 0) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Normal;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (prec_type_ == 1) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_High;
|
|
|
|
|
|
} else if (prec_type_ == 2) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Low;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (prec_type_ == 3) {
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Low_BF16;
|
|
|
|
|
|
}
|
|
|
|
|
|
config.backendConfig = &bnconfig;
|
|
|
|
|
|
MNNconfig_reg_ = config;
|
|
|
|
|
|
auto session = net->createSession(config);
|
|
|
|
|
|
|
|
|
|
|
|
/** get inputs & outputs shape **/
|
|
|
|
|
|
auto inputs = net->getSessionInputAll(session); // multi inputs
|
|
|
|
|
|
rk_dims_ = inputs[regKernelNane]->shape();
|
|
|
|
|
|
regMap_dims_ = inputs[regMapName]->shape();
|
|
|
|
|
|
auto Routput1 = net->getSessionOutput(session, regOutputName.data()); // signal output
|
|
|
|
|
|
regOut_dims_ = Routput1->shape();
|
|
|
|
|
|
session_reg_ = session;
|
|
|
|
|
|
|
|
|
|
|
|
/** set inputs reg tensor **/
|
|
|
|
|
|
inputRegTensor = net_reg_->getSessionInputAll(session_reg_);
|
|
|
|
|
|
|
|
|
|
|
|
#if (DEBUG_TMP == 1)
|
|
|
|
|
|
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
|
|
|
|
|
|
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
|
|
|
|
|
|
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
|
|
|
|
|
|
float memoryUsage = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
|
|
|
|
|
|
float flops = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
|
|
|
|
|
|
int backendType[2];
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
|
|
|
|
|
|
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
|
|
|
|
|
|
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
printf ("#MNN NetInit Load regModel:{%s} Sucess; Thread_num:[%d] Device:[%d]#\n",model_path, numThread_, config.type);
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
#if (USE_DOUBLE_HEADER == 1)
|
|
|
|
|
|
else if (model_idx == 2)
|
|
|
|
|
|
{
|
|
|
|
|
|
/** net input & output names **/
|
|
|
|
|
|
std::string clsKernelInput = "clskernel";
|
|
|
|
|
|
std::string clsMapInput = "x";
|
|
|
|
|
|
std::string clsOutput = "57";
|
|
|
|
|
|
|
|
|
|
|
|
std::string regKernelInput = "regkernel";
|
|
|
|
|
|
std::string regMapInput = "y";
|
|
|
|
|
|
std::string regOutput = "62";
|
|
|
|
|
|
/** create network session **/
|
|
|
|
|
|
auto net = std::shared_ptr<MNN::Interpreter>(MNN::Interpreter::createFromFile(model_path));
|
|
|
|
|
|
MNN::ScheduleConfig config;
|
|
|
|
|
|
config.type = MNN_FORWARD_OPENCL; // MNN_FORWARD_CPU MNN_FORWARD_OPENCL
|
|
|
|
|
|
config.numThread = numThread_;
|
|
|
|
|
|
if (config.type == MNN_FORWARD_OPENCL)
|
|
|
|
|
|
config.mode = MNN_GPU_TUNING_FAST;
|
|
|
|
|
|
net_double_ = net;
|
|
|
|
|
|
MNN::BackendConfig bnconfig;
|
|
|
|
|
|
//bnconfig.memory = MNN::BackendConfig::Memory_Low;
|
|
|
|
|
|
//bnconfig.power = MNN::BackendConfig::Power_Low;
|
|
|
|
|
|
bnconfig.precision = MNN::BackendConfig::Precision_Low; // Precision_Normal = 0, Precision_High, Precision_Low Precision_Low_BF16
|
|
|
|
|
|
config.backendConfig = &bnconfig;
|
|
|
|
|
|
MNNconfig_double_ = config;
|
|
|
|
|
|
//net->setSessionMode(MNN::Interpreter::Session_Input_User);
|
|
|
|
|
|
auto session = net->createSession(config);
|
|
|
|
|
|
/** get inputs & outputs shape **/
|
|
|
|
|
|
auto inputs = net->getSessionInputAll(session); // multi inputs
|
|
|
|
|
|
ck_dims_ = inputs[clsKernelInput]->shape();
|
|
|
|
|
|
clsMap_dims_ = inputs[clsMapInput]->shape();
|
|
|
|
|
|
rk_dims_ = inputs[regKernelInput]->shape();
|
|
|
|
|
|
regMap_dims_ = inputs[regMapInput]->shape();
|
|
|
|
|
|
auto Coutput1 = net->getSessionOutput(session, regOutput.data()); // signal output
|
|
|
|
|
|
clsOut_dims_ = Coutput1->shape();
|
|
|
|
|
|
auto Routput1 = net->getSessionOutput(session, regOutput.data()); // signal output
|
|
|
|
|
|
regOut_dims_ = Routput1->shape();
|
|
|
|
|
|
session_double_ = session;
|
|
|
|
|
|
|
|
|
|
|
|
/** set inputs reg tensor **/
|
|
|
|
|
|
input4Tensors = net_double_->getSessionInputAll(session_double_);
|
|
|
|
|
|
input4Tensors[clsKernelInput] = clsKernelTensor_;
|
|
|
|
|
|
input4Tensors[clsMapInput] = clsMapTensor_;
|
|
|
|
|
|
input4Tensors[regKernelInput] = regKernelTensor_;
|
|
|
|
|
|
input4Tensors[regMapInput] = regMapTensor_;
|
|
|
|
|
|
// net_double_->resizeSession(session_double_);
|
|
|
|
|
|
|
|
|
|
|
|
#if (DEBUG_TMP == 1)
|
|
|
|
|
|
printf ("ck_dims_[%d %d %d %d]\n",ck_dims_[0],ck_dims_[1], ck_dims_[2], ck_dims_[3]);
|
|
|
|
|
|
printf ("clsMap_dims_[%d %d %d %d]\n",clsMap_dims_[0],clsMap_dims_[1], clsMap_dims_[2], clsMap_dims_[3]);
|
|
|
|
|
|
printf ("clsOut_dims_[%d %d %d %d]\n",clsOut_dims_[0],clsOut_dims_[1], clsOut_dims_[2], clsOut_dims_[3]);
|
|
|
|
|
|
printf ("rk_dims_[%d %d %d %d]\n",rk_dims_[0],rk_dims_[1], rk_dims_[2], rk_dims_[3]);
|
|
|
|
|
|
printf ("regMap_dims_[%d %d %d %d]\n",regMap_dims_[0],regMap_dims_[1], regMap_dims_[2], regMap_dims_[3]);
|
|
|
|
|
|
printf ("regOut_dims_[%d %d %d %d]\n",regOut_dims_[0],clsOut_dims_[1], regOut_dims_[2], regOut_dims_[3]);
|
|
|
|
|
|
float memoryUsage = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::MEMORY, &memoryUsage);
|
|
|
|
|
|
float flops = 0.0f;
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::FLOPS, &flops);
|
|
|
|
|
|
int backendType[2];
|
|
|
|
|
|
net->getSessionInfo(session, MNN::Interpreter::BACKENDS, backendType);
|
|
|
|
|
|
MNN_PRINT("SearSession Info: memory use [%f] MB, flops is [%f] M, backendType is [%d], PrecisionMode:[%d] PowerMode:[%d]\n",
|
|
|
|
|
|
memoryUsage, flops, backendType[0], bnconfig.precision, bnconfig.power);
|
|
|
|
|
|
#endif
|
|
|
|
|
|
printf ("#MNN NetInit Load DoubleHeaderModel:{%s} Sucess!; Thread_num:[%d] Device:[%d] #\n",
|
|
|
|
|
|
model_path, numThread_, config.type);
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
initialized_ = true;
|
|
|
|
|
|
printf("*** Loading Model sucess!!! ***\n");
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::net_deinit(int model_idx)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (model_idx == 0)
|
|
|
|
|
|
net_cls_->releaseSession(session_cls_);
|
|
|
|
|
|
if (model_idx == 1)
|
|
|
|
|
|
net_reg_->releaseSession(session_reg_);
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::clsMnnForward(const float *cmap_data, float* coutput_data, bool dataReady)
|
|
|
|
|
|
{
|
|
|
|
|
|
t1_ = steady_clock::now();
|
|
|
|
|
|
if (!dataReady){
|
|
|
|
|
|
auto clsM_data = clsMapTensor_->host<float>();
|
|
|
|
|
|
auto clsM_size = static_cast<int>(clsMapTensor_->elementSize());
|
|
|
|
|
|
::memcpy(clsM_data, cmap_data, sizeof(float)*clsM_size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto clsKernelInput = inputClsTensor[clsKernelName];
|
|
|
|
|
|
auto clsMapInput = inputClsTensor[clsMapName];
|
|
|
|
|
|
clsKernelInput->copyFromHostTensor(clsKernelTensor_);
|
|
|
|
|
|
clsMapInput->copyFromHostTensor(clsMapTensor_);
|
|
|
|
|
|
/**run network **/
|
|
|
|
|
|
net_cls_->runSession(session_cls_);
|
|
|
|
|
|
/**get output data **/
|
|
|
|
|
|
auto outTensor = net_cls_->getSessionOutput(session_cls_, clsOutputName.data());
|
|
|
|
|
|
auto outshape = outTensor->shape();
|
|
|
|
|
|
int outsize = outTensor->elementSize();
|
|
|
|
|
|
int outClsSize = outshape[0]*outshape[1];
|
|
|
|
|
|
outTensor->copyToHostTensor(clsOutTensor_);
|
|
|
|
|
|
//printf ("######### showTensor(clsOutTensor_) ##############\n");
|
|
|
|
|
|
//showTensor(clsOutTensor_,outClsSize,5*19*19);
|
|
|
|
|
|
::memcpy(coutput_data, clsOutTensor_->host<float>(), sizeof(float)*outClsSize);
|
|
|
|
|
|
t2_ = steady_clock::now();
|
|
|
|
|
|
#if (DEBUG_TMP == 1)
|
|
|
|
|
|
time_span_ = duration_cast < duration < double >> (t2_ - t1_);
|
|
|
|
|
|
printf("#Run cls getSessionOutput time is %f ms.\n",1000 * time_span_.count());
|
|
|
|
|
|
#endif
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::regMnnForward(const float *rmap_data, float* routput_data, bool dataReady)
|
|
|
|
|
|
{
|
|
|
|
|
|
t1_ = steady_clock::now();
|
|
|
|
|
|
if (!dataReady) {
|
|
|
|
|
|
auto regM_data = regMapTensor_->host<float>();
|
|
|
|
|
|
auto regM_size = static_cast<int>(regMapTensor_->elementSize());
|
|
|
|
|
|
::memcpy(regM_data, rmap_data, sizeof(float)*regM_size);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
auto regKernelInput = inputRegTensor[regKernelNane];
|
|
|
|
|
|
auto regMapInput = inputRegTensor[regMapName];
|
|
|
|
|
|
regKernelInput->copyFromHostTensor(regKernelTensor_);
|
|
|
|
|
|
regMapInput->copyFromHostTensor(regMapTensor_);
|
|
|
|
|
|
/**run network **/
|
|
|
|
|
|
net_reg_->runSession(session_reg_);
|
|
|
|
|
|
/**get output data **/
|
|
|
|
|
|
auto outTensor = net_reg_->getSessionOutput(session_reg_, regOutputName.data());
|
|
|
|
|
|
auto outshape = outTensor->shape();
|
|
|
|
|
|
int outsize = outTensor->elementSize();
|
|
|
|
|
|
int outRegSize = outshape[0]*outshape[1];
|
|
|
|
|
|
outTensor->copyToHostTensor(regOutTensor_);
|
|
|
|
|
|
//printf ("######### showTensor(regOutTensor_) ##############\n");
|
|
|
|
|
|
//showTensor(regOutTensor_,outRegSize,5*19*19);
|
|
|
|
|
|
::memcpy(routput_data, regOutTensor_->host<float>(), sizeof(float)*outRegSize);
|
|
|
|
|
|
t2_ = steady_clock::now();
|
|
|
|
|
|
#if (DEBUG_TMP == 1)
|
|
|
|
|
|
time_span_ = duration_cast < duration < double >> (t2_ - t1_);
|
|
|
|
|
|
printf("#Run reg getSessionOutput time is %f ms.\n",1000 * time_span_.count());
|
|
|
|
|
|
#endif
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::updateTwoKernels(const float *ckernel_data, const float *rkernel_data)
|
|
|
|
|
|
{
|
|
|
|
|
|
auto clsK_data = clsKernelTensor_->host<float>();
|
|
|
|
|
|
auto clsK_size = static_cast<int>(clsKernelTensor_->elementSize());
|
|
|
|
|
|
auto regK_data = regKernelTensor_->host<float>();
|
|
|
|
|
|
auto regK_size = static_cast<int>(regKernelTensor_->elementSize());
|
|
|
|
|
|
|
|
|
|
|
|
auto cshape = clsKernelTensor_->shape();
|
|
|
|
|
|
auto rshape = regKernelTensor_->shape();
|
|
|
|
|
|
|
|
|
|
|
|
clsK_size = cshape[0] * cshape[1] * cshape[2] * cshape[3];
|
|
|
|
|
|
regK_size = rshape[0] * rshape[1] * rshape[2] * rshape[3];
|
|
|
|
|
|
|
|
|
|
|
|
t1_ = steady_clock::now();
|
|
|
|
|
|
::memcpy(clsK_data, ckernel_data, sizeof(float)*clsK_size);
|
|
|
|
|
|
::memcpy(regK_data, rkernel_data, sizeof(float)*regK_size);
|
|
|
|
|
|
t2_ = steady_clock::now();
|
|
|
|
|
|
time_span_ = duration_cast < duration < double >> (t2_ - t1_);
|
|
|
|
|
|
updataRKDone_ = 1;
|
|
|
|
|
|
updataCKDone_ = 1;
|
|
|
|
|
|
// printf("#Run updateTwoKernels time is %f ms.\n",1000 * time_span_.count());
|
|
|
|
|
|
// printf ("######### showTensor(clsKernelTensor_) ##############\n");
|
|
|
|
|
|
// showTensor(clsKernelTensor_,10*256*4*4,256*4*4);
|
|
|
|
|
|
// printf ("######### showTensor(regKernelTensor_) ##############\n");
|
|
|
|
|
|
// showTensor(regKernelTensor_,20*256*4*4,256*4*4);
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int ForwardNet::updateTwoMaps(const float *cm_data, const float *rm_data)
|
|
|
|
|
|
{
|
|
|
|
|
|
auto cMapData = clsMapTensor_->host<float>();
|
|
|
|
|
|
auto cm_size = static_cast<int>(clsMapTensor_->elementSize());
|
|
|
|
|
|
auto rMapData = regMapTensor_->host<float>();
|
|
|
|
|
|
auto rm_size = static_cast<int>(regMapTensor_->elementSize());
|
|
|
|
|
|
|
|
|
|
|
|
auto cmshape = clsMapTensor_->shape();
|
|
|
|
|
|
auto rmshape = regMapTensor_->shape();
|
|
|
|
|
|
|
|
|
|
|
|
int clsM_size = cmshape[0] * cmshape[1] * cmshape[2] * cmshape[3];
|
|
|
|
|
|
int regM_size = rmshape[0] * rmshape[1] * rmshape[2] * rmshape[3];
|
|
|
|
|
|
t1_ = steady_clock::now();
|
|
|
|
|
|
::memcpy(cMapData, cm_data, sizeof(float)*clsM_size);
|
|
|
|
|
|
::memcpy(rMapData, rm_data, sizeof(float)*regM_size);
|
|
|
|
|
|
t2_ = steady_clock::now();
|
|
|
|
|
|
time_span_ = duration_cast < duration < double >> (t2_ - t1_);
|
|
|
|
|
|
//printf("#Run updateTwoMaps time is %f ms.\n",1000 * time_span_.count());
|
|
|
|
|
|
updataRMDone_ = 1;
|
|
|
|
|
|
updataCMDone_ = 1;
|
|
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// Debug helper: dump the first `len` float elements of a tensor, inserting a
// row marker every `diff` elements.
void ForwardNet::showTensor(MNN::Tensor *TensorIn, int len, int diff)
{
    auto shape    = TensorIn->shape();
    auto values   = TensorIn->host<float>();
    auto byteSize = static_cast<int>(TensorIn->size());

    printf ("\n***** Tensor shape:[%d %d %d %d], size:%d ******\n",shape[0],shape[1],shape[2],shape[3],byteSize);
    printf ("DimensionType: %d dimensions:%d elementSize:%d\n",TensorIn->getDimensionType(), TensorIn->dimensions(), TensorIn->elementSize());

    int rowIdx = 1;
    for (int i = 0; i < len; ++i)
    {
        printf ("%.3f ",values[i]);
        // Emit a numbered row break after every `diff`-th element.
        if ((i % diff == 0) && (i != 0))
        {
            printf("\n#[%d]#\n",rowIdx);
            ++rowIdx;
        }
    }
    printf ("\n***** END ******\n");
}
|
|
|
|
|
|
|