|
@@ -281,9 +281,10 @@ namespace graft_cv {
|
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
|
YoloDrop::YoloDrop(CGcvLogger* pLogger, float obj_th, float nms_th)
|
|
|
- :m_model_loaded(false)
|
|
|
+ :m_model_loaded(false),
|
|
|
+ m_pInfer(0),
|
|
|
+ m_runWithCuda(false)
|
|
|
{
|
|
|
- m_infer = Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda = true);
|
|
|
BATCH_SIZE = 1;
|
|
|
INPUT_CHANNEL = 3;
|
|
|
IMAGE_WIDTH = 640; // default 640
|
|
@@ -307,7 +308,7 @@ namespace graft_cv {
|
|
|
m_feature_steps = { 8,16,32 };
|
|
|
m_pLogger = pLogger;
|
|
|
|
|
|
- for (const int step : m_feature_steps) {
|
|
|
+ /*for (const int step : m_feature_steps) {
|
|
|
assert(step != 0);
|
|
|
int feature_map = IMAGE_HEIGHT / step;
|
|
|
m_feature_maps.push_back(feature_map);
|
|
@@ -316,7 +317,7 @@ namespace graft_cv {
|
|
|
}
|
|
|
m_anchor_sizes = { { 16,32 } ,{ 64,128 },{ 256, 512 } };
|
|
|
m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num;
|
|
|
- generate_anchors();
|
|
|
+ generate_anchors();*/
|
|
|
if (m_pLogger) {
|
|
|
m_pLogger->INFO(string("YoloDrop object initialized"));
|
|
|
}
|
|
@@ -331,15 +332,33 @@ namespace graft_cv {
|
|
|
{
|
|
|
this->m_obj_threshold = object_threshold;
|
|
|
this->m_nms_threshold = nms_threshold;
|
|
|
+ if (m_pInfer) {
|
|
|
+ m_pInfer->setModelNMSThreshold(m_nms_threshold);
|
|
|
+ m_pInfer->setModelScoreThreshold(m_obj_threshold);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
bool YoloDrop::LoadModel(std::string onnx_path) {
|
|
|
+ if (m_pInfer) {
|
|
|
+ delete m_pInfer;
|
|
|
+ m_pInfer = 0;
|
|
|
+ m_model_loaded = false;
|
|
|
+
|
|
|
+ }
|
|
|
+ cv::Size2f modelInputShape((float)IMAGE_WIDTH, (float)IMAGE_HEIGHT);
|
|
|
+
|
|
|
if (m_pLogger) {
|
|
|
m_pLogger->INFO(string("Loading detection model: ") + onnx_path);
|
|
|
}
|
|
|
else { std::cout << "Loading detection model: " << onnx_path << std::endl; }
|
|
|
try {
|
|
|
- m_model = cv::dnn::readNetFromONNX(onnx_path);
|
|
|
+ m_pInfer = new Inference(onnx_path, modelInputShape, "", m_runWithCuda);
|
|
|
+ if (!m_pInfer) {
|
|
|
+ throw(string("inference init error"));
|
|
|
+ }
|
|
|
+ m_pInfer->setModelNMSThreshold(m_nms_threshold);
|
|
|
+ m_pInfer->setModelScoreThreshold(m_obj_threshold);
|
|
|
+
|
|
|
if (m_pLogger) { m_pLogger->INFO(string("Detection model loaded")); }
|
|
|
m_model_loaded = true;
|
|
|
return m_model_loaded;
|
|
@@ -351,10 +370,10 @@ namespace graft_cv {
|
|
|
return false;
|
|
|
}
|
|
|
|
|
|
- std::vector<Bbox> YoloDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger)
|
|
|
+ std::vector<Bbox> YoloDrop::RunModel(cv::Mat& frame, CGcvLogger* pInstanceLogger)
|
|
|
{
|
|
|
std::vector<Bbox> result;
|
|
|
- if (img.empty()) {
|
|
|
+ if (frame.empty()) {
|
|
|
if (pInstanceLogger) {
|
|
|
pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty"));
|
|
|
}
|
|
@@ -362,51 +381,76 @@ namespace graft_cv {
|
|
|
}
|
|
|
if (!m_model_loaded) {
|
|
|
pInstanceLogger->ERRORINFO(string("model is NOT loaded"));
|
|
|
+ throw(string("model is NOT loaded"));
|
|
|
}
|
|
|
- cv::Mat blob = cv::dnn::blobFromImage(
|
|
|
- img,
|
|
|
- 1.0,
|
|
|
- m_size_detection,
|
|
|
- m_img_mean);
|
|
|
- m_model.setInput(blob);
|
|
|
|
|
|
- std::vector<std::string> outNames = m_model.getUnconnectedOutLayersNames();
|
|
|
- vector<Mat>outputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2)
|
|
|
- if (pInstanceLogger) {
|
|
|
- pInstanceLogger->INFO(string("RunModel(), before forward()"));
|
|
|
- }
|
|
|
- m_model.forward(outputs, outNames);
|
|
|
- std::vector<YoloDrop::DropRes> rects;
|
|
|
- int n = post_process(img, outputs, rects);
|
|
|
- for (const auto& rect : rects) {
|
|
|
- Bbox box;
|
|
|
- box.score = rect.confidence;
|
|
|
- box.x1 = (int)rect.drop_box.x1;
|
|
|
- box.y1 = (int)rect.drop_box.y1;
|
|
|
- box.x2 = (int)rect.drop_box.x2;
|
|
|
- box.y2 = (int)rect.drop_box.y2;
|
|
|
- box.ppoint[0] = rect.keypoints[0].x;
|
|
|
- box.ppoint[1] = rect.keypoints[0].y;
|
|
|
- box.ppoint[2] = rect.keypoints[1].x;
|
|
|
- box.ppoint[3] = rect.keypoints[1].y;
|
|
|
- box.ppoint[4] = rect.keypoints[2].x;
|
|
|
- box.ppoint[5] = rect.keypoints[2].y;
|
|
|
- box.ppoint[6] = rect.keypoints[3].x;
|
|
|
- box.ppoint[7] = rect.keypoints[3].y;
|
|
|
- box.ppoint[8] = rect.keypoints[4].x;
|
|
|
- box.ppoint[9] = rect.keypoints[4].y;
|
|
|
+ // Inference starts here...
|
|
|
+ std::vector<Detection> output = m_pInfer->runInference(frame);
|
|
|
|
|
|
- box.operate_point[0] = 0.0;
|
|
|
- box.operate_point[1] = 0.0;
|
|
|
- box.operate_angle = 0.0;
|
|
|
+ int detections = output.size();
|
|
|
+ std::cout << "Number of detections:" << detections << std::endl;
|
|
|
|
|
|
- box.area = 0.0;
|
|
|
- box.status = 0;
|
|
|
- result.push_back(box);
|
|
|
+ for (int i = 0; i < detections; ++i)
|
|
|
+ {
|
|
|
+ Detection detection = output[i];
|
|
|
+
|
|
|
+ cv::Rect box = detection.box;
|
|
|
+ cv::Scalar color = detection.color;
|
|
|
+ std::vector<cv::Point> pts = detection.kpts;
|
|
|
+
|
|
|
+ Bbox box_out;
|
|
|
+ box_out.score = detection.confidence;
|
|
|
+ box_out.x1 = box.x;
|
|
|
+ box_out.y1 = box.y;
|
|
|
+ box_out.x2 = box.x + box.width;
|
|
|
+ box_out.y2 = box.y + box.height;
|
|
|
+ box_out.ppoint[0] = pts[0].x;
|
|
|
+ box_out.ppoint[1] = pts[0].y;
|
|
|
+ box_out.ppoint[2] = pts[1].x;
|
|
|
+ box_out.ppoint[3] = pts[1].y;
|
|
|
+ box_out.ppoint[4] = pts[2].x;
|
|
|
+ box_out.ppoint[5] = pts[2].y;
|
|
|
+ box_out.ppoint[6] = pts[3].x;
|
|
|
+ box_out.ppoint[7] = pts[3].y;
|
|
|
+ box_out.ppoint[8] = pts[4].x;
|
|
|
+ box_out.ppoint[9] = pts[4].y;
|
|
|
+
|
|
|
+ box_out.operate_point[0] = 0.0;
|
|
|
+ box_out.operate_point[1] = 0.0;
|
|
|
+ box_out.operate_angle = 0.0;
|
|
|
+
|
|
|
+ box_out.area = 0.0;
|
|
|
+ box_out.status = 0;
|
|
|
+ result.push_back(box_out);
|
|
|
+
|
|
|
+
|
|
|
+ //// Detection box
|
|
|
+ //cv::rectangle(frame, box, color, 2);
|
|
|
+
|
|
|
+ //// Detection box text
|
|
|
+ //std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
|
|
|
+ //cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
|
|
|
+ //cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
|
|
|
+
|
|
|
+ //cv::rectangle(frame, textBox, color, cv::FILLED);
|
|
|
+ //cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
|
|
|
+
|
|
|
+ //for (auto& pt : pts) {
|
|
|
+ // cv::circle(frame, pt, 3, cv::Scalar(0, 0, 255));
|
|
|
+ //}
|
|
|
}
|
|
|
+ // Inference ends here...
|
|
|
+
|
|
|
+ // This is only for preview purposes
|
|
|
+ /*float scale = 0.8;
|
|
|
+ cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
|
|
|
+ cv::imshow("Inference", frame);
|
|
|
+
|
|
|
+ cv::waitKey(-1);*/
|
|
|
+
|
|
|
if (pInstanceLogger) {
|
|
|
stringstream buff;
|
|
|
- buff << "detected object: " << n;
|
|
|
+ buff << "detected object: " << detections;
|
|
|
pInstanceLogger->INFO(buff.str());
|
|
|
}
|
|
|
return result;
|