#include "tea_detect.h" #include #include using namespace cv; using namespace std; namespace graft_cv { RetinaDrop::RetinaDrop(CGcvLogger* pLogger, float obj_th, float nms_th) :m_model_loaded(false) { BATCH_SIZE = 1; INPUT_CHANNEL = 3; IMAGE_WIDTH = 640; // default 640 IMAGE_HEIGHT = 640; // default 640 m_obj_threshold = obj_th;//default 0.6; m_nms_threshold = nms_th; //default0.4; m_anchor_num = 2; m_bbox_head = 4; m_variance[0] = 0.1f; m_variance[1] = 0.2f; //m_img_mean(123.0, 104.0, 117.0) m_img_mean[0] = 123.0; m_img_mean[1] = 104.0; m_img_mean[2] = 117.0; m_img_mean[3] = 0; //cv::Size size_detection(640, 640) m_size_detection.width = IMAGE_WIDTH; m_size_detection.height = IMAGE_HEIGHT; m_feature_steps = {8,16,32}; m_pLogger = pLogger; for (const int step : m_feature_steps) { assert(step != 0); int feature_map = IMAGE_HEIGHT / step; m_feature_maps.push_back(feature_map); int feature_size = feature_map * feature_map; m_feature_sizes.push_back(feature_size); } m_anchor_sizes = { { 16,32 } ,{ 64,128},{ 256, 512 }}; m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num; generate_anchors(); if (m_pLogger) { m_pLogger->INFO(string("RetinaDrop object initialized")); } } RetinaDrop::~RetinaDrop() = default; bool RetinaDrop::IsModelLoaded() { return m_model_loaded; }; void RetinaDrop::SetThreshold(float object_threshold, float nms_threshold) { this->m_obj_threshold = object_threshold; this->m_nms_threshold = nms_threshold; } bool RetinaDrop::LoadModel(std::string onnx_path) { if (m_pLogger) { m_pLogger->INFO(string("Loading detection model: ")+onnx_path); } else { std::cout << "Loading detection model: " << onnx_path<INFO(string("Detection model loaded"));} m_model_loaded = true; return m_model_loaded; } catch (...) { if (m_pLogger) { m_pLogger->ERRORINFO(string("loading model failed")); } } return false; } std::vector RetinaDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger) { std::vector result; if (img.empty()) { if (pInstanceLogger) { pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty")); } throw(string("image is empty")); } if (!m_model_loaded) { pInstanceLogger->ERRORINFO(string("model is NOT loaded")); } cv::Mat blob = cv::dnn::blobFromImage( img, 1.0, m_size_detection, m_img_mean); m_model.setInput(blob); std::vector outNames = m_model.getUnconnectedOutLayersNames(); vectoroutputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2) if (pInstanceLogger) { pInstanceLogger->INFO(string("RunModel(), before forward()")); } m_model.forward(outputs, outNames); std::vector rects; int n = post_process(img, outputs,rects); for (const auto& rect : rects) { Bbox box; box.score = rect.confidence; box.x1 = (int)rect.drop_box.x1; box.y1 = (int)rect.drop_box.y1; box.x2 = (int)rect.drop_box.x2; box.y2 = (int)rect.drop_box.y2; box.ppoint[0] = rect.keypoints[0].x; box.ppoint[1] = rect.keypoints[0].y; box.ppoint[2] = rect.keypoints[1].x; box.ppoint[3] = rect.keypoints[1].y; box.ppoint[4] = rect.keypoints[2].x; box.ppoint[5] = rect.keypoints[2].y; box.ppoint[6] = rect.keypoints[3].x; box.ppoint[7] = rect.keypoints[3].y; box.ppoint[8] = rect.keypoints[4].x; box.ppoint[9] = rect.keypoints[4].y; box.area = 0.0; result.push_back(box); } if (pInstanceLogger) { stringstream buff; buff << "detected object: " << n; pInstanceLogger->INFO(buff.str()); } return result; } void RetinaDrop::generate_anchors() { m_refer_matrix = cv::Mat(m_sum_of_feature, m_bbox_head, CV_32FC1); int line = 0; for (size_t feature_map = 0; feature_map < m_feature_maps.size(); feature_map++) { for (int height = 0; height < m_feature_maps[feature_map]; ++height) { for (int width = 0; width < m_feature_maps[feature_map]; ++width) { for (int anchor = 0; anchor < m_anchor_sizes[feature_map].size(); ++anchor) { auto* row = m_refer_matrix.ptr(line); row[0] = (float)(width+0.5) * m_feature_steps[feature_map]/(float)IMAGE_WIDTH; row[1] = (float)(height+0.5) * m_feature_steps[feature_map]/(float)IMAGE_HEIGHT; row[2] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_WIDTH; row[3] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_HEIGHT; line++; } } } } } int RetinaDrop::post_process( cv::Mat &src_img, vector &result_matrix, std::vector& valid_result ) { valid_result.clear(); std::vector result; for (int item = 0; item < m_sum_of_feature; ++item) { float* cur_bbox = (float*)result_matrix[0].data + item * 4;//result_matrix[0].step; float* cur_conf = (float*)result_matrix[2].data + item * 2;//result_matrix[1].step; float* cur_keyp = (float*)result_matrix[1].data + item * 10;//result_matrix[2].step; if (cur_conf[1] > m_obj_threshold) { DropRes headbox; headbox.confidence = cur_conf[1]; auto* anchor = m_refer_matrix.ptr(item); auto* keyp = cur_keyp; float cx, cy, kx, ky; cx = anchor[0] + cur_bbox[0] * m_variance[0] * anchor[2]; cy = anchor[1] + cur_bbox[1] * m_variance[0] * anchor[3]; kx = anchor[2] * exp(cur_bbox[2] * m_variance[1]); ky = anchor[3] * exp(cur_bbox[3] * m_variance[1]); cx -= kx / 2.0f; cy -= ky / 2.0f; kx += cx; ky += cy; headbox.drop_box.x1 = cx * src_img.cols; headbox.drop_box.y1 = cy * src_img.rows; headbox.drop_box.x2 = kx * src_img.cols; headbox.drop_box.y2 = ky * src_img.rows; for (int ki = 0; ki < 5; ++ki) { float kp_x = anchor[0] + keyp[2*ki] * m_variance[0] * anchor[2]; float kp_y = anchor[1] + keyp[2*ki+1] * m_variance[0] * anchor[3]; kp_x *= src_img.cols; kp_y *= src_img.rows; headbox.keypoints.push_back(cv::Point2f(kp_x, kp_y)); } /*float kp_x = anchor[0] + keyp[0] * m_variance[0] * anchor[2]; float kp_y = anchor[1] + keyp[1] * m_variance[0] * anchor[3]; kp_x *= src_img.cols; kp_y *= src_img.rows; headbox.keypoints = { cv::Point2f(kp_x,kp_y) };*/ result.push_back(headbox); } } vector keep; nms_detect(result,keep); for (size_t i = 0; i < keep.size(); ++i) { valid_result.push_back(result[keep[i]]); } return (int)valid_result.size(); } void RetinaDrop::nms_detect( std::vector & detections, vector & keep) { keep.clear(); if (detections.size() == 1) { keep.push_back(0); return; } sort(detections.begin(), detections.end(), [=](const DropRes& left, const DropRes& right) { return left.confidence > right.confidence; }); vector order; for (size_t i = 0; i < detections.size(); ++i) { order.push_back((int)i); } while (order.size()) { int i = order[0]; keep.push_back(i); vector del_idx; for (size_t j = 1; j < order.size(); ++j) { float iou = iou_calculate( detections[i].drop_box, detections[order[j]].drop_box); if (iou > m_nms_threshold) { del_idx.push_back((int)j); } } vector order_update; for (size_t j = 1; j < order.size(); ++j) { vector::iterator it = find(del_idx.begin(), del_idx.end(), j); if (it == del_idx.end()) { order_update.push_back(order[j]); } } order.clear(); order.assign(order_update.begin(), order_update.end()); } } float RetinaDrop::iou_calculate( const RetinaDrop::DropBox & det_a, const RetinaDrop::DropBox & det_b) { float aa = (det_a.x2 - det_a.x1 + 1) * (det_a.y2 - det_a.y1 + 1); float ab = (det_b.x2 - det_b.x1 + 1) * (det_b.y2 - det_b.y1 + 1); float xx1 = max(det_a.x1, det_b.x1); float yy1 = max(det_a.y1, det_b.y1); float xx2 = min(det_a.x2, det_b.x2); float yy2 = min(det_a.y2, det_b.y2); float w = (float)max(0.0, xx2 - xx1 + 1.0); float h = (float)max(0.0, yy2 - yy1 + 1.0); float inter = w * h; float ovr = inter / (aa + ab - inter); return ovr; } float RetinaDrop::GetNmsThreshold() { return m_nms_threshold; } }