123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- #include "tea_detect.h"
- #include <opencv.hpp>
- #include <numeric>
- using namespace cv;
- using namespace std;
- namespace graft_cv {
- RetinaDrop::RetinaDrop(CGcvLogger* pLogger, float obj_th, float nms_th)
- :m_model_loaded(false)
- {
- BATCH_SIZE = 1;
- INPUT_CHANNEL = 3;
- IMAGE_WIDTH = 640; // default 640
- IMAGE_HEIGHT = 640; // default 640
- m_obj_threshold = obj_th;//default 0.6;
- m_nms_threshold = nms_th; //default0.4;
-
- m_anchor_num = 2;
- m_bbox_head = 4;
-
- m_variance[0] = 0.1f;
- m_variance[1] = 0.2f;
- //m_img_mean(123.0, 104.0, 117.0)
- m_img_mean[0] = 123.0;
- m_img_mean[1] = 104.0;
- m_img_mean[2] = 117.0;
- m_img_mean[3] = 0;
- //cv::Size size_detection(640, 640)
- m_size_detection.width = IMAGE_WIDTH;
- m_size_detection.height = IMAGE_HEIGHT;
- m_feature_steps = {8,16,32};
- m_pLogger = pLogger;
- for (const int step : m_feature_steps) {
- assert(step != 0);
- int feature_map = IMAGE_HEIGHT / step;
- m_feature_maps.push_back(feature_map);
- int feature_size = feature_map * feature_map;
- m_feature_sizes.push_back(feature_size);
- }
- m_anchor_sizes = { { 16,32 } ,{ 64,128},{ 256, 512 }};
- m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num;
- generate_anchors();
- if (m_pLogger) {
- m_pLogger->INFO(string("RetinaDrop object initialized"));
- }
- }
- RetinaDrop::~RetinaDrop() = default;
- bool RetinaDrop::IsModelLoaded() {
- return m_model_loaded;
- };
- void RetinaDrop::SetThreshold(float object_threshold, float nms_threshold)
- {
- this->m_obj_threshold = object_threshold;
- this->m_nms_threshold = nms_threshold;
- }
- bool RetinaDrop::LoadModel(std::string onnx_path) {
- if (m_pLogger) {
- m_pLogger->INFO(string("Loading detection model: ")+onnx_path);
- }
- else { std::cout << "Loading detection model: " << onnx_path<<std::endl; }
- try {
- m_model = cv::dnn::readNetFromONNX(onnx_path);
- if (m_pLogger) {m_pLogger->INFO(string("Detection model loaded"));}
- m_model_loaded = true;
- return m_model_loaded;
- }
- catch (...)
- {
- if (m_pLogger) { m_pLogger->ERRORINFO(string("loading model failed")); }
- }
- return false;
- }
- std::vector<Bbox> RetinaDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger)
- {
- std::vector<Bbox> result;
- if (img.empty()) {
- if (pInstanceLogger) {
- pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty"));
- }
- throw(string("image is empty"));
- }
- if (!m_model_loaded) {
- pInstanceLogger->ERRORINFO(string("model is NOT loaded"));
- }
- cv::Mat blob = cv::dnn::blobFromImage(
- img,
- 1.0,
- m_size_detection,
- m_img_mean);
- m_model.setInput(blob);
- std::vector<std::string> outNames = m_model.getUnconnectedOutLayersNames();
- vector<Mat>outputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2)
- if (pInstanceLogger) {
- pInstanceLogger->INFO(string("RunModel(), before forward()"));
- }
- m_model.forward(outputs, outNames);
- std::vector<RetinaDrop::DropRes> rects;
- int n = post_process(img, outputs,rects);
- for (const auto& rect : rects) {
- Bbox box;
- box.score = rect.confidence;
- box.x1 = (int)rect.drop_box.x1;
- box.y1 = (int)rect.drop_box.y1;
- box.x2 = (int)rect.drop_box.x2;
- box.y2 = (int)rect.drop_box.y2;
- box.ppoint[0] = rect.keypoints[0].x;
- box.ppoint[1] = rect.keypoints[0].y;
- box.ppoint[2] = rect.keypoints[1].x;
- box.ppoint[3] = rect.keypoints[1].y;
- box.ppoint[4] = rect.keypoints[2].x;
- box.ppoint[5] = rect.keypoints[2].y;
- box.ppoint[6] = rect.keypoints[3].x;
- box.ppoint[7] = rect.keypoints[3].y;
- box.ppoint[8] = rect.keypoints[4].x;
- box.ppoint[9] = rect.keypoints[4].y;
- box.area = 0.0;
- result.push_back(box);
- }
- if (pInstanceLogger) {
- stringstream buff;
- buff << "detected object: " << n;
- pInstanceLogger->INFO(buff.str());
- }
- return result;
- }
- void RetinaDrop::generate_anchors() {
- m_refer_matrix = cv::Mat(m_sum_of_feature, m_bbox_head, CV_32FC1);
- int line = 0;
- for (size_t feature_map = 0; feature_map < m_feature_maps.size(); feature_map++) {
- for (int height = 0; height < m_feature_maps[feature_map]; ++height) {
- for (int width = 0; width < m_feature_maps[feature_map]; ++width) {
- for (int anchor = 0; anchor < m_anchor_sizes[feature_map].size(); ++anchor) {
- auto* row = m_refer_matrix.ptr<float>(line);
- row[0] = (float)(width+0.5) * m_feature_steps[feature_map]/(float)IMAGE_WIDTH;
- row[1] = (float)(height+0.5) * m_feature_steps[feature_map]/(float)IMAGE_HEIGHT;
- row[2] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_WIDTH;
- row[3] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_HEIGHT;
- line++;
- }
- }
- }
- }
- }
- int RetinaDrop::post_process(
- cv::Mat &src_img,
- vector<cv::Mat> &result_matrix,
- std::vector<RetinaDrop::DropRes>& valid_result
- )
- {
- valid_result.clear();
- std::vector<DropRes> result;
- for (int item = 0; item < m_sum_of_feature; ++item) {
- float* cur_bbox = (float*)result_matrix[0].data + item * 4;//result_matrix[0].step;
- float* cur_conf = (float*)result_matrix[2].data + item * 2;//result_matrix[1].step;
- float* cur_keyp = (float*)result_matrix[1].data + item * 10;//result_matrix[2].step;
-
- if (cur_conf[1] > m_obj_threshold) {
- DropRes headbox;
- headbox.confidence = cur_conf[1];
- auto* anchor = m_refer_matrix.ptr<float>(item);
- auto* keyp = cur_keyp;
- float cx, cy, kx, ky;
- cx = anchor[0] + cur_bbox[0] * m_variance[0] * anchor[2];
- cy = anchor[1] + cur_bbox[1] * m_variance[0] * anchor[3];
- kx = anchor[2] * exp(cur_bbox[2] * m_variance[1]);
- ky = anchor[3] * exp(cur_bbox[3] * m_variance[1]);
- cx -= kx / 2.0f;
- cy -= ky / 2.0f;
- kx += cx;
- ky += cy;
- headbox.drop_box.x1 = cx * src_img.cols;
- headbox.drop_box.y1 = cy * src_img.rows;
- headbox.drop_box.x2 = kx * src_img.cols;
- headbox.drop_box.y2 = ky * src_img.rows;
- for (int ki = 0; ki < 5; ++ki) {
- float kp_x = anchor[0] + keyp[2*ki] * m_variance[0] * anchor[2];
- float kp_y = anchor[1] + keyp[2*ki+1] * m_variance[0] * anchor[3];
- kp_x *= src_img.cols;
- kp_y *= src_img.rows;
- headbox.keypoints.push_back(cv::Point2f(kp_x, kp_y));
- }
- /*float kp_x = anchor[0] + keyp[0] * m_variance[0] * anchor[2];
- float kp_y = anchor[1] + keyp[1] * m_variance[0] * anchor[3];
- kp_x *= src_img.cols;
- kp_y *= src_img.rows;
- headbox.keypoints = {
- cv::Point2f(kp_x,kp_y)
- };*/
- result.push_back(headbox);
- }
- }
- vector<int> keep;
- nms_detect(result,keep);
- for (size_t i = 0; i < keep.size(); ++i) {
- valid_result.push_back(result[keep[i]]);
- }
- return (int)valid_result.size();
- }
- void RetinaDrop::nms_detect(
- std::vector<DropRes> & detections,
- vector<int> & keep)
- {
- keep.clear();
- if (detections.size() == 1) {
- keep.push_back(0);
- return;
- }
- sort(detections.begin(), detections.end(),
- [=](const DropRes& left, const DropRes& right) {
- return left.confidence > right.confidence;
- });
-
- vector<int> order;
- for (size_t i = 0; i < detections.size(); ++i) { order.push_back((int)i); }
- while (order.size()) {
- int i = order[0];
- keep.push_back(i);
- vector<int> del_idx;
- for (size_t j = 1; j < order.size(); ++j) {
- float iou = iou_calculate(
- detections[i].drop_box,
- detections[order[j]].drop_box);
- if (iou > m_nms_threshold) {
- del_idx.push_back((int)j);
- }
- }
- vector<int> order_update;
- for (size_t j = 1; j < order.size(); ++j) {
- vector<int>::iterator it = find(del_idx.begin(), del_idx.end(), j);
- if (it == del_idx.end()) {
- order_update.push_back(order[j]);
- }
- }
- order.clear();
- order.assign(order_update.begin(), order_update.end());
- }
- }
- float RetinaDrop::iou_calculate(
- const RetinaDrop::DropBox & det_a,
- const RetinaDrop::DropBox & det_b)
- {
- float aa = (det_a.x2 - det_a.x1 + 1) * (det_a.y2 - det_a.y1 + 1);
- float ab = (det_b.x2 - det_b.x1 + 1) * (det_b.y2 - det_b.y1 + 1);
- float xx1 = max(det_a.x1, det_b.x1);
- float yy1 = max(det_a.y1, det_b.y1);
- float xx2 = min(det_a.x2, det_b.x2);
- float yy2 = min(det_a.y2, det_b.y2);
- float w = (float)max(0.0, xx2 - xx1 + 1.0);
- float h = (float)max(0.0, yy2 - yy1 + 1.0);
- float inter = w * h;
- float ovr = inter / (aa + ab - inter);
- return ovr;
- }
- float RetinaDrop::GetNmsThreshold() { return m_nms_threshold; }
- }
|