tea_detect.cpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  #include "tea_detect.h"

  #include <algorithm>
  #include <cassert>
  #include <cmath>
  #include <exception>
  #include <iostream>
  #include <numeric>
  #include <sstream>
  #include <string>
  #include <vector>

  #include <opencv.hpp>

  using namespace cv;
  using namespace std;
  6. namespace graft_cv {
  7. RetinaDrop::RetinaDrop(CGcvLogger* pLogger, float obj_th, float nms_th)
  8. :m_model_loaded(false)
  9. {
  10. BATCH_SIZE = 1;
  11. INPUT_CHANNEL = 3;
  12. IMAGE_WIDTH = 640; // default 640
  13. IMAGE_HEIGHT = 640; // default 640
  14. m_obj_threshold = obj_th;//default 0.6;
  15. m_nms_threshold = nms_th; //default0.4;
  16. m_anchor_num = 2;
  17. m_bbox_head = 4;
  18. m_variance[0] = 0.1f;
  19. m_variance[1] = 0.2f;
  20. //m_img_mean(123.0, 104.0, 117.0)
  21. m_img_mean[0] = 123.0;
  22. m_img_mean[1] = 104.0;
  23. m_img_mean[2] = 117.0;
  24. m_img_mean[3] = 0;
  25. //cv::Size size_detection(640, 640)
  26. m_size_detection.width = IMAGE_WIDTH;
  27. m_size_detection.height = IMAGE_HEIGHT;
  28. m_feature_steps = {8,16,32};
  29. m_pLogger = pLogger;
  30. for (const int step : m_feature_steps) {
  31. assert(step != 0);
  32. int feature_map = IMAGE_HEIGHT / step;
  33. m_feature_maps.push_back(feature_map);
  34. int feature_size = feature_map * feature_map;
  35. m_feature_sizes.push_back(feature_size);
  36. }
  37. m_anchor_sizes = { { 16,32 } ,{ 64,128},{ 256, 512 }};
  38. m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num;
  39. generate_anchors();
  40. if (m_pLogger) {
  41. m_pLogger->INFO(string("RetinaDrop object initialized"));
  42. }
  43. }
  44. RetinaDrop::~RetinaDrop() = default;
  45. bool RetinaDrop::IsModelLoaded() {
  46. return m_model_loaded;
  47. };
  48. void RetinaDrop::SetThreshold(float object_threshold, float nms_threshold)
  49. {
  50. this->m_obj_threshold = object_threshold;
  51. this->m_nms_threshold = nms_threshold;
  52. }
  53. bool RetinaDrop::LoadModel(std::string onnx_path) {
  54. if (m_pLogger) {
  55. m_pLogger->INFO(string("Loading detection model: ")+onnx_path);
  56. }
  57. else { std::cout << "Loading detection model: " << onnx_path<<std::endl; }
  58. try {
  59. m_model = cv::dnn::readNetFromONNX(onnx_path);
  60. if (m_pLogger) {m_pLogger->INFO(string("Detection model loaded"));}
  61. m_model_loaded = true;
  62. return m_model_loaded;
  63. }
  64. catch (...)
  65. {
  66. if (m_pLogger) { m_pLogger->ERRORINFO(string("loading model failed")); }
  67. }
  68. return false;
  69. }
  70. std::vector<Bbox> RetinaDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger)
  71. {
  72. std::vector<Bbox> result;
  73. if (img.empty()) {
  74. if (pInstanceLogger) {
  75. pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty"));
  76. }
  77. throw(string("image is empty"));
  78. }
  79. if (!m_model_loaded) {
  80. pInstanceLogger->ERRORINFO(string("model is NOT loaded"));
  81. }
  82. cv::Mat blob = cv::dnn::blobFromImage(
  83. img,
  84. 1.0,
  85. m_size_detection,
  86. m_img_mean);
  87. m_model.setInput(blob);
  88. std::vector<std::string> outNames = m_model.getUnconnectedOutLayersNames();
  89. vector<Mat>outputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2)
  90. if (pInstanceLogger) {
  91. pInstanceLogger->INFO(string("RunModel(), before forward()"));
  92. }
  93. m_model.forward(outputs, outNames);
  94. std::vector<RetinaDrop::DropRes> rects;
  95. int n = post_process(img, outputs,rects);
  96. for (const auto& rect : rects) {
  97. Bbox box;
  98. box.score = rect.confidence;
  99. box.x1 = (int)rect.drop_box.x1;
  100. box.y1 = (int)rect.drop_box.y1;
  101. box.x2 = (int)rect.drop_box.x2;
  102. box.y2 = (int)rect.drop_box.y2;
  103. box.ppoint[0] = rect.keypoints[0].x;
  104. box.ppoint[1] = rect.keypoints[0].y;
  105. box.ppoint[2] = rect.keypoints[1].x;
  106. box.ppoint[3] = rect.keypoints[1].y;
  107. box.ppoint[4] = rect.keypoints[2].x;
  108. box.ppoint[5] = rect.keypoints[2].y;
  109. box.ppoint[6] = rect.keypoints[3].x;
  110. box.ppoint[7] = rect.keypoints[3].y;
  111. box.ppoint[8] = rect.keypoints[4].x;
  112. box.ppoint[9] = rect.keypoints[4].y;
  113. box.area = 0.0;
  114. result.push_back(box);
  115. }
  116. if (pInstanceLogger) {
  117. stringstream buff;
  118. buff << "detected object: " << n;
  119. pInstanceLogger->INFO(buff.str());
  120. }
  121. return result;
  122. }
  123. void RetinaDrop::generate_anchors() {
  124. m_refer_matrix = cv::Mat(m_sum_of_feature, m_bbox_head, CV_32FC1);
  125. int line = 0;
  126. for (size_t feature_map = 0; feature_map < m_feature_maps.size(); feature_map++) {
  127. for (int height = 0; height < m_feature_maps[feature_map]; ++height) {
  128. for (int width = 0; width < m_feature_maps[feature_map]; ++width) {
  129. for (int anchor = 0; anchor < m_anchor_sizes[feature_map].size(); ++anchor) {
  130. auto* row = m_refer_matrix.ptr<float>(line);
  131. row[0] = (float)(width+0.5) * m_feature_steps[feature_map]/(float)IMAGE_WIDTH;
  132. row[1] = (float)(height+0.5) * m_feature_steps[feature_map]/(float)IMAGE_HEIGHT;
  133. row[2] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_WIDTH;
  134. row[3] = m_anchor_sizes[feature_map][anchor]/(float)IMAGE_HEIGHT;
  135. line++;
  136. }
  137. }
  138. }
  139. }
  140. }
  141. int RetinaDrop::post_process(
  142. cv::Mat &src_img,
  143. vector<cv::Mat> &result_matrix,
  144. std::vector<RetinaDrop::DropRes>& valid_result
  145. )
  146. {
  147. valid_result.clear();
  148. std::vector<DropRes> result;
  149. for (int item = 0; item < m_sum_of_feature; ++item) {
  150. float* cur_bbox = (float*)result_matrix[0].data + item * 4;//result_matrix[0].step;
  151. float* cur_conf = (float*)result_matrix[2].data + item * 2;//result_matrix[1].step;
  152. float* cur_keyp = (float*)result_matrix[1].data + item * 10;//result_matrix[2].step;
  153. if (cur_conf[1] > m_obj_threshold) {
  154. DropRes headbox;
  155. headbox.confidence = cur_conf[1];
  156. auto* anchor = m_refer_matrix.ptr<float>(item);
  157. auto* keyp = cur_keyp;
  158. float cx, cy, kx, ky;
  159. cx = anchor[0] + cur_bbox[0] * m_variance[0] * anchor[2];
  160. cy = anchor[1] + cur_bbox[1] * m_variance[0] * anchor[3];
  161. kx = anchor[2] * exp(cur_bbox[2] * m_variance[1]);
  162. ky = anchor[3] * exp(cur_bbox[3] * m_variance[1]);
  163. cx -= kx / 2.0f;
  164. cy -= ky / 2.0f;
  165. kx += cx;
  166. ky += cy;
  167. headbox.drop_box.x1 = cx * src_img.cols;
  168. headbox.drop_box.y1 = cy * src_img.rows;
  169. headbox.drop_box.x2 = kx * src_img.cols;
  170. headbox.drop_box.y2 = ky * src_img.rows;
  171. for (int ki = 0; ki < 5; ++ki) {
  172. float kp_x = anchor[0] + keyp[2*ki] * m_variance[0] * anchor[2];
  173. float kp_y = anchor[1] + keyp[2*ki+1] * m_variance[0] * anchor[3];
  174. kp_x *= src_img.cols;
  175. kp_y *= src_img.rows;
  176. headbox.keypoints.push_back(cv::Point2f(kp_x, kp_y));
  177. }
  178. /*float kp_x = anchor[0] + keyp[0] * m_variance[0] * anchor[2];
  179. float kp_y = anchor[1] + keyp[1] * m_variance[0] * anchor[3];
  180. kp_x *= src_img.cols;
  181. kp_y *= src_img.rows;
  182. headbox.keypoints = {
  183. cv::Point2f(kp_x,kp_y)
  184. };*/
  185. result.push_back(headbox);
  186. }
  187. }
  188. vector<int> keep;
  189. nms_detect(result,keep);
  190. for (size_t i = 0; i < keep.size(); ++i) {
  191. valid_result.push_back(result[keep[i]]);
  192. }
  193. return (int)valid_result.size();
  194. }
  195. void RetinaDrop::nms_detect(
  196. std::vector<DropRes> & detections,
  197. vector<int> & keep)
  198. {
  199. keep.clear();
  200. if (detections.size() == 1) {
  201. keep.push_back(0);
  202. return;
  203. }
  204. sort(detections.begin(), detections.end(),
  205. [=](const DropRes& left, const DropRes& right) {
  206. return left.confidence > right.confidence;
  207. });
  208. vector<int> order;
  209. for (size_t i = 0; i < detections.size(); ++i) { order.push_back((int)i); }
  210. while (order.size()) {
  211. int i = order[0];
  212. keep.push_back(i);
  213. vector<int> del_idx;
  214. for (size_t j = 1; j < order.size(); ++j) {
  215. float iou = iou_calculate(
  216. detections[i].drop_box,
  217. detections[order[j]].drop_box);
  218. if (iou > m_nms_threshold) {
  219. del_idx.push_back((int)j);
  220. }
  221. }
  222. vector<int> order_update;
  223. for (size_t j = 1; j < order.size(); ++j) {
  224. vector<int>::iterator it = find(del_idx.begin(), del_idx.end(), j);
  225. if (it == del_idx.end()) {
  226. order_update.push_back(order[j]);
  227. }
  228. }
  229. order.clear();
  230. order.assign(order_update.begin(), order_update.end());
  231. }
  232. }
  233. float RetinaDrop::iou_calculate(
  234. const RetinaDrop::DropBox & det_a,
  235. const RetinaDrop::DropBox & det_b)
  236. {
  237. float aa = (det_a.x2 - det_a.x1 + 1) * (det_a.y2 - det_a.y1 + 1);
  238. float ab = (det_b.x2 - det_b.x1 + 1) * (det_b.y2 - det_b.y1 + 1);
  239. float xx1 = max(det_a.x1, det_b.x1);
  240. float yy1 = max(det_a.y1, det_b.y1);
  241. float xx2 = min(det_a.x2, det_b.x2);
  242. float yy2 = min(det_a.y2, det_b.y2);
  243. float w = (float)max(0.0, xx2 - xx1 + 1.0);
  244. float h = (float)max(0.0, yy2 - yy1 + 1.0);
  245. float inter = w * h;
  246. float ovr = inter / (aa + ab - inter);
  247. return ovr;
  248. }
  249. float RetinaDrop::GetNmsThreshold() { return m_nms_threshold; }
  250. }