Parcourir la source

Add inference.h and inference.cpp

chenhongjiang il y a 1 an
Parent
commit
59d842e23d
6 fichiers modifiés avec 578 ajouts et 2 suppressions
  1. 185 0
      inference.cpp
  2. 51 0
      inference.h
  3. 274 0
      tea_detect.cpp
  4. 66 0
      tea_detect.h
  5. 1 1
      tea_sorter.cpp
  6. 1 1
      tea_sorter.h

+ 185 - 0
inference.cpp

@@ -0,0 +1,185 @@
+#include "inference.h"
+
+Inference::Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda)
+{
+    modelPath = onnxModelPath;
+    modelShape = modelInputShape;
+    classesPath = classesTxtFile;
+    cudaEnabled = runWithCuda;
+
+    loadOnnxNetwork();
+    loadClassesFromFile();
+}
+
+std::vector<Detection> Inference::runInference(const cv::Mat &input)
+{
+    cv::Mat modelInput = input;
+    if (letterBoxForSquare && modelShape.width == modelShape.height)
+        modelInput = formatToSquare(modelInput);
+
+    cv::Mat blob;
+    cv::dnn::blobFromImage(modelInput, blob, 1.0/255.0, modelShape, cv::Scalar(), true, false);
+    net.setInput(blob);
+
+    std::vector<cv::Mat> outputs;
+    net.forward(outputs, net.getUnconnectedOutLayersNames());
+
+    int rows = outputs[0].size[1];
+    int dimensions = outputs[0].size[2];
+
+    bool yolov8 = false;
+    // yolov5 has an output of shape (batchSize, 25200, 85) (Num classes + box[x,y,w,h] + confidence[c])
+    // yolov8 has an output of shape (batchSize, 84,  8400) (Num classes + box[x,y,w,h])
+    if (dimensions > rows) // Check if the shape[2] is more than shape[1] (yolov8)
+    {
+        yolov8 = true;
+        rows = outputs[0].size[2];
+        dimensions = outputs[0].size[1];
+
+        outputs[0] = outputs[0].reshape(1, dimensions);
+        cv::transpose(outputs[0], outputs[0]);
+    }
+    float *data = (float *)outputs[0].data;
+
+    float x_factor = modelInput.cols / modelShape.width;
+    float y_factor = modelInput.rows / modelShape.height;
+
+    std::vector<int> class_ids;
+    std::vector<float> confidences;
+    std::vector<cv::Rect> boxes;
+
+    for (int i = 0; i < rows; ++i)
+    {
+        if (yolov8)
+        {
+            float *classes_scores = data+4;
+
+            cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+            cv::Point class_id;
+            double maxClassScore;
+
+            minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
+
+            if (maxClassScore > modelScoreThreshold)
+            {
+                confidences.push_back(maxClassScore);
+                class_ids.push_back(class_id.x);
+
+                float x = data[0];
+                float y = data[1];
+                float w = data[2];
+                float h = data[3];
+
+                int left = int((x - 0.5 * w) * x_factor);
+                int top = int((y - 0.5 * h) * y_factor);
+
+                int width = int(w * x_factor);
+                int height = int(h * y_factor);
+
+                boxes.push_back(cv::Rect(left, top, width, height));
+            }
+        }
+        else // yolov5
+        {
+            float confidence = data[4];
+
+            if (confidence >= modelConfidenseThreshold)
+            {
+                float *classes_scores = data+5;
+
+                cv::Mat scores(1, classes.size(), CV_32FC1, classes_scores);
+                cv::Point class_id;
+                double max_class_score;
+
+                minMaxLoc(scores, 0, &max_class_score, 0, &class_id);
+
+                if (max_class_score > modelScoreThreshold)
+                {
+                    confidences.push_back(confidence);
+                    class_ids.push_back(class_id.x);
+
+                    float x = data[0];
+                    float y = data[1];
+                    float w = data[2];
+                    float h = data[3];
+
+                    int left = int((x - 0.5 * w) * x_factor);
+                    int top = int((y - 0.5 * h) * y_factor);
+
+                    int width = int(w * x_factor);
+                    int height = int(h * y_factor);
+
+                    boxes.push_back(cv::Rect(left, top, width, height));
+                }
+            }
+        }
+
+        data += dimensions;
+    }
+
+    std::vector<int> nms_result;
+    cv::dnn::NMSBoxes(boxes, confidences, modelScoreThreshold, modelNMSThreshold, nms_result);
+
+    std::vector<Detection> detections{};
+    for (unsigned long i = 0; i < nms_result.size(); ++i)
+    {
+        int idx = nms_result[i];
+
+        Detection result;
+        result.class_id = class_ids[idx];
+        result.confidence = confidences[idx];
+
+        std::random_device rd;
+        std::mt19937 gen(rd());
+        std::uniform_int_distribution<int> dis(100, 255);
+        result.color = cv::Scalar(dis(gen),
+                                  dis(gen),
+                                  dis(gen));
+
+        result.className = classes[result.class_id];
+        result.box = boxes[idx];
+
+        detections.push_back(result);
+    }
+
+    return detections;
+}
+
+void Inference::loadClassesFromFile()
+{
+    std::ifstream inputFile(classesPath);
+    if (inputFile.is_open())
+    {
+        std::string classLine;
+        while (std::getline(inputFile, classLine))
+            classes.push_back(classLine);
+        inputFile.close();
+    }
+}
+
+void Inference::loadOnnxNetwork()
+{
+    net = cv::dnn::readNetFromONNX(modelPath);
+    if (cudaEnabled)
+    {
+        std::cout << "\nRunning on CUDA" << std::endl;
+        net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
+        net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
+    }
+    else
+    {
+        std::cout << "\nRunning on CPU" << std::endl;
+        net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
+        net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
+    }
+}
+
+cv::Mat Inference::formatToSquare(const cv::Mat &source)
+{
+    int col = source.cols;
+    int row = source.rows;
+    int _max = MAX(col, row);
+    cv::Mat result = cv::Mat::zeros(_max, _max, CV_8UC3);
+    source.copyTo(result(cv::Rect(0, 0, col, row)));
+    return result;
+}

+ 51 - 0
inference.h

@@ -0,0 +1,51 @@
+#ifndef INFERENCE_H
+#define INFERENCE_H
+
+// Cpp native
+#include <fstream>
+#include <vector>
+#include <string>
+#include <random>
+
+// OpenCV / DNN / Inference
+#include <opencv2/imgproc.hpp>
+#include <opencv2/opencv.hpp>
+#include <opencv2/dnn.hpp>
+
+struct Detection
+{
+    int class_id{0};
+    std::string className{};
+    float confidence{0.0};
+    cv::Scalar color{};
+    cv::Rect box{};
+};
+
+class Inference
+{
+public:
+    Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda = true);
+    std::vector<Detection> runInference(const cv::Mat &input);
+
+private:
+    void loadClassesFromFile();
+    void loadOnnxNetwork();
+    cv::Mat formatToSquare(const cv::Mat &source);
+
+    std::string modelPath{};
+    std::string classesPath{};
+    bool cudaEnabled{};
+
+    std::vector<std::string> classes{};
+    cv::Size2f modelShape{};
+
+    float modelConfidenseThreshold {0.25};
+    float modelScoreThreshold      {0.45f};
+    float modelNMSThreshold        {0.50};
+
+    bool letterBoxForSquare = true;
+
+    cv::dnn::Net net;
+};
+
+#endif // INFERENCE_H

+ 274 - 0
tea_detect.cpp

@@ -277,4 +277,278 @@ namespace graft_cv {
 		return ovr;		
 	}	
	// Returns the IoU threshold currently used for non-maximum suppression.
	float RetinaDrop::GetNmsThreshold() { return m_nms_threshold; }
+
+	//////////////////////////////////////////////////////////////////////////////////
+	//////////////////////////////////////////////////////////////////////////////////
+	YoloDrop::YoloDrop(CGcvLogger* pLogger, float obj_th, float nms_th)
+		:m_model_loaded(false)
+	{
+		m_infer = Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda = true);
+		BATCH_SIZE = 1;
+		INPUT_CHANNEL = 3;
+		IMAGE_WIDTH = 640; // default 640
+		IMAGE_HEIGHT = 640; // default 640
+		m_obj_threshold = obj_th;//default 0.6; 
+		m_nms_threshold = nms_th; //default0.4; 	
+
+		m_anchor_num = 2;
+		m_bbox_head = 4;
+
+		m_variance[0] = 0.1f;
+		m_variance[1] = 0.2f;
+		//m_img_mean(123.0, 104.0, 117.0)
+		m_img_mean[0] = 123.0;
+		m_img_mean[1] = 104.0;
+		m_img_mean[2] = 117.0;
+		m_img_mean[3] = 0;
+		//cv::Size size_detection(640, 640)	
+		m_size_detection.width = IMAGE_WIDTH;
+		m_size_detection.height = IMAGE_HEIGHT;
+		m_feature_steps = { 8,16,32 };
+		m_pLogger = pLogger;
+
+		for (const int step : m_feature_steps) {
+			assert(step != 0);
+			int feature_map = IMAGE_HEIGHT / step;
+			m_feature_maps.push_back(feature_map);
+			int feature_size = feature_map * feature_map;
+			m_feature_sizes.push_back(feature_size);
+		}
+		m_anchor_sizes = { { 16,32 } ,{ 64,128 },{ 256, 512 } };
+		m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num;
+		generate_anchors();
+		if (m_pLogger) {
+			m_pLogger->INFO(string("YoloDrop object initialized"));
+		}
+	}
+
+	YoloDrop::~YoloDrop() = default;
+
+	bool YoloDrop::IsModelLoaded() {
+		return m_model_loaded;
+	};
+	void YoloDrop::SetThreshold(float object_threshold, float nms_threshold)
+	{
+		this->m_obj_threshold = object_threshold;
+		this->m_nms_threshold = nms_threshold;
+	}
+
+	bool YoloDrop::LoadModel(std::string onnx_path) {
+		if (m_pLogger) {
+			m_pLogger->INFO(string("Loading detection model: ") + onnx_path);
+		}
+		else { std::cout << "Loading detection model: " << onnx_path << std::endl; }
+		try {
+			m_model = cv::dnn::readNetFromONNX(onnx_path);
+			if (m_pLogger) { m_pLogger->INFO(string("Detection model loaded")); }
+			m_model_loaded = true;
+			return m_model_loaded;
+		}
+		catch (...)
+		{
+			if (m_pLogger) { m_pLogger->ERRORINFO(string("loading model failed")); }
+		}
+		return false;
+	}
+
+	std::vector<Bbox> YoloDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger)
+	{
+		std::vector<Bbox> result;
+		if (img.empty()) {
+			if (pInstanceLogger) {
+				pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty"));
+			}
+			throw(string("image is empty"));
+		}
+		if (!m_model_loaded) {
+			pInstanceLogger->ERRORINFO(string("model is NOT loaded"));
+		}
+		cv::Mat blob = cv::dnn::blobFromImage(
+			img,
+			1.0,
+			m_size_detection,
+			m_img_mean);
+		m_model.setInput(blob);
+
+		std::vector<std::string> outNames = m_model.getUnconnectedOutLayersNames();
+		vector<Mat>outputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2)
+		if (pInstanceLogger) {
+			pInstanceLogger->INFO(string("RunModel(), before forward()"));
+		}
+		m_model.forward(outputs, outNames);
+		std::vector<YoloDrop::DropRes> rects;
+		int n = post_process(img, outputs, rects);
+		for (const auto& rect : rects) {
+			Bbox box;
+			box.score = rect.confidence;
+			box.x1 = (int)rect.drop_box.x1;
+			box.y1 = (int)rect.drop_box.y1;
+			box.x2 = (int)rect.drop_box.x2;
+			box.y2 = (int)rect.drop_box.y2;
+			box.ppoint[0] = rect.keypoints[0].x;
+			box.ppoint[1] = rect.keypoints[0].y;
+			box.ppoint[2] = rect.keypoints[1].x;
+			box.ppoint[3] = rect.keypoints[1].y;
+			box.ppoint[4] = rect.keypoints[2].x;
+			box.ppoint[5] = rect.keypoints[2].y;
+			box.ppoint[6] = rect.keypoints[3].x;
+			box.ppoint[7] = rect.keypoints[3].y;
+			box.ppoint[8] = rect.keypoints[4].x;
+			box.ppoint[9] = rect.keypoints[4].y;
+
+			box.operate_point[0] = 0.0;
+			box.operate_point[1] = 0.0;
+			box.operate_angle = 0.0;
+
+			box.area = 0.0;
+			box.status = 0;
+			result.push_back(box);
+		}
+		if (pInstanceLogger) {
+			stringstream buff;
+			buff << "detected object: " << n;
+			pInstanceLogger->INFO(buff.str());
+		}
+		return result;
+	}
+
	// Fills m_refer_matrix with one row per anchor: [cx, cy, w, h], all
	// normalised to [0,1] by the network input size. Row order is: per
	// feature level, row-major over the grid cells, then per anchor size —
	// post_process() indexes rows assuming this exact ordering, so it must
	// match the network's own anchor layout (TODO confirm against the model).
	void YoloDrop::generate_anchors() {
		m_refer_matrix = cv::Mat(m_sum_of_feature, m_bbox_head, CV_32FC1);
		int line = 0;
		for (size_t feature_map = 0; feature_map < m_feature_maps.size(); feature_map++) {
			for (int height = 0; height < m_feature_maps[feature_map]; ++height) {
				for (int width = 0; width < m_feature_maps[feature_map]; ++width) {
					// One anchor per configured size at this cell (2 sizes per level).
					for (int anchor = 0; anchor < m_anchor_sizes[feature_map].size(); ++anchor) {
						auto* row = m_refer_matrix.ptr<float>(line);
						// Anchor centre sits at the cell centre, scaled by the stride.
						row[0] = (float)(width + 0.5) * m_feature_steps[feature_map] / (float)IMAGE_WIDTH;
						row[1] = (float)(height + 0.5) * m_feature_steps[feature_map] / (float)IMAGE_HEIGHT;
						// Square anchor of the configured pixel size.
						row[2] = m_anchor_sizes[feature_map][anchor] / (float)IMAGE_WIDTH;
						row[3] = m_anchor_sizes[feature_map][anchor] / (float)IMAGE_HEIGHT;
						line++;
					}
				}
			}
		}
	}
	// Decodes the raw network outputs into pixel-space detections, applies
	// the confidence threshold and NMS, and stores the survivors in
	// valid_result. Returns the number of surviving detections.
	// Output tensor usage per anchor row: result_matrix[0] = 4 box values,
	// result_matrix[2] = 2 confidence values (index 1 = object score),
	// result_matrix[1] = 10 keypoint values (five 2-D points).
	// NOTE(review): the [0]/[2]/[1] index assignment is model-specific and
	// disagrees with the inline comments it replaced — confirm against the
	// exported ONNX output order.
	int YoloDrop::post_process(
		cv::Mat &src_img,
		vector<cv::Mat> &result_matrix,
		std::vector<YoloDrop::DropRes>& valid_result
	)
	{
		valid_result.clear();
		std::vector<DropRes> result;
		for (int item = 0; item < m_sum_of_feature; ++item) {
			float* cur_bbox = (float*)result_matrix[0].data + item * 4;//result_matrix[0].step;
			float* cur_conf = (float*)result_matrix[2].data + item * 2;//result_matrix[1].step;			
			float* cur_keyp = (float*)result_matrix[1].data + item * 10;//result_matrix[2].step;

			if (cur_conf[1] > m_obj_threshold) {
				DropRes headbox;
				headbox.confidence = cur_conf[1];
				auto* anchor = m_refer_matrix.ptr<float>(item);
				auto* keyp = cur_keyp;

				// SSD-style decode: centre offsets are relative to the anchor
				// centre, scaled by variance[0] and the anchor size; width and
				// height are exponential in variance[1].
				float cx, cy, kx, ky;
				cx = anchor[0] + cur_bbox[0] * m_variance[0] * anchor[2];
				cy = anchor[1] + cur_bbox[1] * m_variance[0] * anchor[3];
				kx = anchor[2] * exp(cur_bbox[2] * m_variance[1]);
				ky = anchor[3] * exp(cur_bbox[3] * m_variance[1]);

				// Convert centre/size to corner coordinates (x1,y1,x2,y2).
				cx -= kx / 2.0f;
				cy -= ky / 2.0f;
				kx += cx;
				ky += cy;

				// Scale from normalised [0,1] back to source-image pixels.
				headbox.drop_box.x1 = cx * src_img.cols;
				headbox.drop_box.y1 = cy * src_img.rows;
				headbox.drop_box.x2 = kx * src_img.cols;
				headbox.drop_box.y2 = ky * src_img.rows;

				// Decode the five keypoints with the same centre-offset rule.
				for (int ki = 0; ki < 5; ++ki) {
					float kp_x = anchor[0] + keyp[2 * ki] * m_variance[0] * anchor[2];
					float kp_y = anchor[1] + keyp[2 * ki + 1] * m_variance[0] * anchor[3];
					kp_x *= src_img.cols;
					kp_y *= src_img.rows;
					headbox.keypoints.push_back(cv::Point2f(kp_x, kp_y));
				}
				/*float kp_x = anchor[0] + keyp[0] * m_variance[0] * anchor[2];
				float kp_y = anchor[1] + keyp[1] * m_variance[0] * anchor[3];
				kp_x *= src_img.cols;
				kp_y *= src_img.rows;
				headbox.keypoints = {
				cv::Point2f(kp_x,kp_y)
				};*/
				result.push_back(headbox);
			}
		}
		// Suppress overlapping boxes, keeping the highest-confidence ones.
		vector<int> keep;
		nms_detect(result, keep);
		for (size_t i = 0; i < keep.size(); ++i) {
			valid_result.push_back(result[keep[i]]);
		}
		return (int)valid_result.size();
	}
+
+	void YoloDrop::nms_detect(
+		std::vector<DropRes> & detections,
+		vector<int> & keep)
+	{
+		keep.clear();
+		if (detections.size() == 1) {
+			keep.push_back(0);
+			return;
+		}
+
+		sort(detections.begin(), detections.end(),
+			[=](const DropRes& left, const DropRes& right) {
+			return left.confidence > right.confidence;
+		});
+
+		vector<int> order;
+		for (size_t i = 0; i < detections.size(); ++i) { order.push_back((int)i); }
+
+		while (order.size()) {
+			int i = order[0];
+			keep.push_back(i);
+			vector<int> del_idx;
+			for (size_t j = 1; j < order.size(); ++j) {
+				float iou = iou_calculate(
+					detections[i].drop_box,
+					detections[order[j]].drop_box);
+				if (iou > m_nms_threshold) {
+					del_idx.push_back((int)j);
+				}
+			}
+			vector<int> order_update;
+			for (size_t j = 1; j < order.size(); ++j) {
+				vector<int>::iterator it = find(del_idx.begin(), del_idx.end(), j);
+				if (it == del_idx.end()) {
+					order_update.push_back(order[j]);
+				}
+			}
+			order.clear();
+			order.assign(order_update.begin(), order_update.end());
+		}
+	}
+
+	float YoloDrop::iou_calculate(
+		const YoloDrop::DropBox & det_a,
+		const YoloDrop::DropBox & det_b)
+	{
+		float aa = (det_a.x2 - det_a.x1 + 1) * (det_a.y2 - det_a.y1 + 1);
+		float ab = (det_b.x2 - det_b.x1 + 1) * (det_b.y2 - det_b.y1 + 1);
+
+		float xx1 = max(det_a.x1, det_b.x1);
+		float yy1 = max(det_a.y1, det_b.y1);
+		float xx2 = min(det_a.x2, det_b.x2);
+		float yy2 = min(det_a.y2, det_b.y2);
+
+		float w = (float)max(0.0, xx2 - xx1 + 1.0);
+		float h = (float)max(0.0, yy2 - yy1 + 1.0);
+		float inter = w * h;
+		float ovr = inter / (aa + ab - inter);
+		return ovr;
+	}
+	float YoloDrop::GetNmsThreshold() { return m_nms_threshold; }
 }

+ 66 - 0
tea_detect.h

@@ -3,6 +3,7 @@
 #include <opencv.hpp>
 #include "logger.h"
 #include "data_def.h"
+#include "inference.h"
 
 
 namespace graft_cv {
@@ -71,4 +72,69 @@ namespace graft_cv {
 		CGcvLogger* m_pLogger;
 	};
 
+
	// YOLO-based drop detector: wraps a cv::dnn network plus anchor-grid
	// decoding and greedy NMS. Its public interface mirrors RetinaDrop so
	// the two detectors are interchangeable (see tea_sorter).
	class YoloDrop {
		// Axis-aligned box in corner format (pixels after decoding).
		struct DropBox {
			float x1;
			float y1;
			float x2;
			float y2;
		};

		// One raw detection: score, box and five 2-D keypoints.
		struct DropRes {
			float confidence;
			DropBox drop_box;
			std::vector<cv::Point2f> keypoints;
		};

	public:
		explicit YoloDrop(CGcvLogger* pLogger = 0, float obj_th = 0.6, float nms_th = 0.4);
		~YoloDrop();
		// Reads the ONNX network; returns false (and logs) on failure.
		bool LoadModel(std::string onnx_path);
		// Detects drops in img; throws std::string on empty input.
		std::vector<Bbox> RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger = 0);
		bool IsModelLoaded();
		float GetNmsThreshold();
		void SetThreshold(float object_threshold, float nms_threshold);

	private:
		// NOTE(review): Inference (inference.h) declares no default
		// constructor, so this member cannot be default-constructed as-is,
		// and no method in tea_detect.cpp uses it — confirm whether it is
		// meant to replace m_model.
		Inference m_infer;
		void generate_anchors();
		int post_process(
			cv::Mat &vec_Mat,
			std::vector<cv::Mat> &result_matrix,
			std::vector<YoloDrop::DropRes>& valid_result);

		void nms_detect(
			std::vector<DropRes>& detections,
			std::vector<int>& keep);

		static float iou_calculate(
			const DropBox& det_a,
			const DropBox& det_b);

		int BATCH_SIZE; //default 1
		int INPUT_CHANNEL; //default 3
		int IMAGE_WIDTH; //default 640
		int IMAGE_HEIGHT; //default 640
		float m_obj_threshold; // default 0.6 (ctor argument)
		float m_nms_threshold; // default 0.4 (ctor argument)


		// Anchor grid: one row per anchor, [cx, cy, w, h] normalised to [0,1].
		cv::Mat m_refer_matrix;
		int m_anchor_num;  // anchors per feature-map cell (2)
		int m_bbox_head;   // values per anchor row (4)

		std::vector<int> m_feature_sizes;  // cells per level (side * side)
		std::vector<int> m_feature_steps;  // strides {8, 16, 32}
		std::vector<int> m_feature_maps;   // feature-map side length per level
		std::vector<std::vector<int>>m_anchor_sizes; // anchor pixel sizes per level
		int m_sum_of_feature;              // total anchor count across levels

		cv::dnn::Net m_model;
		float m_variance[2];       // decode variances (0.1 centre, 0.2 size)
		cv::Scalar m_img_mean;     // per-channel mean for blobFromImage
		cv::Size m_size_detection; // network input size (640 x 640)
		bool m_model_loaded;       // set by LoadModel()
		CGcvLogger* m_pLogger;
	};
 }

+ 1 - 1
tea_sorter.cpp

@@ -22,7 +22,7 @@ m_ppImgSaver(0),
 m_pImginfoRaw(0),
 m_pImginfoDetected(0)
 {
-	m_drop_detector = RetinaDrop(m_pLogger, 0.5, 0.5);
+	m_drop_detector = YoloDrop(m_pLogger, 0.5, 0.5);
 }
 
 CTeaSort::~CTeaSort()

+ 1 - 1
tea_sorter.h

@@ -34,7 +34,7 @@ namespace graft_cv{
 		Mat m_raw_img;
 		Mat m_raw_gray_img;
 		ConfigParam& m_cp;
-		RetinaDrop m_drop_detector;
+		YoloDrop m_drop_detector;
 		CImStoreManager** m_ppImgSaver;
 		CGcvLogger* m_pLogger;