v0.2.0 Grab optimization: adopt YOLOv8 and OpenCV 4.9.0

chenhongjiang, 1 year ago
parent commit 3fe67633cf
8 changed files with 198 additions and 74 deletions
  1. ReadMe.txt       +2  -1
  2. inference.cpp    +13 -0
  3. inference.h      +5  -1
  4. tcv_conf.yml     +4  -3
  5. tea_cv_api.cpp   +1  -1
  6. tea_detect.cpp   +89 -45
  7. tea_detect.h     +2  -1
  8. tea_sorter.cpp   +82 -22

+ 2 - 1
ReadMe.txt

@@ -23,4 +23,5 @@ v0.1.21 
 v0.1.22 Cut-model keypoint annotation changed: points 2 and 3 are the first and second leaf fork points, respectively
 v0.1.23 The grab point is offset from keypoint 4
 v0.1.24 Cut-position detection: added the kp3_weight_cut parameter to control where the grab point sits between point 3 and point 2, within [0, 1.0]; larger values move it closer to point 3
-v0.1.25 Grab feature extensions: added empty-tray detection; added leaf-tip-orientation priority; added single-plant priority; refined the grab API return-value definitions
+v0.1.25 Grab feature extensions: added empty-tray detection; added leaf-tip-orientation priority; added single-plant priority; refined the grab API return-value definitions
+v0.2.0 Grab optimization: adopt YOLOv8 and OpenCV 4.9.0

+ 13 - 0
inference.cpp

@@ -47,6 +47,7 @@ std::vector<Detection> Inference::runInference(const cv::Mat &input)
     std::vector<int> class_ids;
     std::vector<float> confidences;
     std::vector<cv::Rect> boxes;
+	std::vector<std::vector<cv::Point>> kpts;
 
     for (int i = 0; i < rows; ++i)
     {
@@ -76,6 +77,16 @@ std::vector<Detection> Inference::runInference(const cv::Mat &input)
                 int width = int(w * x_factor);
                 int height = int(h * y_factor);
 
+				int step = 3;
+				std::vector<cv::Point> kps;
+				for (int kpi = 0; kpi < 5; ++kpi) {
+					float kp_x = data[5 + kpi * step];
+					float kp_y = data[5 + kpi * step + 1];
+					cv::Point kp(int(kp_x * x_factor), int(kp_y * y_factor));
+					kps.push_back(kp);
+				}
+				kpts.push_back(kps);
+
                 boxes.push_back(cv::Rect(left, top, width, height));
             }
         }
@@ -138,6 +149,7 @@ std::vector<Detection> Inference::runInference(const cv::Mat &input)
 
         result.className = classes[result.class_id];
         result.box = boxes[idx];
+		result.kpts = kpts[idx];
 
         detections.push_back(result);
     }
@@ -155,6 +167,7 @@ void Inference::loadClassesFromFile()
             classes.push_back(classLine);
         inputFile.close();
     }
+	classes.push_back(std::string("tea"));	// fallback: ensure the "tea" class name exists even when no classes file is provided
 }
 
 void Inference::loadOnnxNetwork()
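
Note: the keypoint decoding added above assumes a YOLOv8-pose style output row of
[cx, cy, w, h, class_score, (kp_x, kp_y, kp_conf) x 5], i.e. 4 + 1 + 5*3 = 20 values per
candidate for a single class. A self-contained sketch of that decoding (the helper name
and the single-class layout are assumptions, not part of the commit):

    #include <opencv2/core.hpp>
    #include <vector>

    // Hypothetical helper: pull the 5 keypoints out of one output row.
    // data points at [cx, cy, w, h, score, kp0_x, kp0_y, kp0_conf, ...];
    // x_factor/y_factor are the same resize scales applied to the boxes.
    std::vector<cv::Point> decodeKeypoints(const float* data, float x_factor, float y_factor)
    {
        const int step = 3;  // x, y, confidence per keypoint
        std::vector<cv::Point> kps;
        for (int kpi = 0; kpi < 5; ++kpi) {
            float kp_x = data[5 + kpi * step];       // skip 4 box values + 1 class score
            float kp_y = data[5 + kpi * step + 1];
            kps.emplace_back(int(kp_x * x_factor), int(kp_y * y_factor));
        }
        return kps;
    }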

+ 5 - 1
inference.h

@@ -19,6 +19,7 @@ struct Detection
     float confidence{0.0};
     cv::Scalar color{};
     cv::Rect box{};
+	std::vector<cv::Point> kpts{};
 };
 
 class Inference
@@ -26,7 +27,10 @@ class Inference
 public:
     Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda = true);
     std::vector<Detection> runInference(const cv::Mat &input);
-
+	
+	void setModelConfidenceThreshold(float t) { modelConfidenceThreshold = t; }
+	void setModelScoreThreshold(float t) { modelScoreThreshold = t; }
+	void setModelNMSThreshold(float t) { modelNMSThreshold = t; }
 private:
     void loadClassesFromFile();
     void loadOnnxNetwork();
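
Note: a minimal sketch of how the new setters slot into the detection flow (model path and
threshold values are illustrative; assumes the usual OpenCV/iostream includes and a BGR
cv::Mat named frame):

    Inference infer("best.onnx", cv::Size2f(640, 640), "", /*runWithCuda=*/false);
    infer.setModelScoreThreshold(0.65f);  // object_threshold_grab in tcv_conf.yml
    infer.setModelNMSThreshold(0.10f);    // nms_threshold_grab
    std::vector<Detection> dets = infer.runInference(frame);
    for (const Detection& d : dets)
        std::cout << d.className << " " << d.confidence
                  << " keypoints: " << d.kpts.size() << std::endl;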

+ 4 - 3
tcv_conf.yml

@@ -6,8 +6,8 @@ conf_parameters:
    image_save: 1
    image_depository: "D:\\logs\\algo_img"
    image_backup_days: 7
-   model_path_grab: "D:/projects/graft/py_code/retina_tea5/TeaDetector_grab_20231223061511.onnx"
-   object_threshold_grab: 0.85
+   model_path_grab: "D:/projects/graft/py_code/yolo8_tea/tea_grab/teagrab_20240108_2002/weights/best.onnx"
+   object_threshold_grab: 0.65
    nms_threshold_grab: 1.0000000149011612e-01
    grid_row_grab: 2
    grid_col_grab: 3
@@ -17,7 +17,8 @@ conf_parameters:
    min_area_ratio_grab: 0.0
    max_area_ratio_grab: 0.12
    rot_degree_grab: 0
-   model_path_cut: "D:/projects/graft/py_code/retina_tea5/TeaDetector_cut_20231219003541.onnx"
+   # model_path_cut: "D:/projects/graft/py_code/retina_tea5/TeaDetector_cut_20231219003541.onnx"
+   model_path_cut: "D:/projects/graft/py_code/yolo8_tea/tea_cut/teacut_20240106_200/weights/best.onnx"
    object_threshold_cut: 0.8
    nms_threshold_cut: 0.1
    grid_row_cut: 1
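
Note: a sketch of reading these keys, assuming the project parses tcv_conf.yml with
OpenCV's cv::FileStorage (key names are taken from the diff; error handling omitted):

    cv::FileStorage fs("./tcv_conf.yml", cv::FileStorage::READ);
    cv::FileNode cp = fs["conf_parameters"];
    std::string model_path_grab = (std::string)cp["model_path_grab"];
    float object_threshold_grab = (float)cp["object_threshold_grab"];  // 0.65 for the y8 model
    float nms_threshold_grab    = (float)cp["nms_threshold_grab"];     // 0.1
    fs.release();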

+ 1 - 1
tea_cv_api.cpp

@@ -18,7 +18,7 @@ extern CRITICAL_SECTION g_cs;
 namespace graft_cv
 {
 
-	char *g_version_str = "0.1.25";
+	char *g_version_str = "0.2.0";
 
 	//configure
 	string g_conf_file = "./tcv_conf.yml";	

+ 89 - 45
tea_detect.cpp

@@ -281,9 +281,10 @@ namespace graft_cv {
 	//////////////////////////////////////////////////////////////////////////////////
 	//////////////////////////////////////////////////////////////////////////////////
 	YoloDrop::YoloDrop(CGcvLogger* pLogger, float obj_th, float nms_th)
-		:m_model_loaded(false)
+		:m_model_loaded(false),
+		m_pInfer(0),
+		m_runWithCuda(false)
 	{
-		m_infer = Inference(const std::string &onnxModelPath, const cv::Size2f &modelInputShape, const std::string &classesTxtFile, const bool &runWithCuda = true);
 		BATCH_SIZE = 1;
 		INPUT_CHANNEL = 3;
 		IMAGE_WIDTH = 640; // default 640
@@ -307,7 +308,7 @@ namespace graft_cv {
 		m_feature_steps = { 8,16,32 };
 		m_pLogger = pLogger;
 
-		for (const int step : m_feature_steps) {
+		/*for (const int step : m_feature_steps) {
 			assert(step != 0);
 			int feature_map = IMAGE_HEIGHT / step;
 			m_feature_maps.push_back(feature_map);
@@ -316,7 +317,7 @@ namespace graft_cv {
 		}
 		m_anchor_sizes = { { 16,32 } ,{ 64,128 },{ 256, 512 } };
 		m_sum_of_feature = std::accumulate(m_feature_sizes.begin(), m_feature_sizes.end(), 0) * m_anchor_num;
-		generate_anchors();
+		generate_anchors();*/
 		if (m_pLogger) {
 			m_pLogger->INFO(string("YoloDrop object initialized"));
 		}
@@ -331,15 +332,33 @@ namespace graft_cv {
 	{
 		this->m_obj_threshold = object_threshold;
 		this->m_nms_threshold = nms_threshold;
+		if (m_pInfer) {
+			m_pInfer->setModelNMSThreshold(m_nms_threshold);
+			m_pInfer->setModelScoreThreshold(m_obj_threshold);
+		}
 	}
 
 	bool YoloDrop::LoadModel(std::string onnx_path) {
+		if (m_pInfer) {
+			delete m_pInfer;
+			m_pInfer = 0;
+			m_model_loaded = false;
+		}
+		cv::Size2f modelInputShape((float)IMAGE_WIDTH, (float)IMAGE_HEIGHT);
+
 		if (m_pLogger) {
 			m_pLogger->INFO(string("Loading detection model: ") + onnx_path);
 		}
 		else { std::cout << "Loading detection model: " << onnx_path << std::endl; }
 		try {
-			m_model = cv::dnn::readNetFromONNX(onnx_path);
+			m_pInfer = new Inference(onnx_path, modelInputShape, "", m_runWithCuda);
+			if (!m_pInfer) {
+				throw(string("inference init error"));
+			}
+			m_pInfer->setModelNMSThreshold(m_nms_threshold);
+			m_pInfer->setModelScoreThreshold(m_obj_threshold);
+
 			if (m_pLogger) { m_pLogger->INFO(string("Detection model loaded")); }
 			m_model_loaded = true;
 			return m_model_loaded;
@@ -351,10 +370,10 @@ namespace graft_cv {
 		return false;
 	}
 
-	std::vector<Bbox> YoloDrop::RunModel(cv::Mat& img, CGcvLogger* pInstanceLogger)
+	std::vector<Bbox> YoloDrop::RunModel(cv::Mat& frame, CGcvLogger* pInstanceLogger)
 	{
 		std::vector<Bbox> result;
-		if (img.empty()) {
+		if (frame.empty()) {
 			if (pInstanceLogger) {
 				pInstanceLogger->ERRORINFO(string("RunModel(), input image is empty"));
 			}
@@ -362,51 +381,76 @@ namespace graft_cv {
 		}
 		if (!m_model_loaded) {
 			pInstanceLogger->ERRORINFO(string("model is NOT loaded"));
+			throw(string("model is NOT loaded"));
 		}
-		cv::Mat blob = cv::dnn::blobFromImage(
-			img,
-			1.0,
-			m_size_detection,
-			m_img_mean);
-		m_model.setInput(blob);
 
-		std::vector<std::string> outNames = m_model.getUnconnectedOutLayersNames();
-		vector<Mat>outputs;// location(1x16800x4), confidence(1x16800x2), keypoint(1x16800x2)
-		if (pInstanceLogger) {
-			pInstanceLogger->INFO(string("RunModel(), before forward()"));
-		}
-		m_model.forward(outputs, outNames);
-		std::vector<YoloDrop::DropRes> rects;
-		int n = post_process(img, outputs, rects);
-		for (const auto& rect : rects) {
-			Bbox box;
-			box.score = rect.confidence;
-			box.x1 = (int)rect.drop_box.x1;
-			box.y1 = (int)rect.drop_box.y1;
-			box.x2 = (int)rect.drop_box.x2;
-			box.y2 = (int)rect.drop_box.y2;
-			box.ppoint[0] = rect.keypoints[0].x;
-			box.ppoint[1] = rect.keypoints[0].y;
-			box.ppoint[2] = rect.keypoints[1].x;
-			box.ppoint[3] = rect.keypoints[1].y;
-			box.ppoint[4] = rect.keypoints[2].x;
-			box.ppoint[5] = rect.keypoints[2].y;
-			box.ppoint[6] = rect.keypoints[3].x;
-			box.ppoint[7] = rect.keypoints[3].y;
-			box.ppoint[8] = rect.keypoints[4].x;
-			box.ppoint[9] = rect.keypoints[4].y;
+		// Inference starts here...
+		std::vector<Detection> output = m_pInfer->runInference(frame);
 
-			box.operate_point[0] = 0.0;
-			box.operate_point[1] = 0.0;
-			box.operate_angle = 0.0;
+		int detections = output.size();
+		std::cout << "Number of detections:" << detections << std::endl;
 
-			box.area = 0.0;
-			box.status = 0;
-			result.push_back(box);
+		for (int i = 0; i < detections; ++i)
+		{
+			Detection detection = output[i];
+
+			cv::Rect box = detection.box;
+			cv::Scalar color = detection.color;
+			std::vector<cv::Point> pts = detection.kpts;
+
+			Bbox box_out;
+			box_out.score = detection.confidence;
+			box_out.x1 = box.x;
+			box_out.y1 = box.y;
+			box_out.x2 = box.x + box.width;
+			box_out.y2 = box.y + box.height;
+			box_out.ppoint[0] = pts[0].x;
+			box_out.ppoint[1] = pts[0].y;
+			box_out.ppoint[2] = pts[1].x;
+			box_out.ppoint[3] = pts[1].y;
+			box_out.ppoint[4] = pts[2].x;
+			box_out.ppoint[5] = pts[2].y;
+			box_out.ppoint[6] = pts[3].x;
+			box_out.ppoint[7] = pts[3].y;
+			box_out.ppoint[8] = pts[4].x;
+			box_out.ppoint[9] = pts[4].y;
+
+			box_out.operate_point[0] = 0.0;
+			box_out.operate_point[1] = 0.0;
+			box_out.operate_angle = 0.0;
+
+			box_out.area = 0.0;
+			box_out.status = 0;
+			result.push_back(box_out);
+
+
+			//// Detection box
+			//cv::rectangle(frame, box, color, 2);
+
+			//// Detection box text
+			//std::string classString = detection.className + ' ' + std::to_string(detection.confidence).substr(0, 4);
+			//cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
+			//cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);
+
+			//cv::rectangle(frame, textBox, color, cv::FILLED);
+			//cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
+
+			//for (auto& pt : pts) {
+			//	cv::circle(frame, pt, 3, cv::Scalar(0, 0, 255));
+			//}
 		}
+		// Inference ends here...
+
+		// This is only for preview purposes
+		/*float scale = 0.8;
+		cv::resize(frame, frame, cv::Size(frame.cols*scale, frame.rows*scale));
+		cv::imshow("Inference", frame);
+
+		cv::waitKey(-1);*/
+		
 		if (pInstanceLogger) {
 			stringstream buff;
-			buff << "detected object: " << n;
+			buff << "detected object: " << detections;
 			pInstanceLogger->INFO(buff.str());
 		}
 		return result;
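
Note: a minimal sketch of the calling sequence for the reworked YoloDrop (paths and
threshold values are illustrative; assumes a valid CGcvLogger* pLogger and the usual
OpenCV includes):

    graft_cv::YoloDrop detector(pLogger, 0.65f, 0.1f);
    if (detector.LoadModel("D:/models/best.onnx")) {   // allocates the Inference instance
        detector.SetThreshold(0.65f, 0.1f);            // now also forwarded to m_pInfer
        cv::Mat frame = cv::imread("tray.png");
        std::vector<graft_cv::Bbox> boxes = detector.RunModel(frame, pLogger);
        // each Bbox carries score, x1..y2, and the 5 keypoints flattened in ppoint[0..9]
    }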

+ 2 - 1
tea_detect.h

@@ -97,7 +97,8 @@ namespace graft_cv {
 		void SetThreshold(float object_threshold, float nms_threshold);
 
 	private:
-		Inference m_infer;
+		Inference* m_pInfer;
+		bool m_runWithCuda;
 		void generate_anchors();
 		int post_process(
 			cv::Mat &vec_Mat,
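
Note: m_pInfer is owned through a raw pointer (allocated in LoadModel(), manually deleted
on reload), so the destructor must also release it. A sketch of the same member expressed
with std::unique_ptr, assuming C++11 is available, purely as an alternative:

    #include <memory>

    std::unique_ptr<Inference> m_pInfer;  // released automatically on destruction/reload
    // LoadModel() would then begin with:
    //   m_pInfer.reset(new Inference(onnx_path, modelInputShape, "", m_runWithCuda));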

+ 82 - 22
tea_sorter.cpp

@@ -1679,42 +1679,102 @@ double CTeaSort::singleten_ratio(
 Bbox& box
 )
 {
-	//compute the background occupancy ratio in the image
-	//padding
-	//expand the box range in all 4 directions
-	int x1 = box.x1;
-	int y1 = box.y1;
-	int x2 = box.x2;
-	int y2 = box.y2;
+	//compute the seedling's direction and locate the grab position
+	float x3 = box.ppoint[4];
+	float y3 = box.ppoint[5];
+	float x5 = box.ppoint[8];
+	float y5 = box.ppoint[9];
+	double angle = atan2(x5 - x3, y5 - y3);
+
 	int padding_border = m_cp.offset_grab;
-	x1 -= padding_border;
+
+	float grab_x = x3 + (float)padding_border * sin(angle);
+	float grab_y = y3 + (float)padding_border * cos(angle);
+
+	double singleten_ratio = 0.0;
+	if (grab_x < 0 || grab_y < 0 || grab_x > m_raw_img.cols - 1 || grab_y > m_raw_img.rows - 1) {
+		return singleten_ratio;
+	}
+
+	int x1 = int(grab_x) - padding_border / 2;
+	int y1 = int(grab_y) - padding_border / 2;
+	int x2 = x1 + padding_border;
+	int y2 = y1 + padding_border;
+
 	x1 = x1 < 0 ? 0 : x1;
-	y1 -= padding_border;
 	y1 = y1 < 0 ? 0 : y1;
-
-	x2 += padding_border;
 	x2 = x2 < m_raw_img.cols ? x2 : m_raw_img.cols - 1;
-
-	y2 += padding_border;
 	y2 = y2 < m_raw_img.rows ? y2 : m_raw_img.rows - 1;
-	
-	cv::Rect r(x1,y1,x2-x1,y2-y1);
-	
+
+	cv::Rect r(x1, y1, x2 - x1, y2 - y1);
+
+	//debug
+	if (m_cp.image_show) {
+		cv::Mat tmp = m_raw_img.clone();
+		cv::Rect br(box.x1, box.y1, box.x2 - box.x1, box.y2 - box.y1);
+		cv::rectangle(tmp, br, cv::Scalar(0, 0, 200), 2);
+		cv::rectangle(tmp, r, cv::Scalar(0, 100, 0), 2);
+		imshow_wait("box", tmp);
+	}
+
 	cv::Mat roi = m_raw_gray_img(r).clone();
 	cv::Mat bin_img;
 	double th = cv::threshold(roi, bin_img, 255, 255, cv::THRESH_OTSU);
-	
+
+	if (m_cp.image_show) {
+		imshow_wait("box bin_img", bin_img);
+	}
+
+
 	//count the non-zero pixels in bin_img
 	double bg_area = 0;
 	cv::Mat_<uchar>::iterator it = bin_img.begin<uchar>();
 	cv::Mat_<uchar>::iterator it_end = bin_img.end<uchar>();
-	for(;it!=it_end;++it){
-		if((*it)>0){
-			bg_area+=1;
+	for (; it != it_end; ++it) {
+		if ((*it)>0) {
+			bg_area += 1;
 		}
 	}
-	double singleten_ratio = bg_area / static_cast<double>(roi.cols * roi.rows);
-	return singleten_ratio;	
+	singleten_ratio = bg_area / static_cast<double>(roi.cols * roi.rows);
+	return singleten_ratio;
+
+
+	////compute the background occupancy ratio in the image
+	////padding
+	////expand the box range in all 4 directions
+	//int x1 = box.x1;
+	//int y1 = box.y1;
+	//int x2 = box.x2;
+	//int y2 = box.y2;
+	//int padding_border = m_cp.offset_grab;
+	//x1 -= padding_border;
+	//x1 = x1 < 0 ? 0 : x1;
+	//y1 -= padding_border;
+	//y1 = y1 < 0 ? 0 : y1;
+
+	//x2 += padding_border;
+	//x2 = x2 < m_raw_img.cols ? x2 : m_raw_img.cols - 1;
+
+	//y2 += padding_border;
+	//y2 = y2 < m_raw_img.rows ? y2 : m_raw_img.rows - 1;
+	//
+	//cv::Rect r(x1,y1,x2-x1,y2-y1);
+	//
+	//cv::Mat roi = m_raw_gray_img(r).clone();
+	//cv::Mat bin_img;
+	//double th = cv::threshold(roi, bin_img, 255, 255, cv::THRESH_OTSU);
+	//
+	////count the non-zero pixels in bin_img
+	//double bg_area = 0;
+	//cv::Mat_<uchar>::iterator it = bin_img.begin<uchar>();
+	//cv::Mat_<uchar>::iterator it_end = bin_img.end<uchar>();
+	//for(;it!=it_end;++it){
+	//	if((*it)>0){
+	//		bg_area+=1;
+	//	}
+	//}
+	//double singleten_ratio = bg_area / static_cast<double>(roi.cols * roi.rows);
+	//return singleten_ratio;	
 }
 double CTeaSort::direction_ratio(
 	Bbox& box
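
Note: a worked sketch of the grab-point geometry introduced above. atan2 is called with
the x-difference first, so sin(angle) recovers the unit x-offset and cos(angle) the unit
y-offset along the keypoint-3 -> keypoint-5 direction (values below are illustrative):

    #include <cmath>
    #include <cstdio>

    int main() {
        // keypoint 3 (ppoint[4], ppoint[5]) and keypoint 5 (ppoint[8], ppoint[9])
        float x3 = 100.f, y3 = 200.f;
        float x5 = 100.f, y5 = 260.f;     // stem pointing straight down in image coords
        int   padding_border = 40;        // plays the role of m_cp.offset_grab

        double angle = std::atan2(x5 - x3, y5 - y3);            // 0 for a vertical stem

        float grab_x = x3 + padding_border * std::sin(angle);   // 100.0
        float grab_y = y3 + padding_border * std::cos(angle);   // 240.0: 40 px toward kp5
        std::printf("grab point: (%.1f, %.1f)\n", grab_x, grab_y);
        return 0;
    }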