InferNetOnnxPaddleOcrDetect.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. #include"InferNetOnnxPaddleOcrDetect.h"
  2. InferNetOnnxPaddleOcrDetect::InferNetOnnxPaddleOcrDetect()
  3. {
  4. }
  5. void InferNetOnnxPaddleOcrDetect::LoadNetwork(const void* modelData, size_t modelDataLen)
  6. {
  7. if (_modelLoaded)
  8. {
  9. // 如果模型已加载,则释放之前的模型
  10. delete net;
  11. net = nullptr;
  12. }
  13. this->binaryThreshold = 0.3;
  14. this->polygonThreshold = 0.5;
  15. this->unclipRatio = 1.6;
  16. this->maxCandidates = 1000;
  17. sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
  18. net = new Session(env, modelData, modelDataLen, sessionOptions);
  19. size_t numInputNodes = net->GetInputCount();
  20. size_t numOutputNodes = net->GetOutputCount();
  21. AllocatorWithDefaultOptions allocator;
  22. for (int i = 0; i < numInputNodes; i++)
  23. {
  24. inputNames.push_back(net->GetInputName(i, allocator));
  25. }
  26. for (int i = 0; i < numOutputNodes; i++)
  27. {
  28. outputNames.push_back(net->GetOutputName(i, allocator));
  29. }
  30. _modelLoaded = true;
  31. }
  32. std::vector<TextBlock> InferNetOnnxPaddleOcrDetect::Process(cv::Mat& srcimg)
  33. { // 对图像预处理
  34. //cv::Mat dstimg = this->preprocess(srcimg);
  35. cv::Mat dstimg = srcimg.clone();
  36. this->normalize_(dstimg);
  37. // 创建用于存储输入形状的数组
  38. array<int64_t, 4> input_shape_{ 1, 3, dstimg.rows, dstimg.cols };
  39. // 创建CPU内存的分配器信息
  40. auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
  41. // 创建输入张量
  42. Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
  43. // 运行ONNX模型获取输出
  44. vector<Value> ort_outputs = net->Run(RunOptions{ nullptr }, &inputNames[0], &input_tensor_, 1, outputNames.data(), outputNames.size());
  45. const float* floatArray = ort_outputs[0].GetTensorMutableData<float>();
  46. int outputCount = 1;
  47. for (int i = 0; i < ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().size(); i++)
  48. {
  49. int dim = ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().at(i);
  50. outputCount *= dim;
  51. }
  52. Mat binary(dstimg.rows, dstimg.cols, CV_32FC1);
  53. memcpy(binary.data, floatArray, outputCount * sizeof(float));
  54. // 输出结果提取box
  55. std::vector<TextBlock> results;
  56. results = GetTextBoxes(binary, srcimg);
  57. return results;
  58. }
  59. // 该函数用于对输入图像进行预处理,包括颜色空间转换和图像缩放
  60. cv::Mat InferNetOnnxPaddleOcrDetect::preprocess(cv::Mat srcimg)
  61. {
  62. cv::Mat dstimg = srcimg.clone();
  63. //cv::Mat dstimg;
  64. int h = srcimg.rows;
  65. int w = srcimg.cols;
  66. // 初始化高度和宽度的缩放比例
  67. float scale_h = 1;
  68. float scale_w = 1;
  69. // 根据图像的高度和宽度选择缩放比例
  70. if (h < w)
  71. {
  72. // 如果图像高度小于宽度 计算高度缩放比例
  73. scale_h = (float)this->shortSize / (float)h;
  74. float tar_w = (float)w * scale_h;
  75. tar_w = tar_w - (int)tar_w % 32;
  76. tar_w = max((float)32, tar_w);
  77. scale_w = tar_w / (float)w;
  78. }
  79. else
  80. {
  81. // 如果图像宽度小于等于高度 计算宽度缩放比例
  82. scale_w = (float)this->shortSize / (float)w;
  83. float tar_h = (float)h * scale_w;
  84. tar_h = tar_h - (int)tar_h % 32;
  85. tar_h = max((float)32, tar_h);
  86. scale_h = tar_h / (float)h;
  87. }
  88. // 使用线性插值对图像进行缩放,以调整到目标尺寸
  89. resize(dstimg, dstimg, Size(int(scale_w * dstimg.cols), int(scale_h * dstimg.rows)), INTER_LINEAR);
  90. return dstimg;
  91. }
  92. void InferNetOnnxPaddleOcrDetect::normalize_(cv::Mat img)
  93. {
  94. // img.convertTo(img, CV_32F);
  95. int row = img.rows;
  96. int col = img.cols;
  97. this->input_image_.resize(row * col * img.channels());
  98. for (int c = 0; c < 3; c++)
  99. {
  100. for (int i = 0; i < row; i++)
  101. {
  102. for (int j = 0; j < col; j++)
  103. {
  104. float pix = img.ptr<uchar>(i)[j * 3 + c];
  105. this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - this->meanValues[c]) / this->normValues[c];
  106. }
  107. }
  108. }
  109. }
  110. Point2f InferNetOnnxPaddleOcrDetect::pointCenBoxs(vector<Point2f> polygonBoxs)
  111. {
  112. // 计算中心点坐标
  113. Point2f center(0, 0); // 初始化中心点坐标
  114. // 遍历四个点,累加坐标
  115. for (const auto& point : polygonBoxs) {
  116. center.x += point.x;
  117. center.y += point.y;
  118. }
  119. // 计算平均值
  120. center.x /= polygonBoxs.size();
  121. center.y /= polygonBoxs.size();
  122. return center;
  123. }
  124. std::vector<TextBlock> InferNetOnnxPaddleOcrDetect::GetTextBoxes(cv::Mat& binaryIN, cv::Mat& srcimgIN)
  125. {
  126. // 获取图像的高度和宽度
  127. int h = srcimgIN.rows;
  128. int w = srcimgIN.cols;
  129. // 二值化处理
  130. Mat bitmap;
  131. threshold(binaryIN, bitmap, binaryThreshold, 255, THRESH_BINARY);
  132. //// 计算图像缩放比例
  133. float scaleHeight = (float)(h) / (float)(binaryIN.size[0]);
  134. float scaleWidth = (float)(w) / (float)(binaryIN.size[1]);
  135. // 寻找轮廓
  136. vector< vector<Point> > contours;
  137. bitmap.convertTo(bitmap, CV_8UC1);
  138. findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
  139. // 限制候选框的数量
  140. size_t numCandidate = min(contours.size(), (size_t)(maxCandidates > 0 ? maxCandidates : INT_MAX));
  141. vector<float> confidences;
  142. //vector< vector<Point2f> > rsBoxes;
  143. std::vector<TextBlock> rsBoxes;
  144. // 遍历每个候选框
  145. for (size_t i = 0; i < numCandidate; i++)
  146. {
  147. vector<Point>& contour = contours[i];
  148. // 计算文本轮廓分数
  149. float score = contourScore(binaryIN, contour);
  150. float boxScore = 0.0f;
  151. if (score < polygonThreshold) {
  152. boxScore = score;
  153. continue;
  154. }
  155. //// 对轮廓进行缩放
  156. vector<Point> contourScaled; contourScaled.reserve(contour.size());
  157. for (size_t j = 0; j < contour.size(); j++)
  158. {
  159. contourScaled.push_back(Point(int(contour[j].x * scaleWidth),
  160. int(contour[j].y * scaleHeight)));
  161. }
  162. // 检查坐标是否有效
  163. bool coordinatesValid = true;
  164. for (size_t j = 0; j < contourScaled.size(); j++) {
  165. if (contourScaled[j].x < 0 || contourScaled[j].y < 0 ||
  166. contourScaled[j].x >= w || contourScaled[j].y >= h) {
  167. coordinatesValid = false;
  168. break;
  169. }
  170. }
  171. // 如果坐标有效,则处理该结果
  172. if (coordinatesValid)
  173. {
  174. TextBlock detectedBox;
  175. // 解除裁剪
  176. RotatedRect box = minAreaRect(contourScaled);
  177. float longSide = std::max(box.size.width, box.size.height);
  178. if (longSide < longSideThresh)
  179. {
  180. continue;
  181. }
  182. // minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width
  183. const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property
  184. bool swap_size = false;
  185. if (box.size.width < box.size.height) // horizontal-wide text area is expected
  186. swap_size = true;
  187. else if (fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles
  188. swap_size = true;
  189. if (swap_size)
  190. {
  191. swap(box.size.width, box.size.height);
  192. if (box.angle < 0)
  193. box.angle += 90;
  194. else if (box.angle > 0)
  195. box.angle -= 90;
  196. }
  197. Point2f vertex[4];
  198. box.points(vertex); // order: bl, tl, tr, br
  199. vector<Point2f> approx;
  200. for (int j = 0; j < 4; j++)
  201. approx.emplace_back(vertex[j]);
  202. vector<Point2f> polygon;
  203. unclip(approx, polygon);
  204. box = minAreaRect(polygon);
  205. longSide = std::max(box.size.width, box.size.height);
  206. if (longSide < longSideThresh + 2)
  207. {
  208. continue;
  209. }
  210. if (std::all_of(polygon.begin(), polygon.end(), [w, h](const Point2f& p) {
  211. return p.x >= 0 && p.x <= w && p.y >= 0 && p.y <= h;
  212. }))
  213. {
  214. Point2f centPoint = pointCenBoxs(polygon);
  215. detectedBox.boxVertices = polygon;
  216. detectedBox.boxCenterVer = centPoint;
  217. detectedBox.angle = box.angle;
  218. detectedBox.boxScore = boxScore;
  219. rsBoxes.push_back(detectedBox);
  220. }
  221. }
  222. }
  223. confidences = vector<float>(contours.size(), 1.0f);
  224. // 对 results 进行倒序处理
  225. std::reverse(rsBoxes.begin(), rsBoxes.end());
  226. return rsBoxes;
  227. }
  228. std::vector< std::vector<Point2f> > InferNetOnnxPaddleOcrDetect::order_points_clockwise(std::vector< std::vector<Point2f> > results)
  229. {
  230. std::vector< std::vector<Point2f> > order_points(results);
  231. for (int i = 0; i < results.size(); i++)
  232. {
  233. float max_sum_pts = -10000;
  234. float min_sum_pts = 10000;
  235. float max_diff_pts = -10000;
  236. float min_diff_pts = 10000;
  237. int max_sum_pts_id = 0;
  238. int min_sum_pts_id = 0;
  239. int max_diff_pts_id = 0;
  240. int min_diff_pts_id = 0;
  241. for (int j = 0; j < 4; j++)
  242. {
  243. const float sum_pt = results[i][j].x + results[i][j].y;
  244. if (sum_pt > max_sum_pts)
  245. {
  246. max_sum_pts = sum_pt;
  247. max_sum_pts_id = j;
  248. }
  249. if (sum_pt < min_sum_pts)
  250. {
  251. min_sum_pts = sum_pt;
  252. min_sum_pts_id = j;
  253. }
  254. const float diff_pt = results[i][j].y - results[i][j].x;
  255. if (diff_pt > max_diff_pts)
  256. {
  257. max_diff_pts = diff_pt;
  258. max_diff_pts_id = j;
  259. }
  260. if (diff_pt < min_diff_pts)
  261. {
  262. min_diff_pts = diff_pt;
  263. min_diff_pts_id = j;
  264. }
  265. }
  266. order_points[i][0].x = results[i][min_sum_pts_id].x;
  267. order_points[i][0].y = results[i][min_sum_pts_id].y;
  268. order_points[i][2].x = results[i][max_sum_pts_id].x;
  269. order_points[i][2].y = results[i][max_sum_pts_id].y;
  270. order_points[i][1].x = results[i][min_diff_pts_id].x;
  271. order_points[i][1].y = results[i][min_diff_pts_id].y;
  272. order_points[i][3].x = results[i][max_diff_pts_id].x;
  273. order_points[i][3].y = results[i][max_diff_pts_id].y;
  274. }
  275. return order_points;
  276. }
  277. void InferNetOnnxPaddleOcrDetect::drawPred(cv::Mat& srcimg, std::vector< std::vector<Point2f> > results)
  278. {
  279. for (int i = 0; i < results.size(); i++)
  280. {
  281. for (int j = 0; j < 4; j++)
  282. {
  283. circle(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), 2, Scalar(0, 0, 255), -1);
  284. if (j < 3)
  285. {
  286. line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][j + 1].x, (int)results[i][j + 1].y), Scalar(0, 255, 0));
  287. }
  288. else
  289. {
  290. line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][0].x, (int)results[i][0].y), Scalar(0, 255, 0));
  291. }
  292. }
  293. }
  294. }
  295. // 该函数计算二进制图像中指定轮廓的分数
  296. float InferNetOnnxPaddleOcrDetect::contourScore(cv::Mat& binary, std::vector<Point>& contour)
  297. {
  298. // 计算轮廓的边界矩形
  299. Rect rect = boundingRect(contour);
  300. // 计算边界框在二进制图像中的有效范围
  301. int xmin = max(rect.x, 0);
  302. int xmax = min(rect.x + rect.width, binary.cols - 1);
  303. int ymin = max(rect.y, 0);
  304. int ymax = min(rect.y + rect.height, binary.rows - 1);
  305. // 提取二进制图像中边界框的ROI(感兴趣区域)
  306. cv::Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
  307. // 创建一个掩码,用于标识ROI中的像素
  308. cv::Mat mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
  309. // 将轮廓中的点坐标调整为ROI内的坐标
  310. std::vector<Point> roiContour;
  311. for (size_t i = 0; i < contour.size(); i++) {
  312. Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
  313. roiContour.push_back(pt);
  314. }
  315. // 使用填充多边形函数将ROI内的轮廓标记为1
  316. std::vector<std::vector<Point>> roiContours = { roiContour };
  317. fillPoly(mask, roiContours, Scalar(1));
  318. // 计算ROI内二进制图像的均值,以掩码为权重
  319. float score = mean(binROI, mask).val[0];
  320. return score;
  321. }
  322. void InferNetOnnxPaddleOcrDetect::unclip(std::vector<Point2f>& inPoly, std::vector<Point2f>& outPoly)
  323. {
  324. // 计算轮廓的面积
  325. float area = contourArea(inPoly);
  326. float length = arcLength(inPoly, true); // 计算轮廓的周长
  327. float distance = area * unclipRatio / length; // 计算解剪距离
  328. // 获取输入轮廓的点数
  329. size_t numPoints = inPoly.size();
  330. // 存储新的轮廓线段
  331. std::vector<std::vector<Point2f>> newLines;
  332. // 遍历原始轮廓的每个点
  333. for (size_t i = 0; i < numPoints; i++)
  334. {
  335. std::vector<Point2f> newLine;
  336. Point pt1 = inPoly[i];
  337. Point pt2 = inPoly[(i - 1) % numPoints];
  338. Point vec = pt1 - pt2;
  339. // 计算解剪距离
  340. float unclipDis = (float)(distance / norm(vec));
  341. // 计算旋转后的向量
  342. Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
  343. // 添加旋转后的点到新线段
  344. newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
  345. newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
  346. newLines.push_back(newLine);
  347. }
  348. // 获取新线段的数量
  349. size_t numLines = newLines.size();
  350. // 遍历新线段集合
  351. for (size_t i = 0; i < numLines; i++)
  352. {
  353. Point2f a = newLines[i][0];
  354. Point2f b = newLines[i][1];
  355. Point2f c = newLines[(i + 1) % numLines][0];
  356. Point2f d = newLines[(i + 1) % numLines][1];
  357. Point2f pt;
  358. // 计算两向量的夹角余弦值
  359. Point2f v1 = b - a;
  360. Point2f v2 = d - c;
  361. float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));
  362. // 根据夹角余弦值判断旋转后的点位置
  363. if (fabs(cosAngle) > 0.7)
  364. {
  365. pt.x = (b.x + c.x) * 0.5;
  366. pt.y = (b.y + c.y) * 0.5;
  367. }
  368. else
  369. {
  370. float denom = a.x * (float)(d.y - c.y) + b.x * (float)(c.y - d.y) +
  371. d.x * (float)(b.y - a.y) + c.x * (float)(a.y - b.y);
  372. float num = a.x * (float)(d.y - c.y) + c.x * (float)(a.y - d.y) + d.x * (float)(c.y - a.y);
  373. float s = num / denom;
  374. pt.x = a.x + s * (b.x - a.x);
  375. pt.y = a.y + s * (b.y - a.y);
  376. }
  377. // 将计算得到的点添加到输出轮廓
  378. outPoly.push_back(pt);
  379. }
  380. }
  381. cv::Mat InferNetOnnxPaddleOcrDetect::getRotateCropImage(cv::Mat& frame, std::vector<Point2f> vertices)
  382. {
  383. // 计算包围轮廓的最小矩形
  384. Rect rect = boundingRect(cv::Mat(vertices));
  385. // 从原始图像中提取感兴趣区域(ROI)
  386. cv::Mat crop_img = frame(rect);
  387. // 设置输出图像的大小为矩形的宽度和高度
  388. const Size outputSize = Size(rect.width, rect.height);
  389. // 定义目标矩形的四个顶点坐标
  390. std::vector<Point2f> targetVertices{ Point2f(0, outputSize.height), Point2f(0, 0), Point2f(outputSize.width, 0), Point2f(outputSize.width, outputSize.height) };
  391. // 将原始轮廓的顶点坐标调整为在裁剪后的图像中的坐标
  392. for (int i = 0; i < 4; i++)
  393. {
  394. vertices[i].x -= rect.x;
  395. vertices[i].y -= rect.y;
  396. }
  397. // 计算透视变换矩阵,将原始轮廓映射到目标矩形
  398. cv::Mat rotationMatrix = cv::getPerspectiveTransform(vertices, targetVertices);
  399. // 应用透视变换,旋转和裁剪原始图像的感兴趣区域
  400. cv::Mat result;
  401. cv::warpPerspective(crop_img, result, rotationMatrix, outputSize, cv::BORDER_REPLICATE);
  402. return result;
  403. }
  404. void InferNetOnnxPaddleOcrDetect::Dispose()
  405. {
  406. // 在此处释放资源,确保在对象销毁时调用
  407. //delete net;
  408. //net = nullptr;
  409. }