// ocr.cpp

// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
  14. #include "layer.h"
  15. #include "net.h"
  16. #include <opencv2/core/core.hpp>
  17. #include <opencv2/highgui/highgui.hpp>
  18. #include <opencv2/imgproc/imgproc.hpp>
  19. #include <cfloat>
  20. #include <vector>
  21. #include <opencv2/imgproc.hpp>
  22. #include <iostream>
  23. #include <fstream>
  24. #include "math.h"
  25. #include <locale>
  26. #include <codecvt>
  27. #include "android/log.h"
  28. #include <datareader.h>
using namespace cv;
using namespace std;

std::vector<int> prebLabel;
std::vector<string> alphabet;

struct TextBlock
{
    std::vector<Point2f> boxVertices; // vertex coordinates of the box
    float angle;                      // rotation angle of the box
    float boxScore;
    float blockTime;
};
// Compute the score of a given contour on the binary probability map.
float contourScore(cv::Mat& binary, std::vector<Point>& contour)
{
    // Bounding rectangle of the contour
    Rect rect = boundingRect(contour);
    // Clamp the bounding box to the valid range of the binary map
    int xmin = max(rect.x, 0);
    int xmax = min(rect.x + rect.width, binary.cols - 1);
    int ymin = max(rect.y, 0);
    int ymax = min(rect.y + rect.height, binary.rows - 1);
    // Extract the ROI (region of interest) covered by the bounding box
    cv::Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
    // Mask marking the contour pixels inside the ROI
    cv::Mat mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
    // Shift the contour points into ROI coordinates
    std::vector<Point> roiContour;
    for (size_t i = 0; i < contour.size(); i++) {
        Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
        roiContour.push_back(pt);
    }
    // Fill the contour polygon in the mask with 1
    std::vector<std::vector<Point>> roiContours = { roiContour };
    fillPoly(mask, roiContours, Scalar(1));
    // Mean of the probability map inside the contour (mask-weighted mean over the ROI)
    float score = mean(binROI, mask).val[0];
    return score;
}
// Expand (unclip) a detected polygon outward before cropping.
void unclip(std::vector<Point2f>& inPoly, std::vector<Point2f>& outPoly)
{
    float unclipRatio = 1.6;
    // Area of the polygon
    float area = contourArea(inPoly);
    // Perimeter of the polygon
    float length = arcLength(inPoly, true);
    // Expansion distance
    float distance = area * unclipRatio / length;
    // Number of points of the input polygon
    size_t numPoints = inPoly.size();
    // Offset copies of every edge
    std::vector<std::vector<Point2f>> newLines;
    // Shift every edge of the original polygon outward
    for (size_t i = 0; i < numPoints; i++)
    {
        std::vector<Point2f> newLine;
        Point pt1 = inPoly[i];
        Point pt2 = inPoly[(i + numPoints - 1) % numPoints]; // previous vertex (avoids unsigned wrap-around of i - 1)
        Point vec = pt1 - pt2;
        // Per-edge offset, normalized by the edge length
        float unclipDis = (float)(distance / norm(vec));
        // Outward normal of the edge, scaled by the offset
        Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
        // Shift both endpoints of the edge outward
        newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
        newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
        newLines.push_back(newLine);
    }
    // Number of offset edges
    size_t numLines = newLines.size();
    // Intersect adjacent offset edges to build the expanded polygon
    for (size_t i = 0; i < numLines; i++)
    {
        Point2f a = newLines[i][0];
        Point2f b = newLines[i][1];
        Point2f c = newLines[(i + 1) % numLines][0];
        Point2f d = newLines[(i + 1) % numLines][1];
        Point2f pt;
        // Cosine of the angle between the two edges
        Point2f v1 = b - a;
        Point2f v2 = d - c;
        float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));
        // Nearly parallel edges: take the midpoint; otherwise intersect the two lines
        if (fabs(cosAngle) > 0.7)
        {
            pt.x = (b.x + c.x) * 0.5;
            pt.y = (b.y + c.y) * 0.5;
        }
        else
        {
            float denom = a.x * (float)(d.y - c.y) + b.x * (float)(c.y - d.y) +
                          d.x * (float)(b.y - a.y) + c.x * (float)(a.y - b.y);
            float num = a.x * (float)(d.y - c.y) + c.x * (float)(a.y - d.y) + d.x * (float)(c.y - a.y);
            float s = num / denom;
            pt.x = a.x + s * (b.x - a.x);
            pt.y = a.y + s * (b.y - a.y);
        }
        // Append the resulting vertex to the output polygon
        outPoly.push_back(pt);
    }
}
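// A brief note on the expansion rule above (added for clarity): the per-polygon offset follows the
// DB-style unclip formula D = A * r / L, where A is the polygon area, L its perimeter and r the
// unclipRatio (1.6 here). Each edge is pushed outward by D along its normal and adjacent offset
// edges are intersected; near-parallel neighbours fall back to the midpoint. A minimal, purely
// illustrative call (the box values are made up, nothing in this file uses them):
//
//     std::vector<cv::Point2f> box = { {10, 10}, {110, 10}, {110, 40}, {10, 40} };
//     std::vector<cv::Point2f> expanded;
//     unclip(box, expanded); // a 4-point polygon grown by roughly D on every side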
void GetTextBoxes(cv::Mat& binaryIN, cv::Mat& srcimgIN, std::vector<TextBlock>& rsBoxes)
{
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "begin get boxes");
    float binaryThreshold = 0.3f;
    int maxCandidates = 1000;
    float polygonThreshold = 0.5;
    int longSideThresh = 3; // minimum long-side length of a candidate box
    // Height and width of the source image
    int h = srcimgIN.rows;
    int w = srcimgIN.cols;
    // Binarize the probability map
    Mat bitmap;
    threshold(binaryIN, bitmap, binaryThreshold, 255, THRESH_BINARY);
    // Scale factors from probability-map coordinates back to source-image coordinates
    float scaleHeight = (float)(h) / (float)(binaryIN.size[0]);
    float scaleWidth = (float)(w) / (float)(binaryIN.size[1]);
    // Find contours
    vector<vector<Point>> contours;
    bitmap.convertTo(bitmap, CV_8UC1);
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "3 %d %d %d", bitmap.rows, bitmap.cols, bitmap.channels());
    findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
    // Limit the number of candidate boxes
    size_t numCandidate = min(contours.size(), (size_t)(maxCandidates > 0 ? maxCandidates : INT_MAX));
    vector<float> confidences;
    // Process each candidate contour
    for (size_t i = 0; i < numCandidate; i++)
    {
        vector<Point>& contour = contours[i];
        // Score of the text contour
        float score = contourScore(binaryIN, contour);
        if (score < polygonThreshold) {
            continue;
        }
        float boxScore = score; // keep the contour's mean probability as the box score
        // Scale the contour back to source-image coordinates
        vector<Point> contourScaled;
        contourScaled.reserve(contour.size());
        for (size_t j = 0; j < contour.size(); j++)
        {
            contourScaled.push_back(Point(int(contour[j].x * scaleWidth),
                                          int(contour[j].y * scaleHeight)));
        }
        // Check that every coordinate falls inside the source image
        bool coordinatesValid = true;
        for (size_t j = 0; j < contourScaled.size(); j++) {
            if (contourScaled[j].x < 0 || contourScaled[j].y < 0 ||
                contourScaled[j].x >= w || contourScaled[j].y >= h) {
                coordinatesValid = false;
                break;
            }
        }
        // Keep the contour only if all coordinates are valid
        if (coordinatesValid)
        {
            TextBlock detectedBox;
            // Unclip (expand) the minimum-area box
            RotatedRect box = minAreaRect(contourScaled);
            float longSide = std::max(box.size.width, box.size.height);
            if (longSide < longSideThresh)
            {
                continue;
            }
            // minAreaRect() is not normalized: it may return rectangles with angle=-90 or height < width
            const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property
            bool swap_size = false;
            if (box.size.width < box.size.height) // horizontal-wide text area is expected
                swap_size = true;
            else if (fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles
                swap_size = true;
            if (swap_size)
            {
                swap(box.size.width, box.size.height);
                if (box.angle < 0)
                    box.angle += 90;
                else if (box.angle > 0)
                    box.angle -= 90;
            }
            Point2f vertex[4];
            box.points(vertex); // order: bl, tl, tr, br
            vector<Point2f> approx;
            for (int j = 0; j < 4; j++)
                approx.emplace_back(vertex[j]);
            vector<Point2f> polygon;
            unclip(approx, polygon);
            box = minAreaRect(polygon);
            longSide = std::max(box.size.width, box.size.height);
            if (longSide < longSideThresh + 2)
            {
                continue;
            }
            detectedBox.boxVertices = polygon;
            detectedBox.angle = box.angle;
            detectedBox.boxScore = boxScore;
            rsBoxes.push_back(detectedBox);
            __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "box is (%f %f) (%f %f) (%f %f) (%f %f)",
                                detectedBox.boxVertices[0].x, detectedBox.boxVertices[0].y,
                                detectedBox.boxVertices[1].x, detectedBox.boxVertices[1].y,
                                detectedBox.boxVertices[2].x, detectedBox.boxVertices[2].y,
                                detectedBox.boxVertices[3].x, detectedBox.boxVertices[3].y);
        }
    }
    confidences = vector<float>(contours.size(), 1.0f);
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "finish run detect %d", (int)rsBoxes.size());
    // Reverse the results
    std::reverse(rsBoxes.begin(), rsBoxes.end());
}
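// Post-processing summary (comment added for readability): the probability map is binarized at
// 0.3, contours whose mean probability is below 0.5 are dropped, and surviving boxes must have a
// long side of at least 3 px before unclip and 5 px after. A typical call looks like the sketch
// below ("probMap" and "frame" are placeholder names, not variables defined in this file):
//
//     std::vector<TextBlock> boxes;
//     GetTextBoxes(probMap, frame, boxes); // probMap: CV_32FC1 network output, frame: original BGR image
//     // boxes[i].boxVertices are in frame coordinates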
// Preprocess the input image for detection: colour-space conversion and resizing.
cv::Mat preprocess(cv::Mat srcimg)
{
    int shortSize = 736;
    cv::Mat dstimg;
    // Convert the input image from BGR to RGB
    cvtColor(srcimg, dstimg, COLOR_BGR2RGB);
    int h = srcimg.rows;
    int w = srcimg.cols;
    // Initial height/width scale factors
    float scale_h = 1;
    float scale_w = 1;
    // Choose the scale factors based on which side is shorter
    if (h < w)
    {
        // Height is the short side: scale it to shortSize
        scale_h = (float)shortSize / (float)h;
        float tar_w = (float)w * scale_h;
        tar_w = tar_w - (int)tar_w % 32;
        tar_w = max((float)32, tar_w);
        scale_w = tar_w / (float)w;
    }
    else
    {
        // Width is the short side (or the sides are equal): scale it to shortSize
        scale_w = (float)shortSize / (float)w;
        float tar_h = (float)h * scale_w;
        tar_h = tar_h - (int)tar_h % 32;
        tar_h = max((float)32, tar_h);
        scale_h = tar_h / (float)h;
    }
    // Resize to the target size with linear interpolation
    resize(dstimg, dstimg, Size(int(scale_w * dstimg.cols), int(scale_h * dstimg.rows)), 0, 0, INTER_LINEAR);
    return dstimg;
}
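// Worked example of the resize rule above (illustrative numbers): for a 1080x1920 (h x w) frame,
// h < w, so scale_h = 736 / 1080 ≈ 0.681 and the target width 1920 * 0.681 ≈ 1308 px is rounded
// down by the `% 32` step to 1280, giving a detection input of roughly 736 x 1280. The rounding
// keeps the long side on a multiple of 32, which is the granularity this function enforces.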
// Convert an 8-bit RGB image into a normalized, planar (channel-first) float buffer.
std::vector<float> normalize_(cv::Mat img)
{
    float meanValues[3] = { 0.485, 0.456, 0.406 };
    float normValues[3] = { 0.229, 0.224, 0.225 };
    std::vector<float> input_image;
    // img.convertTo(img, CV_32F);
    int row = img.rows;
    int col = img.cols;
    input_image.resize(row * col * img.channels());
    for (int c = 0; c < 3; c++)
    {
        for (int i = 0; i < row; i++)
        {
            for (int j = 0; j < col; j++)
            {
                float pix = img.ptr<uchar>(i)[j * 3 + c];
                input_image[c * row * col + i * col + j] = (pix / 255.0 - meanValues[c]) / normValues[c];
            }
        }
    }
    return input_image;
}
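// Note on the buffer layout produced above: the data is tightly packed channel-first (plane 0 = R,
// 1 = G, 2 = B for the RGB image coming out of preprocess()), with
// value = (pixel / 255 - mean[c]) / std[c] using the ImageNet statistics. ocrProcess() hands this
// buffer to ncnn through the external-data constructor ncnn::Mat(w, h, 3, data), which reads it
// with exactly this tightly packed planar layout.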
cv::Mat getRotateCropImage(cv::Mat& frame, std::vector<Point2f> vertices)
{
    // Axis-aligned bounding rectangle of the vertices
    Rect rect = boundingRect(cv::Mat(vertices));
    // Extract the region of interest (ROI) from the original image
    cv::Mat crop_img = frame(rect);
    // The output image has the same size as the bounding rectangle
    const Size outputSize = Size(rect.width, rect.height);
    // Destination quad: bottom-left, top-left, top-right, bottom-right
    std::vector<Point2f> targetVertices{ Point2f(0, outputSize.height), Point2f(0, 0), Point2f(outputSize.width, 0), Point2f(outputSize.width, outputSize.height) };
    // Shift the source vertices into the cropped image's coordinate system
    for (int i = 0; i < 4; i++)
    {
        vertices[i].x -= rect.x;
        vertices[i].y -= rect.y;
    }
    // Perspective transform mapping the source quad onto the destination rectangle
    cv::Mat rotationMatrix = getPerspectiveTransform(vertices, targetVertices);
    // Apply the perspective warp to rectify the cropped region
    Mat result;
    warpPerspective(crop_img, result, rotationMatrix, outputSize, cv::INTER_LINEAR, cv::BORDER_REPLICATE);
    return result;
}
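// The destination quad above is listed bottom-left, top-left, top-right, bottom-right, matching
// the vertex order that RotatedRect::points() produces for the detected boxes, so the warp maps
// the (possibly rotated) text quadrilateral onto an upright rectangle of the bounding-box size.
// A minimal illustrative call (`frame` is any BGR cv::Mat; the vertex values are made up):
//
//     std::vector<cv::Point2f> quad = { {12, 80}, {10, 20}, {200, 22}, {202, 82} };
//     cv::Mat line = getRotateCropImage(frame, quad); // upright crop ready for recognition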
// Greedy CTC decoding of the recognition output: argmax per timestep, collapse repeats, drop blanks.
string PostProcess(int wIn, int hIn, float* pdataIn)
{
    int i = 0, j = 0;
    // For every timestep, take the index of the largest score as the label
    for (i = 0; i < wIn; i++)
    {
        int one_label_idx = 0;
        float max_data = -10000;
        for (j = 0; j < hIn; j++)
        {
            float data_ = pdataIn[i * hIn + j];
            if (data_ > max_data)
            {
                max_data = data_;
                one_label_idx = j;
            }
        }
        prebLabel[i] = one_label_idx;
    }
    // Keep only non-blank labels that are not repeats of the previous timestep
    std::vector<int> no_repeat_blank_label;
    for (size_t elementIndex = 0; elementIndex < (size_t)wIn; ++elementIndex)
    {
        if (prebLabel[elementIndex] != 0 && !(elementIndex > 0 && prebLabel[elementIndex - 1] == prebLabel[elementIndex]))
        {
            no_repeat_blank_label.push_back(prebLabel[elementIndex] - 1);
        }
    }
    // Build the final predicted text
    int len_s = no_repeat_blank_label.size();
    std::string plate_text;
    // todo
    for (i = 0; i < len_s; i++)
    {
        plate_text += alphabet[no_repeat_blank_label[i]];
    }
    return plate_text;
}
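// Decoding example (illustrative): if the per-timestep argmax labels are [0, 5, 5, 0, 3], label 0
// is the CTC blank and the repeated 5 is collapsed, leaving labels 5 and 3, which map to
// alphabet[4] and alphabet[2] (the stored value is label - 1 because index 0 is reserved for the
// blank). prebLabel must already be resized to the number of timesteps before calling PostProcess,
// as ocrProcess() does with prebLabel.resize(out.h).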
int inpWidth = 320;
int inpHeight = 48;
//cv::Mat preprocessRec(cv::Mat srcimg)
//{
// cv::Mat dstimg;
// int h = srcimg.rows;
// int w = srcimg.cols;
// const float ratio = w / float(h);
// int resized_w = int(ceil((float)inpHeight * ratio));
// if (ceil(inpHeight * ratio) > inpWidth)
// {
// resized_w = inpWidth;
// }
//
// resize(srcimg, dstimg, Size(resized_w, inpHeight), INTER_LINEAR);
// return dstimg;
//}
////std::vector<float> normalizeRec(cv::Mat img)
////{
////
//// std::vector<float> input_image;
//// // img.convertTo(img, CV_32F);
//// int row = img.rows;
//// int col = img.cols;
//// input_image.resize(inpHeight * inpWidth * img.channels());
//// for (int c = 0; c < 3; c++)
//// {
//// for (int i = 0; i < row; i++)
//// {
//// for (int j = 0; j < inpWidth; j++)
//// {
//// if (j < col)
//// {
//// float pix = img.ptr<uchar>(i)[j * 3 + c];
//// //input_image[c * row * inpWidth + i * inpWidth + j] = (pix / 255.0 - 0.5) / 0.5;
//// input_image[c * row * inpWidth + i * inpWidth + j] = (pix);
//// }
//// else
//// {
//// input_image[c * row * inpWidth + i * inpWidth + j] = 0;
//// }
//// }
//// }
//// }
//// return input_image;
////}
//struct TextBox {
// std::vector<cv::Point> boxPoint;
// float score;
// std::string text;
//};
//bool compareBoxWidth(const TextBox& a, const TextBox& b)
//{
// return abs(a.boxPoint[0].x - a.boxPoint[1].x) > abs(b.boxPoint[0].x - b.boxPoint[1].x);
//}
//cv::Mat getRotateCropImage2(const cv::Mat& src, std::vector<cv::Point> box) {
// cv::Mat image;
// src.copyTo(image);
// std::vector<cv::Point> points = box;
//
// int collectX[4] = { box[0].x, box[1].x, box[2].x, box[3].x };
// int collectY[4] = { box[0].y, box[1].y, box[2].y, box[3].y };
// int left = int(*std::min_element(collectX, collectX + 4));
// int right = int(*std::max_element(collectX, collectX + 4));
// int top = int(*std::min_element(collectY, collectY + 4));
// int bottom = int(*std::max_element(collectY, collectY + 4));
//
// cv::Mat imgCrop;
// image(cv::Rect(left, top, right - left, bottom - top)).copyTo(imgCrop);
//
// for (int i = 0; i < points.size(); i++) {
// points[i].x -= left;
// points[i].y -= top;
// }
//
//
// int imgCropWidth = int(sqrt(pow(points[0].x - points[1].x, 2) +
// pow(points[0].y - points[1].y, 2)));
// int imgCropHeight = int(sqrt(pow(points[0].x - points[3].x, 2) +
// pow(points[0].y - points[3].y, 2)));
//
// cv::Point2f ptsDst[4];
// ptsDst[0] = cv::Point2f(0., 0.);
// ptsDst[1] = cv::Point2f(imgCropWidth, 0.);
// ptsDst[2] = cv::Point2f(imgCropWidth, imgCropHeight);
// ptsDst[3] = cv::Point2f(0.f, imgCropHeight);
//
// cv::Point2f ptsSrc[4];
// ptsSrc[0] = cv::Point2f(points[0].x, points[0].y);
// ptsSrc[1] = cv::Point2f(points[1].x, points[1].y);
// ptsSrc[2] = cv::Point2f(points[2].x, points[2].y);
// ptsSrc[3] = cv::Point2f(points[3].x, points[3].y);
//
// cv::Mat M = cv::getPerspectiveTransform(ptsSrc, ptsDst);
//
// cv::Mat partImg;
// cv::warpPerspective(imgCrop, partImg, M,
// cv::Size(imgCropWidth, imgCropHeight),
// cv::BORDER_REPLICATE);
//
// if (float(partImg.rows) >= float(partImg.cols) * 1.5) {
// cv::Mat srcCopy = cv::Mat(partImg.rows, partImg.cols, partImg.depth());
// cv::transpose(partImg, srcCopy);
// cv::flip(srcCopy, srcCopy, 0);
// return srcCopy;
// }
// else {
// return partImg;
// }
//}
//std::vector<cv::Mat> getPartImages(const cv::Mat& src, std::vector<TextBox>& textBoxes)
//{
// std::sort(textBoxes.begin(), textBoxes.end(), compareBoxWidth);
// std::vector<cv::Mat> partImages;
// if (textBoxes.size() > 0)
// {
// for (int i = 0; i < textBoxes.size(); ++i)
// {
// cv::Mat partImg = getRotateCropImage2(src, textBoxes[i].boxPoint);
// partImages.emplace_back(partImg);
// }
// }
//
// return partImages;
//}
//struct TextLine {
// std::string text;
// std::vector<float> charScores;
//};
//
//template<class ForwardIterator>
//inline static size_t argmax(ForwardIterator first, ForwardIterator last) {
// return std::distance(first, std::max_element(first, last));
//}
//
//TextLine scoreToTextLine(const std::vector<float>& outputData, int h, int w)
//{
// int keySize = alphabet.size();
// std::string strRes;
// std::vector<float> scores;
// int lastIndex = 0;
// int maxIndex;
// float maxValue;
//
// for (int i = 0; i < h; i++)
// {
// maxIndex = 0;
// maxValue = -1000.f;
//
// maxIndex = int(argmax(outputData.begin() + i * w, outputData.begin() + i * w + w));
// maxValue = float(*std::max_element(outputData.begin() + i * w, outputData.begin() + i * w + w));// / partition;
// if (maxIndex > 0 && maxIndex < keySize && (!(i > 0 && maxIndex == lastIndex))) {
// scores.emplace_back(maxValue);
// strRes.append(alphabet[maxIndex - 1]);
// }
// lastIndex = maxIndex;
// }
// return { strRes, scores };
//}
static int ocrProcess(ncnn::Net* dectModel, ncnn::Net* recModel, const cv::Mat& bgr, std::vector<string>& textList)
{
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "start");
    prebLabel.clear();
    const float prob_threshold = 0.25f;
    const float nms_threshold = 0.45f;
    int img_w = bgr.cols;
    int img_h = bgr.rows;
    cv::Mat dstimg = preprocess(bgr);
    float pix = dstimg.ptr<uchar>(200)[200 * 3 + 1]; // debug probe of pixel (200, 200), channel 1
    std::vector<float> result = normalize_(dstimg);
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "(200, 200, 1) %f ", pix);
    ncnn::Mat in_pad(dstimg.cols, dstimg.rows, 3, (void*)result.data());
    ncnn::Extractor dect = dectModel->create_extractor();
    dect.input("x", in_pad);
    ncnn::Mat out;
    dect.extract("sigmoid_0.tmp_0", out);
    Mat binary(out.h, out.w, CV_32FC1);
    memcpy(binary.data, out.data, out.w * out.h * sizeof(float));
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "dect out %d %d ", out.w, out.h);
    std::vector<TextBlock> textBlocks;
    GetTextBoxes(binary, const_cast<cv::Mat&>(bgr), textBlocks);
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "finish get boxes");
    for (size_t i = 0; i < textBlocks.size(); i++)
    {
        cv::Mat textimg = getRotateCropImage(const_cast<cv::Mat&>(bgr), textBlocks[i].boxVertices);
        const float mean_vals[3] = { 127.5, 127.5, 127.5 };
        const float norm_vals[3] = { 1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5 };
        float scale = (float)inpHeight / (float)textimg.rows;
        int dstWidth = int((float)textimg.cols * scale);
        cv::Mat srcResize;
        cv::resize(textimg, srcResize, cv::Size(dstWidth, inpHeight));
        // if you use PP-OCRv3 you should change PIXEL_RGB to PIXEL_RGB2BGR
        ncnn::Mat input = ncnn::Mat::from_pixels(srcResize.data, ncnn::Mat::PIXEL_RGB2BGR, srcResize.cols, srcResize.rows);
        input.substract_mean_normalize(mean_vals, norm_vals);
        ncnn::Mat in_pack3;
        ncnn::convert_packing(input, in_pack3, 3);
        cv::Mat aa(input.h, input.w, CV_32FC3);
        memcpy((uchar*)aa.data, in_pack3.data, input.w * input.h * 3 * sizeof(float));
        ncnn::Extractor rec = recModel->create_extractor();
        rec.input("input", input);
        ncnn::Mat out;
        rec.extract("out", out);
        __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "finish run rec, out %d %d", out.w, out.h);
        float* floatArray = (float*)out.data;
        std::vector<float> outputData(floatArray, floatArray + out.h * out.w);
        string textResult = "";
        // Buffer for the per-timestep labels
        prebLabel.resize(out.h);
        string text = PostProcess(out.h, out.w, floatArray);
        // Strip ASCII punctuation and whitespace
        text.erase(std::remove_if(text.begin(), text.end(), [](unsigned char c) {
            return std::ispunct(c) || std::isspace(c);
        }), text.end());
        // Keep the text only if it is not empty
        if (!text.empty())
        {
            textResult = text;
        }
        textList.push_back(textResult);
    }
    for (size_t i = 0; i < textBlocks.size(); ++i)
    {
        textBlocks[i].boxVertices.clear();
    }
    textBlocks.clear();
    textBlocks.shrink_to_fit();
    __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "End");
    return 0;
}
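// Recognition path in the loop above (summary comment): each detected quadrilateral is rectified
// with getRotateCropImage(), resized to a fixed height of inpHeight (48) while keeping its aspect
// ratio, normalized to roughly [-1, 1] via mean 127.5 and scale 1/127.5, and fed to the
// recognition net as blob "input". The "out" blob is treated as out.h timesteps by out.w classes
// and decoded by PostProcess(). The intermediate `in_pack3`/`aa` conversion is only a debugging
// aid; it does not feed the network.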
  581. extern "C" void CreateNet(void** detectModel, void** recModel, char* decBinMem, char* decParamMem, char* recBinMem, char* recParamMem, const void* modelDataKeys, size_t modelDataLengthKeys)
  582. {
  583. const auto resultDetect = new ncnn::Net;
  584. *detectModel = resultDetect;
  585. const auto resultDetectRec = new ncnn::Net;
  586. *recModel = resultDetectRec;
  587. int ret[4] = {-2};
  588. const unsigned char* dectBinMemP = reinterpret_cast<const unsigned char*>(decBinMem);
  589. ncnn::DataReaderFromMemory drDect(dectBinMemP);
  590. const unsigned char* recBinMemP = reinterpret_cast<const unsigned char*>(recBinMem);
  591. ncnn::DataReaderFromMemory drRec(recBinMemP);
  592. resultDetect->opt.use_vulkan_compute = false;
  593. ret[0] = resultDetect->load_param_mem(decParamMem);
  594. ret[1] = resultDetect->load_model(drDect);
  595. ret[2] = resultDetectRec->load_param_mem(recParamMem);
  596. ret[3] = resultDetectRec->load_model(drRec);
  597. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "LoadDectBin %d %d %d %d", ret[0], ret[1], ret[2], ret[3]);
  598. //string keys_path = keysName;
  599. //ifstream ifs(keys_path);
  600. //std::string line;
  601. //while (getline(ifs, line))
  602. //{
  603. // alphabet.push_back(line);
  604. //}
  605. //alphabet.push_back(" ");
  606. // 将字节数据转换为字符串
  607. std::string text(reinterpret_cast<const char*>(modelDataKeys), modelDataLengthKeys);
  608. // 使用字符串流处理字符串
  609. std::istringstream iss(text);
  610. std::string line;
  611. // 逐行读取并添加到 alphabet 中
  612. while (std::getline(iss, line))
  613. {
  614. alphabet.push_back(line);
  615. }
  616. alphabet.push_back(" ");
  617. }
  618. extern "C" void FreeNet(void* detectModel, void* recModel)
  619. {
  620. const auto yolo = static_cast<ncnn::Net*>(detectModel);
  621. yolo->clear();
  622. const auto rec = static_cast<ncnn::Net*>(recModel);
  623. rec->clear();
  624. delete yolo;
  625. delete rec;
  626. }
typedef struct _StructStringInfo
{
    char* result;
    int bufferSize;
} StructStringInfo;
  632. extern "C" void Test(void* detectModel, void* recModel, unsigned char* input, int iw, int ih, StructStringInfo*& ob, int* cnt)
  633. {
  634. ncnn::Net* dect = static_cast<ncnn::Net*>(detectModel);
  635. ncnn::Net* rec = static_cast<ncnn::Net*>(recModel);
  636. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "image in");
  637. cv::Mat m = cv::Mat(ih, iw, CV_8UC4, (void*)input);
  638. cvtColor(m, m, cv::COLOR_RGBA2BGR);
  639. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "image cvt out");
  640. // 获取图像在指定坐标(x, y)处的像素值
  641. cv::Vec3b pixel = m.at<cv::Vec3b>(100, 100);
  642. // 输出像素值到Android Logcat
  643. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "Pixel value at (%d, %d): B=%d, G=%d, R=%d",
  644. 100, 100, pixel[0], pixel[1], pixel[2]);
  645. // 获取图像在指定坐标(x, y)处的像素值
  646. pixel = m.at<cv::Vec3b>(200, 200);
  647. // 输出像素值到Android Logcat
  648. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "Pixel value at (%d, %d): B=%d, G=%d, R=%d",
  649. 200, 200, pixel[0], pixel[1], pixel[2]);
  650. // 获取图像在指定坐标(x, y)处的像素值
  651. pixel = m.at<cv::Vec3b>(300, 300);
  652. // 输出像素值到Android Logcat
  653. __android_log_print(ANDROID_LOG_INFO, "EvaluateOneImage", "Pixel value at (%d, %d): B=%d, G=%d, R=%d",
  654. 300, 300, pixel[0], pixel[1], pixel[2]);
  655. std::vector<string> textLists;
  656. ocrProcess(dect, rec, m, textLists);
  657. int count = static_cast<int>(textLists.size());
  658. if(count > 100)
  659. {
  660. count = 100;
  661. }
  662. *cnt = count;
  663. ob = new StructStringInfo[count];
  664. for (int i=0; i< count; ++i)
  665. {
  666. ob[i].result = new char[textLists[i].length()+1];
  667. std::copy(textLists[i].begin(), textLists[i].end(), ob[i].result);
  668. ob[i].result[textLists[i].length()] = '\0'; // 添加null终止符
  669. }
  670. textLists.clear();
  671. }
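// Illustrative calling sequence from the host side (a sketch; the variable names and model
// buffers are assumptions, not defined in this file):
//
//     void* det = nullptr;
//     void* rec = nullptr;
//     CreateNet(&det, &rec, detBin, detParam, recBin, recParam, keysData, keysLen);
//
//     StructStringInfo* results = nullptr;
//     int n = 0;
//     Test(det, rec, rgbaPixels, width, height, results, &n); // rgbaPixels: width*height*4 RGBA bytes
//     for (int i = 0; i < n; ++i) {
//         // use results[i].result (null-terminated text)
//     }
//
//     // Test() allocates with new[]; presumably the caller is expected to release the buffers:
//     for (int i = 0; i < n; ++i) delete[] results[i].result;
//     delete[] results;
//     FreeNet(det, rec);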