123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455 |
- #include"InferNetOnnxPaddleOcrDetect.h"
- InferNetOnnxPaddleOcrDetect::InferNetOnnxPaddleOcrDetect()
- {
- }
- void InferNetOnnxPaddleOcrDetect::LoadNetwork(const void* modelData, size_t modelDataLen)
- {
- if (_modelLoaded)
- {
- // 如果模型已加载,则释放之前的模型
- delete net;
- net = nullptr;
- }
- this->binaryThreshold = 0.3;
- this->polygonThreshold = 0.5;
- this->unclipRatio = 1.6;
- this->maxCandidates = 1000;
- sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
- net = new Session(env, modelData, modelDataLen, sessionOptions);
- size_t numInputNodes = net->GetInputCount();
- size_t numOutputNodes = net->GetOutputCount();
- AllocatorWithDefaultOptions allocator;
- for (int i = 0; i < numInputNodes; i++)
- {
- inputNames.push_back(net->GetInputName(i, allocator));
- }
- for (int i = 0; i < numOutputNodes; i++)
- {
- outputNames.push_back(net->GetOutputName(i, allocator));
- }
- _modelLoaded = true;
- }
- std::vector<TextBlock> InferNetOnnxPaddleOcrDetect::Process(cv::Mat& srcimg)
- { // 对图像预处理
- //cv::Mat dstimg = this->preprocess(srcimg);
- cv::Mat dstimg = srcimg.clone();
- this->normalize_(dstimg);
- // 创建用于存储输入形状的数组
- array<int64_t, 4> input_shape_{ 1, 3, dstimg.rows, dstimg.cols };
- // 创建CPU内存的分配器信息
- auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
- // 创建输入张量
- Value input_tensor_ = Value::CreateTensor<float>(allocator_info, input_image_.data(), input_image_.size(), input_shape_.data(), input_shape_.size());
- // 运行ONNX模型获取输出
- vector<Value> ort_outputs = net->Run(RunOptions{ nullptr }, &inputNames[0], &input_tensor_, 1, outputNames.data(), outputNames.size());
- const float* floatArray = ort_outputs[0].GetTensorMutableData<float>();
- int outputCount = 1;
- for (int i = 0; i < ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().size(); i++)
- {
- int dim = ort_outputs.at(0).GetTensorTypeAndShapeInfo().GetShape().at(i);
- outputCount *= dim;
- }
- Mat binary(dstimg.rows, dstimg.cols, CV_32FC1);
- memcpy(binary.data, floatArray, outputCount * sizeof(float));
- // 输出结果提取box
- std::vector<TextBlock> results;
- results = GetTextBoxes(binary, srcimg);
- return results;
- }
- // 该函数用于对输入图像进行预处理,包括颜色空间转换和图像缩放
- cv::Mat InferNetOnnxPaddleOcrDetect::preprocess(cv::Mat srcimg)
- {
- cv::Mat dstimg = srcimg.clone();
- //cv::Mat dstimg;
- int h = srcimg.rows;
- int w = srcimg.cols;
- // 初始化高度和宽度的缩放比例
- float scale_h = 1;
- float scale_w = 1;
- // 根据图像的高度和宽度选择缩放比例
- if (h < w)
- {
- // 如果图像高度小于宽度 计算高度缩放比例
- scale_h = (float)this->shortSize / (float)h;
- float tar_w = (float)w * scale_h;
- tar_w = tar_w - (int)tar_w % 32;
- tar_w = max((float)32, tar_w);
- scale_w = tar_w / (float)w;
- }
- else
- {
- // 如果图像宽度小于等于高度 计算宽度缩放比例
- scale_w = (float)this->shortSize / (float)w;
- float tar_h = (float)h * scale_w;
- tar_h = tar_h - (int)tar_h % 32;
- tar_h = max((float)32, tar_h);
- scale_h = tar_h / (float)h;
- }
- // 使用线性插值对图像进行缩放,以调整到目标尺寸
- resize(dstimg, dstimg, Size(int(scale_w * dstimg.cols), int(scale_h * dstimg.rows)), INTER_LINEAR);
- return dstimg;
- }
- void InferNetOnnxPaddleOcrDetect::normalize_(cv::Mat img)
- {
- // img.convertTo(img, CV_32F);
- int row = img.rows;
- int col = img.cols;
- this->input_image_.resize(row * col * img.channels());
- for (int c = 0; c < 3; c++)
- {
- for (int i = 0; i < row; i++)
- {
- for (int j = 0; j < col; j++)
- {
- float pix = img.ptr<uchar>(i)[j * 3 + c];
- this->input_image_[c * row * col + i * col + j] = (pix / 255.0 - this->meanValues[c]) / this->normValues[c];
- }
- }
- }
- }
- Point2f InferNetOnnxPaddleOcrDetect::pointCenBoxs(vector<Point2f> polygonBoxs)
- {
- // 计算中心点坐标
- Point2f center(0, 0); // 初始化中心点坐标
- // 遍历四个点,累加坐标
- for (const auto& point : polygonBoxs) {
- center.x += point.x;
- center.y += point.y;
- }
- // 计算平均值
- center.x /= polygonBoxs.size();
- center.y /= polygonBoxs.size();
- return center;
- }
- std::vector<TextBlock> InferNetOnnxPaddleOcrDetect::GetTextBoxes(cv::Mat& binaryIN, cv::Mat& srcimgIN)
- {
- // 获取图像的高度和宽度
- int h = srcimgIN.rows;
- int w = srcimgIN.cols;
- // 二值化处理
- Mat bitmap;
- threshold(binaryIN, bitmap, binaryThreshold, 255, THRESH_BINARY);
- //// 计算图像缩放比例
- float scaleHeight = (float)(h) / (float)(binaryIN.size[0]);
- float scaleWidth = (float)(w) / (float)(binaryIN.size[1]);
- // 寻找轮廓
- vector< vector<Point> > contours;
- bitmap.convertTo(bitmap, CV_8UC1);
- findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);
- // 限制候选框的数量
- size_t numCandidate = min(contours.size(), (size_t)(maxCandidates > 0 ? maxCandidates : INT_MAX));
- vector<float> confidences;
- //vector< vector<Point2f> > rsBoxes;
- std::vector<TextBlock> rsBoxes;
- // 遍历每个候选框
- for (size_t i = 0; i < numCandidate; i++)
- {
- vector<Point>& contour = contours[i];
- // 计算文本轮廓分数
- float score = contourScore(binaryIN, contour);
- float boxScore = 0.0f;
- if (score < polygonThreshold) {
- boxScore = score;
- continue;
- }
- //// 对轮廓进行缩放
- vector<Point> contourScaled; contourScaled.reserve(contour.size());
- for (size_t j = 0; j < contour.size(); j++)
- {
- contourScaled.push_back(Point(int(contour[j].x * scaleWidth),
- int(contour[j].y * scaleHeight)));
- }
- // 检查坐标是否有效
- bool coordinatesValid = true;
- for (size_t j = 0; j < contourScaled.size(); j++) {
- if (contourScaled[j].x < 0 || contourScaled[j].y < 0 ||
- contourScaled[j].x >= w || contourScaled[j].y >= h) {
- coordinatesValid = false;
- break;
- }
- }
- // 如果坐标有效,则处理该结果
- if (coordinatesValid)
- {
- TextBlock detectedBox;
- // 解除裁剪
- RotatedRect box = minAreaRect(contourScaled);
- float longSide = std::max(box.size.width, box.size.height);
- if (longSide < longSideThresh)
- {
- continue;
- }
- // minArea() rect is not normalized, it may return rectangles with angle=-90 or height < width
- const float angle_threshold = 60; // do not expect vertical text, TODO detection algo property
- bool swap_size = false;
- if (box.size.width < box.size.height) // horizontal-wide text area is expected
- swap_size = true;
- else if (fabs(box.angle) >= angle_threshold) // don't work with vertical rectangles
- swap_size = true;
- if (swap_size)
- {
- swap(box.size.width, box.size.height);
- if (box.angle < 0)
- box.angle += 90;
- else if (box.angle > 0)
- box.angle -= 90;
- }
- Point2f vertex[4];
- box.points(vertex); // order: bl, tl, tr, br
- vector<Point2f> approx;
- for (int j = 0; j < 4; j++)
- approx.emplace_back(vertex[j]);
- vector<Point2f> polygon;
- unclip(approx, polygon);
- box = minAreaRect(polygon);
- longSide = std::max(box.size.width, box.size.height);
- if (longSide < longSideThresh + 2)
- {
- continue;
- }
- if (std::all_of(polygon.begin(), polygon.end(), [w, h](const Point2f& p) {
- return p.x >= 0 && p.x <= w && p.y >= 0 && p.y <= h;
- }))
- {
- Point2f centPoint = pointCenBoxs(polygon);
- detectedBox.boxVertices = polygon;
- detectedBox.boxCenterVer = centPoint;
- detectedBox.angle = box.angle;
- detectedBox.boxScore = boxScore;
- rsBoxes.push_back(detectedBox);
- }
- }
- }
- confidences = vector<float>(contours.size(), 1.0f);
- // 对 results 进行倒序处理
- std::reverse(rsBoxes.begin(), rsBoxes.end());
- return rsBoxes;
- }
- std::vector< std::vector<Point2f> > InferNetOnnxPaddleOcrDetect::order_points_clockwise(std::vector< std::vector<Point2f> > results)
- {
- std::vector< std::vector<Point2f> > order_points(results);
- for (int i = 0; i < results.size(); i++)
- {
- float max_sum_pts = -10000;
- float min_sum_pts = 10000;
- float max_diff_pts = -10000;
- float min_diff_pts = 10000;
- int max_sum_pts_id = 0;
- int min_sum_pts_id = 0;
- int max_diff_pts_id = 0;
- int min_diff_pts_id = 0;
- for (int j = 0; j < 4; j++)
- {
- const float sum_pt = results[i][j].x + results[i][j].y;
- if (sum_pt > max_sum_pts)
- {
- max_sum_pts = sum_pt;
- max_sum_pts_id = j;
- }
- if (sum_pt < min_sum_pts)
- {
- min_sum_pts = sum_pt;
- min_sum_pts_id = j;
- }
- const float diff_pt = results[i][j].y - results[i][j].x;
- if (diff_pt > max_diff_pts)
- {
- max_diff_pts = diff_pt;
- max_diff_pts_id = j;
- }
- if (diff_pt < min_diff_pts)
- {
- min_diff_pts = diff_pt;
- min_diff_pts_id = j;
- }
- }
- order_points[i][0].x = results[i][min_sum_pts_id].x;
- order_points[i][0].y = results[i][min_sum_pts_id].y;
- order_points[i][2].x = results[i][max_sum_pts_id].x;
- order_points[i][2].y = results[i][max_sum_pts_id].y;
- order_points[i][1].x = results[i][min_diff_pts_id].x;
- order_points[i][1].y = results[i][min_diff_pts_id].y;
- order_points[i][3].x = results[i][max_diff_pts_id].x;
- order_points[i][3].y = results[i][max_diff_pts_id].y;
- }
- return order_points;
- }
- void InferNetOnnxPaddleOcrDetect::drawPred(cv::Mat& srcimg, std::vector< std::vector<Point2f> > results)
- {
- for (int i = 0; i < results.size(); i++)
- {
- for (int j = 0; j < 4; j++)
- {
- circle(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), 2, Scalar(0, 0, 255), -1);
- if (j < 3)
- {
- line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][j + 1].x, (int)results[i][j + 1].y), Scalar(0, 255, 0));
- }
- else
- {
- line(srcimg, Point((int)results[i][j].x, (int)results[i][j].y), Point((int)results[i][0].x, (int)results[i][0].y), Scalar(0, 255, 0));
- }
- }
- }
- }
- // 该函数计算二进制图像中指定轮廓的分数
- float InferNetOnnxPaddleOcrDetect::contourScore(cv::Mat& binary, std::vector<Point>& contour)
- {
- // 计算轮廓的边界矩形
- Rect rect = boundingRect(contour);
- // 计算边界框在二进制图像中的有效范围
- int xmin = max(rect.x, 0);
- int xmax = min(rect.x + rect.width, binary.cols - 1);
- int ymin = max(rect.y, 0);
- int ymax = min(rect.y + rect.height, binary.rows - 1);
- // 提取二进制图像中边界框的ROI(感兴趣区域)
- cv::Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));
- // 创建一个掩码,用于标识ROI中的像素
- cv::Mat mask = cv::Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
- // 将轮廓中的点坐标调整为ROI内的坐标
- std::vector<Point> roiContour;
- for (size_t i = 0; i < contour.size(); i++) {
- Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
- roiContour.push_back(pt);
- }
- // 使用填充多边形函数将ROI内的轮廓标记为1
- std::vector<std::vector<Point>> roiContours = { roiContour };
- fillPoly(mask, roiContours, Scalar(1));
- // 计算ROI内二进制图像的均值,以掩码为权重
- float score = mean(binROI, mask).val[0];
- return score;
- }
- void InferNetOnnxPaddleOcrDetect::unclip(std::vector<Point2f>& inPoly, std::vector<Point2f>& outPoly)
- {
- // 计算轮廓的面积
- float area = contourArea(inPoly);
- float length = arcLength(inPoly, true); // 计算轮廓的周长
- float distance = area * unclipRatio / length; // 计算解剪距离
- // 获取输入轮廓的点数
- size_t numPoints = inPoly.size();
- // 存储新的轮廓线段
- std::vector<std::vector<Point2f>> newLines;
- // 遍历原始轮廓的每个点
- for (size_t i = 0; i < numPoints; i++)
- {
- std::vector<Point2f> newLine;
- Point pt1 = inPoly[i];
- Point pt2 = inPoly[(i - 1) % numPoints];
- Point vec = pt1 - pt2;
- // 计算解剪距离
- float unclipDis = (float)(distance / norm(vec));
- // 计算旋转后的向量
- Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
- // 添加旋转后的点到新线段
- newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
- newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
- newLines.push_back(newLine);
- }
- // 获取新线段的数量
- size_t numLines = newLines.size();
- // 遍历新线段集合
- for (size_t i = 0; i < numLines; i++)
- {
- Point2f a = newLines[i][0];
- Point2f b = newLines[i][1];
- Point2f c = newLines[(i + 1) % numLines][0];
- Point2f d = newLines[(i + 1) % numLines][1];
- Point2f pt;
- // 计算两向量的夹角余弦值
- Point2f v1 = b - a;
- Point2f v2 = d - c;
- float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));
- // 根据夹角余弦值判断旋转后的点位置
- if (fabs(cosAngle) > 0.7)
- {
- pt.x = (b.x + c.x) * 0.5;
- pt.y = (b.y + c.y) * 0.5;
- }
- else
- {
- float denom = a.x * (float)(d.y - c.y) + b.x * (float)(c.y - d.y) +
- d.x * (float)(b.y - a.y) + c.x * (float)(a.y - b.y);
- float num = a.x * (float)(d.y - c.y) + c.x * (float)(a.y - d.y) + d.x * (float)(c.y - a.y);
- float s = num / denom;
- pt.x = a.x + s * (b.x - a.x);
- pt.y = a.y + s * (b.y - a.y);
- }
- // 将计算得到的点添加到输出轮廓
- outPoly.push_back(pt);
- }
- }
- cv::Mat InferNetOnnxPaddleOcrDetect::getRotateCropImage(cv::Mat& frame, std::vector<Point2f> vertices)
- {
- // 计算包围轮廓的最小矩形
- Rect rect = boundingRect(cv::Mat(vertices));
- // 从原始图像中提取感兴趣区域(ROI)
- cv::Mat crop_img = frame(rect);
- // 设置输出图像的大小为矩形的宽度和高度
- const Size outputSize = Size(rect.width, rect.height);
- // 定义目标矩形的四个顶点坐标
- std::vector<Point2f> targetVertices{ Point2f(0, outputSize.height), Point2f(0, 0), Point2f(outputSize.width, 0), Point2f(outputSize.width, outputSize.height) };
- // 将原始轮廓的顶点坐标调整为在裁剪后的图像中的坐标
- for (int i = 0; i < 4; i++)
- {
- vertices[i].x -= rect.x;
- vertices[i].y -= rect.y;
- }
- // 计算透视变换矩阵,将原始轮廓映射到目标矩形
- cv::Mat rotationMatrix = cv::getPerspectiveTransform(vertices, targetVertices);
- // 应用透视变换,旋转和裁剪原始图像的感兴趣区域
- cv::Mat result;
- cv::warpPerspective(crop_img, result, rotationMatrix, outputSize, cv::BORDER_REPLICATE);
- return result;
- }
- void InferNetOnnxPaddleOcrDetect::Dispose()
- {
- // 在此处释放资源,确保在对象销毁时调用
- //delete net;
- //net = nullptr;
- }
|