yolov5.cpp 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. // Tencent is pleased to support the open source community by making ncnn available.
  2. //
  3. // Copyright (C) 2022 THL A29 Limited, a Tencent company. All rights reserved.
  4. //
  5. // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
  6. // in compliance with the License. You may obtain a copy of the License at
  7. //
  8. // https://opensource.org/licenses/BSD-3-Clause
  9. //
  10. // Unless required by applicable law or agreed to in writing, software distributed
  11. // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
  12. // CONDITIONS OF ANY KIND, either express or implied. See the License for the
  13. // specific language governing permissions and limitations under the License.
  #include "layer.h"
  #include "net.h"
  #include <opencv2/core/core.hpp>
  #include <opencv2/highgui/highgui.hpp>
  #include <opencv2/imgproc/imgproc.hpp>
  #include <algorithm>
  #include <cfloat>
  #include <cmath>
  #include <cstring>
  #include <vector>
  21. struct Object
  22. {
  23. cv::Rect_<float> rect;
  24. int label;
  25. float prob;
  26. };
  27. static inline float intersection_area(const Object& a, const Object& b)
  28. {
  29. cv::Rect_<float> inter = a.rect & b.rect;
  30. return inter.area();
  31. }
  32. static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
  33. {
  34. int i = left;
  35. int j = right;
  36. float p = faceobjects[(left + right) / 2].prob;
  37. while (i <= j)
  38. {
  39. while (faceobjects[i].prob > p)
  40. i++;
  41. while (faceobjects[j].prob < p)
  42. j--;
  43. if (i <= j)
  44. {
  45. // swap
  46. std::swap(faceobjects[i], faceobjects[j]);
  47. i++;
  48. j--;
  49. }
  50. }
  51. #pragma omp parallel sections
  52. {
  53. #pragma omp section
  54. {
  55. if (left < j) qsort_descent_inplace(faceobjects, left, j);
  56. }
  57. #pragma omp section
  58. {
  59. if (i < right) qsort_descent_inplace(faceobjects, i, right);
  60. }
  61. }
  62. }
  63. static void qsort_descent_inplace(std::vector<Object>& faceobjects)
  64. {
  65. if (faceobjects.empty())
  66. return;
  67. qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
  68. }
  69. static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold, bool agnostic = false)
  70. {
  71. picked.clear();
  72. const int n = faceobjects.size();
  73. std::vector<float> areas(n);
  74. for (int i = 0; i < n; i++)
  75. {
  76. areas[i] = faceobjects[i].rect.area();
  77. }
  78. for (int i = 0; i < n; i++)
  79. {
  80. const Object& a = faceobjects[i];
  81. int keep = 1;
  82. for (int j = 0; j < (int)picked.size(); j++)
  83. {
  84. const Object& b = faceobjects[picked[j]];
  85. if (!agnostic && a.label != b.label)
  86. continue;
  87. // intersection over union
  88. float inter_area = intersection_area(a, b);
  89. float union_area = areas[i] + areas[picked[j]] - inter_area;
  90. // float IoU = inter_area / union_area
  91. if (inter_area / union_area > nms_threshold)
  92. keep = 0;
  93. }
  94. if (keep)
  95. picked.push_back(i);
  96. }
  97. }
  // Logistic function 1 / (1 + e^-x), evaluated in single precision.
  // Saturates to 0 for very negative and to 1 for very positive inputs.
  static inline float sigmoid(float x)
  {
      // std::exp picks the float overload, so no double round-trip and no
      // reliance on a global-namespace `exp` being declared.
      return 1.f / (1.f + std::exp(-x));
  }
  103. static void generate_proposals(const ncnn::Mat& anchors, int stride, const ncnn::Mat& in_pad, const ncnn::Mat& feat_blob, float prob_threshold, std::vector<Object>& objects)
  104. {
  105. const int num_grid_x = feat_blob.w;
  106. const int num_grid_y = feat_blob.h;
  107. const int num_anchors = anchors.w / 2;
  108. const int num_class = feat_blob.c / num_anchors - 5;
  109. const int feat_offset = num_class + 5;
  110. for (int q = 0; q < num_anchors; q++)
  111. {
  112. const float anchor_w = anchors[q * 2];
  113. const float anchor_h = anchors[q * 2 + 1];
  114. for (int i = 0; i < num_grid_y; i++)
  115. {
  116. for (int j = 0; j < num_grid_x; j++)
  117. {
  118. // find class index with max class score
  119. int class_index = 0;
  120. float class_score = -FLT_MAX;
  121. for (int k = 0; k < num_class; k++)
  122. {
  123. float score = feat_blob.channel(q * feat_offset + 5 + k).row(i)[j];
  124. if (score > class_score)
  125. {
  126. class_index = k;
  127. class_score = score;
  128. }
  129. }
  130. float box_score = feat_blob.channel(q * feat_offset + 4).row(i)[j];
  131. float confidence = sigmoid(box_score) * sigmoid(class_score);
  132. if (confidence >= prob_threshold)
  133. {
  134. // yolov5/models/yolo.py Detect forward
  135. // y = x[i].sigmoid()
  136. // y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
  137. // y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
  138. float dx = sigmoid(feat_blob.channel(q * feat_offset + 0).row(i)[j]);
  139. float dy = sigmoid(feat_blob.channel(q * feat_offset + 1).row(i)[j]);
  140. float dw = sigmoid(feat_blob.channel(q * feat_offset + 2).row(i)[j]);
  141. float dh = sigmoid(feat_blob.channel(q * feat_offset + 3).row(i)[j]);
  142. float pb_cx = (dx * 2.f - 0.5f + j) * stride;
  143. float pb_cy = (dy * 2.f - 0.5f + i) * stride;
  144. float pb_w = pow(dw * 2.f, 2) * anchor_w;
  145. float pb_h = pow(dh * 2.f, 2) * anchor_h;
  146. float x0 = pb_cx - pb_w * 0.5f;
  147. float y0 = pb_cy - pb_h * 0.5f;
  148. float x1 = pb_cx + pb_w * 0.5f;
  149. float y1 = pb_cy + pb_h * 0.5f;
  150. Object obj;
  151. obj.rect.x = x0;
  152. obj.rect.y = y0;
  153. obj.rect.width = x1 - x0;
  154. obj.rect.height = y1 - y0;
  155. obj.label = class_index;
  156. obj.prob = confidence;
  157. objects.push_back(obj);
  158. }
  159. }
  160. }
  161. }
  162. }
  163. static int detect_yolov5(ncnn::Net* yolov5, const cv::Mat& bgr, std::vector<Object>& objects)
  164. {
  165. const int target_size = 640;
  166. const float prob_threshold = 0.25f;
  167. const float nms_threshold = 0.45f;
  168. int img_w = bgr.cols;
  169. int img_h = bgr.rows;
  170. // yolov5/models/common.py DetectMultiBackend
  171. const int max_stride = 64;
  172. // letterbox pad to multiple of max_stride
  173. int w = img_w;
  174. int h = img_h;
  175. float scale = 1.f;
  176. if (w > h)
  177. {
  178. scale = (float)target_size / w;
  179. w = target_size;
  180. h = h * scale;
  181. }
  182. else
  183. {
  184. scale = (float)target_size / h;
  185. h = target_size;
  186. w = w * scale;
  187. }
  188. ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h, w, h);
  189. // pad to target_size rectangle
  190. // yolov5/utils/datasets.py letterbox
  191. int wpad = (w + max_stride - 1) / max_stride * max_stride - w;
  192. int hpad = (h + max_stride - 1) / max_stride * max_stride - h;
  193. ncnn::Mat in_pad;
  194. ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 114.f);
  195. const float norm_vals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
  196. in_pad.substract_mean_normalize(0, norm_vals);
  197. ncnn::Extractor ex = yolov5->create_extractor();
  198. ex.input("in0", in_pad);
  199. std::vector<Object> proposals;
  200. // anchor setting from yolov5/models/yolov5s.yaml
  201. // stride 8
  202. {
  203. ncnn::Mat out;
  204. ex.extract("out0", out);
  205. ncnn::Mat anchors(6);
  206. anchors[0] = 10.f;
  207. anchors[1] = 13.f;
  208. anchors[2] = 16.f;
  209. anchors[3] = 30.f;
  210. anchors[4] = 33.f;
  211. anchors[5] = 23.f;
  212. std::vector<Object> objects8;
  213. generate_proposals(anchors, 8, in_pad, out, prob_threshold, objects8);
  214. proposals.insert(proposals.end(), objects8.begin(), objects8.end());
  215. }
  216. // stride 16
  217. {
  218. ncnn::Mat out;
  219. ex.extract("out1", out);
  220. ncnn::Mat anchors(6);
  221. anchors[0] = 30.f;
  222. anchors[1] = 61.f;
  223. anchors[2] = 62.f;
  224. anchors[3] = 45.f;
  225. anchors[4] = 59.f;
  226. anchors[5] = 119.f;
  227. std::vector<Object> objects16;
  228. generate_proposals(anchors, 16, in_pad, out, prob_threshold, objects16);
  229. proposals.insert(proposals.end(), objects16.begin(), objects16.end());
  230. }
  231. // stride 32
  232. {
  233. ncnn::Mat out;
  234. ex.extract("out2", out);
  235. ncnn::Mat anchors(6);
  236. anchors[0] = 116.f;
  237. anchors[1] = 90.f;
  238. anchors[2] = 156.f;
  239. anchors[3] = 198.f;
  240. anchors[4] = 373.f;
  241. anchors[5] = 326.f;
  242. std::vector<Object> objects32;
  243. generate_proposals(anchors, 32, in_pad, out, prob_threshold, objects32);
  244. proposals.insert(proposals.end(), objects32.begin(), objects32.end());
  245. }
  246. // sort all proposals by score from highest to lowest
  247. qsort_descent_inplace(proposals);
  248. // apply nms with nms_threshold
  249. std::vector<int> picked;
  250. nms_sorted_bboxes(proposals, picked, nms_threshold);
  251. int count = picked.size();
  252. objects.resize(count);
  253. for (int i = 0; i < count; i++)
  254. {
  255. objects[i] = proposals[picked[i]];
  256. // adjust offset to original unpadded
  257. float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
  258. float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
  259. float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
  260. float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
  261. // clip
  262. x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
  263. y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
  264. x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
  265. y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);
  266. objects[i].rect.x = x0;
  267. objects[i].rect.y = y0;
  268. objects[i].rect.width = x1 - x0;
  269. objects[i].rect.height = y1 - y0;
  270. }
  271. return 0;
  272. }
  // Flat detection record built only from fundamental types.
  // Test() copies these byte-for-byte into the caller's buffer, so the field
  // order and types are part of the exported contract.
  struct Box
  {
      // top-left corner of the box
      float x;
      float y;
      // extent of the box
      float width;
      float height;
      // class index
      int label;
      // confidence score
      float prob;
  };
  282. static Box object_to_box(const Object object)
  283. {
  284. Box box;
  285. box.x = object.rect.x;
  286. box.y = object.rect.y;
  287. box.width = object.rect.width;
  288. box.height = object.rect.height;
  289. box.label = object.label;
  290. box.prob = object.prob;
  291. return box;
  292. }
  293. extern "C" void* CreateNet(unsigned char* model, char* param)
  294. {
  295. const auto yolo = new ncnn::Net;
  296. yolo->opt.use_vulkan_compute = true;
  297. yolo->load_param_mem(param);
  298. yolo->load_model(model);
  299. return yolo;
  300. }
  301. extern "C" void FreeNet(void* net)
  302. {
  303. const auto yolo = static_cast<ncnn::Net*>(net);
  304. yolo->clear();
  305. delete yolo;
  306. }
  307. extern "C" void Test(void* net, unsigned char* input, int iw, int ih, void* ob, int* cnt)
  308. {
  309. ncnn::Net* yolo = static_cast<ncnn::Net*>(net);
  310. cv::Mat m = cv::Mat(ih, iw, CV_8UC4, (void*)input);
  311. cvtColor(m, m, cv::COLOR_RGBA2RGB);
  312. std::vector<Object> objects;
  313. detect_yolov5(yolo, m, objects);
  314. int count = static_cast<int>(objects.size());
  315. const auto boxes = new Box[count];
  316. for (int i = 0; i < count; i++)
  317. {
  318. boxes[i] = object_to_box(objects[i]);
  319. }
  320. if (count > 100)
  321. {
  322. count = 100;
  323. }
  324. memcpy(ob, &boxes[0], sizeof(Box) * count);
  325. *cnt = count;
  326. delete[] boxes;
  327. }