IDCardAnalyser.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. using AI.Common.Interface;
  2. using IDCardRecognitionLibs.IDCardRecognitionCore;
  3. using IDCardRecognitionLibs.InferNet;
  4. using IDCardRecognitionLibs.OCRProcessCore;
  5. using IDCardRecognitionLibs.OCRProcessCore.PaddleOCR;
  6. using System.Net;
  7. using System.Text.RegularExpressions;
  8. using System.Xml.Linq;
  9. namespace IDCardRecognitionLibs
  10. {
  11. public class IDCardAnalyser : IDisposable
  12. {
  #region public
  // NOTE(review): this region is labelled "public", but every field declared in it is private.

  // Model directory received by the constructor.
  private string _netDir = "";
  // OCR backend; assigned in the constructor and set to null by Dispose().
  private IOCRProcessCore? _ocrProcessCore;
  // Per-field extraction flags.
  private bool _name = false; // name
  private bool _idNumber = false; // ID number
  private bool _nation = false; // ethnicity
  private bool _address = false; // address
  #endregion
  21. #region public
  /// <summary>
  /// Runs OCR on a single image and converts the OCR output into structured ID-card fields.
  /// </summary>
  /// <param name="image">Image to analyse; passed unchanged to the OCR backend.</param>
  /// <returns>The recognition result produced from the OCR text blocks.</returns>
  /// <exception cref="Exception">
  /// Thrown for any failure during OCR or field extraction. The message is that of the
  /// underlying error, which is available through <see cref="Exception.InnerException"/>.
  /// </exception>
  public IDCardRecogResult EvaluateOneImage(RawImage image)
  {
      try
      {
          // After Dispose() the backend reference is null; report that explicitly instead of
          // letting a NullReferenceException message reach the caller.
          IOCRProcessCore ocrCore = _ocrProcessCore
              ?? throw new ObjectDisposedException(nameof(IDCardAnalyser));

          // OCR stage: text detection and recognition.
          OCRResult ocrResult = ocrCore.EvaluateOneImage(image);

          // Post-processing stage: keep only the information the caller needs.
          return ProcessIdCardInformation(ocrResult);
      }
      catch (Exception excep)
      {
          // Callers have always received a plain Exception carrying the original message;
          // keep that contract but attach the original so its type and stack trace survive.
          throw new Exception(excep.Message, excep);
      }
  }
  42. #endregion
  43. #region constructor
  /// <summary>
  /// Creates an analyser and loads the OCR model for the requested backend.
  /// </summary>
  /// <param name="dir">Model directory passed to the backend's <c>LoadModel</c>.</param>
  /// <param name="modelType">OCR backend to instantiate; only <c>EnumOCRModel.PaddleOCR</c> is supported.</param>
  /// <exception cref="NotSupportedException">
  /// <paramref name="modelType"/> is anything other than <c>EnumOCRModel.PaddleOCR</c>.
  /// </exception>
  public IDCardAnalyser(string dir, EnumOCRModel modelType)
  {
      // Choose the concrete OCR backend from the model type.
      if (modelType != EnumOCRModel.PaddleOCR)
      {
          throw new NotSupportedException("Other models are not supported yet!");
      }

      _netDir = dir;

      IOCRProcessCore core = new PaddleOCRProcessCore();
      try
      {
          core.LoadModel(_netDir);
      }
      catch
      {
          // The constructor is about to fail, so nobody else can ever dispose this backend.
          core.Dispose();
          throw;
      }

      _ocrProcessCore = core;
  }
  65. #endregion
  /// <summary>
  /// Disposes the OCR backend if one is still held and clears the reference to it.
  /// </summary>
  public void Dispose()
  {
      IOCRProcessCore? backend = _ocrProcessCore;
      if (backend is null)
      {
          return;
      }

      backend.Dispose();
      _ocrProcessCore = null;
  }
  /// <summary>
  /// Converts raw OCR text blocks into structured ID-card fields.
  /// </summary>
  /// <remarks>
  /// The ID number is located first; birth date and gender are derived from it. Name,
  /// ethnicity and address are then looked up by their printed labels.
  /// <c>NumerStatus</c> is set to -1 when no ID number is found, 1 when name, ethnicity and
  /// address were all extracted, 2 when only some of them were, and 3 when none were.
  /// Success is tracked in locals, so the outcome of one call cannot influence the next.
  /// </remarks>
  /// <param name="ocrResult">OCR output; its <c>TextBlocks</c> and <c>TimeAllProcessOneImage</c> are read.</param>
  /// <returns>The populated recognition result.</returns>
  private IDCardRecogResult ProcessIdCardInformation(OCRResult ocrResult)
  {
      IDCardRecogResult idInfor = new IDCardRecogResult();
      idInfor.TimeSpan = ocrResult.TimeAllProcessOneImage;

      List<string> ocrLines = new List<string>();
      foreach (var item in ocrResult.TextBlocks)
      {
          ocrLines.Add(item.Text);
      }

      // All recognised text joined in block order, plus a copy with punctuation stripped.
      string inputTextIn = string.Join(string.Empty, ocrLines);
      string textInforAll = Regex.Replace(inputTextIn, "[\\p{P}]", "");

      // Everything else depends on the ID number, so give up early without it.
      string idNumber = ExtractIdNumber(textInforAll);
      if (string.IsNullOrEmpty(idNumber))
      {
          LogHelper.ErrorLog("IdNumber extraction failed:", string.Empty);
          idInfor.NumerStatus = -1;
          return idInfor;
      }

      LogHelper.InfoLog("IdNumber extraction successed:" + idNumber);
      idInfor.IdNumber = idNumber;
      textInforAll = textInforAll.Replace(idNumber, "");

      ExtractBirthdateAndGender(idNumber, out string birthdate, out string gender);
      idInfor.Gender = gender;
      idInfor.Birthdate = birthdate;
      LogHelper.InfoLog("Gender extraction successed:" + gender);
      LogHelper.InfoLog("Birthdate extraction successed:" + birthdate);

      // Drop the lines already accounted for. An empty birth date must not take part in the
      // filter: Contains("") is true for every string and would discard all lines.
      List<string> candidateLines = new List<string>(ocrLines);
      candidateLines.RemoveAll(line =>
          line.Contains("出生") || (birthdate.Length > 0 && line.Contains(birthdate)));
      candidateLines.RemoveAll(line => line.Contains(idNumber));

      // Name.
      bool hasName = TryExtractLabelledField(candidateLines, "姓名", ref textInforAll, out string name);
      if (hasName)
      {
          idInfor.Name = name;
          LogHelper.InfoLog("Name extraction successful:" + name);
      }
      else
      {
          LogHelper.ErrorLog("Name extraction failed:", string.Empty);
      }

      // Ethnicity.
      bool hasNation = TryExtractLabelledField(candidateLines, "民族", ref textInforAll, out string nation);
      if (hasNation)
      {
          idInfor.Nation = nation;
          LogHelper.InfoLog("Nation extraction successful:" + nation);
      }
      else
      {
          LogHelper.ErrorLog("Nation extraction failed:", string.Empty);
      }

      // Address.
      bool hasAddress = false;
      if (candidateLines.Exists(line => line.Contains("住址")))
      {
          // The address is whatever sits between the address label and the ID-number label.
          // When the ID-number label was not recognised, put it where the number itself is.
          if (!textInforAll.Contains("公民身份号码"))
          {
              textInforAll = inputTextIn.Replace(idNumber, "公民身份号码");
          }

          Match match = Regex.Match(textInforAll, @"住址([\s\S]*?)公民身份号码");
          string address = match.Groups[1].Value.Trim().Replace(" ", "");

          if (address.Length > 0)
          {
              string[] provinceMarkers = { "省", "北京市", "上海市", "天津市", "自治区", "重庆市" };
              string fullAddress = address;

              if (!Array.Exists(provinceMarkers, marker => fullAddress.Contains(marker)))
              {
                  // The captured text lacks a province-level part; borrow it from another
                  // OCR line that carries one, if such a line exists.
                  string? provinceLine = candidateLines.Find(line => line.Contains("省"));
                  fullAddress = provinceLine is null ? string.Empty : provinceLine + address;
              }

              if (fullAddress.Length > 0)
              {
                  idInfor.Address = fullAddress;
                  hasAddress = true;
                  LogHelper.InfoLog("Address extraction successful:" + fullAddress);
              }
          }
      }

      if (!hasAddress)
      {
          LogHelper.ErrorLog("Address extraction failed:", string.Empty);
      }

      // Summarise which of the three labelled fields were recovered.
      if (hasName && hasNation && hasAddress)
      {
          idInfor.NumerStatus = 1;
          LogHelper.InfoLog("IDCardInfor extraction successful,NumerStatus: 1");
      }
      else if (hasName || hasNation || hasAddress)
      {
          idInfor.NumerStatus = 2;
          LogHelper.InfoLog("Part IDCardInfor extraction successful, NumerStatus: 2");
      }
      else
      {
          idInfor.NumerStatus = 3;
          LogHelper.InfoLog("Part IDCardInfor extraction successful,NumerStatus: 3");
      }

      return idInfor;
  }

  /// <summary>
  /// Extracts the value printed after <paramref name="keyword"/> on the first line that
  /// contains it. On success the label and the value are removed from
  /// <paramref name="remainingText"/>, and every line containing either is dropped from
  /// <paramref name="lines"/>.
  /// </summary>
  /// <param name="lines">Candidate OCR lines; modified on success.</param>
  /// <param name="keyword">Field label to look for.</param>
  /// <param name="remainingText">Joined text still to be parsed; modified on success.</param>
  /// <param name="value">The extracted value, or an empty string on failure.</param>
  /// <returns><c>true</c> when a non-empty value was extracted.</returns>
  private static bool TryExtractLabelledField(List<string> lines, string keyword, ref string remainingText, out string value)
  {
      value = string.Empty;

      // Only the first labelled line is considered.
      string? labelledLine = lines.Find(line => line.Contains(keyword));
      if (labelledLine is null)
      {
          return false;
      }

      string extracted = ExtractName(labelledLine, keyword);
      if (string.IsNullOrEmpty(extracted))
      {
          return false;
      }

      remainingText = remainingText.Replace(keyword, "").Replace(extracted, "");
      lines.RemoveAll(line => line.Contains(keyword) || line.Contains(extracted));
      value = extracted;
      return true;
  }
  /// <summary>
  /// Extracts the value that directly follows a field label in a line of OCR text.
  /// </summary>
  /// <param name="input">Text to search; <c>null</c> or empty yields an empty result.</param>
  /// <param name="searchKeyword">Field label to look for; it is matched literally.</param>
  /// <returns>
  /// The run of CJK characters (U+4E00 to U+9FA5) and digits immediately after the first
  /// occurrence of the label, or an empty string when there is no such run.
  /// </returns>
  private static string ExtractName(string input, string searchKeyword)
  {
      if (string.IsNullOrEmpty(input))
      {
          return string.Empty;
      }

      // Escape the label so regex metacharacters in it are not interpreted as pattern syntax.
      string label = Regex.Escape(searchKeyword ?? string.Empty);
      Match match = Regex.Match(input, $"{label}([\u4e00-\u9fa5\\d]+)");

      return match.Success ? match.Groups[1].Value : string.Empty;
  }
  /// <summary>
  /// Extracts an 18-character ID number from OCR text.
  /// </summary>
  /// <param name="input">Text to search; <c>null</c> or empty yields an empty result.</param>
  /// <returns>
  /// The first run of 17 digits followed by a digit, 'x' or 'X', or an empty string when
  /// the text contains none.
  /// </returns>
  private static string ExtractIdNumber(string input)
  {
      if (string.IsNullOrEmpty(input))
      {
          return string.Empty;
      }

      // The optional trailing character consumes one stray digit/X right after the number.
      // Only the capture group is the ID: returning the whole match could hand back 19
      // characters, which callers cannot decode as an 18-character ID.
      Match match = Regex.Match(input, @"(\d{17}[\dxX])[\dxX]?");
      return match.Success ? match.Groups[1].Value : string.Empty;
  }
  /// <summary>
  /// Derives the birth date and gender from an 18-character ID number.
  /// </summary>
  /// <param name="idNumber">ID number; characters 7-14 hold the date as yyyyMMdd and character 17 the gender digit.</param>
  /// <param name="birthdate">
  /// Receives the date formatted as year, month and day without zero padding, or an empty
  /// string when <paramref name="idNumber"/> cannot be decoded.
  /// </param>
  /// <param name="gender">
  /// Receives the male label for an odd 17th digit and the female label for an even one, or
  /// an empty string when <paramref name="idNumber"/> cannot be decoded.
  /// </param>
  private static void ExtractBirthdateAndGender(string idNumber, out string birthdate, out string gender)
  {
      birthdate = string.Empty;
      gender = string.Empty;

      if (idNumber is null || idNumber.Length != 18)
      {
          return;
      }

      // TryParse instead of Parse: the caller's regex uses \d, which also accepts non-ASCII
      // digits that int.Parse rejects with a FormatException.
      var culture = System.Globalization.CultureInfo.InvariantCulture;
      var digitsOnly = System.Globalization.NumberStyles.None;
      bool decoded =
          int.TryParse(idNumber.Substring(6, 4), digitsOnly, culture, out int year) &
          int.TryParse(idNumber.Substring(10, 2), digitsOnly, culture, out int month) &
          int.TryParse(idNumber.Substring(12, 2), digitsOnly, culture, out int day) &
          int.TryParse(idNumber.Substring(16, 1), digitsOnly, culture, out int genderCode);
      if (!decoded)
      {
          return;
      }

      birthdate = $"{year}年{month}月{day}日";
      gender = genderCode % 2 == 0 ? "女" : "男";
  }
  295. }
  296. }