123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333 |
- using AI.Common.Interface;
- using IDCardRecognitionLibs.IDCardRecognitionCore;
- using IDCardRecognitionLibs.InferNet;
- using IDCardRecognitionLibs.OCRProcessCore;
- using IDCardRecognitionLibs.OCRProcessCore.PaddleOCR;
- using System.Net;
- using System.Text.RegularExpressions;
- using System.Xml.Linq;
- namespace IDCardRecognitionLibs
- {
- public class IDCardAnalyser : IDisposable
- {
#region private fields
// Model directory handed to the OCR backend's LoadModel in the constructor.
private string _netDir = string.Empty;
// OCR backend; created in the constructor and cleared by Dispose.
private IOCRProcessCore? _ocrProcessCore;
// Extraction flags written by ProcessIdCardInformation.
private bool _name;      // name
private bool _idNumber;  // ID number
private bool _nation;    // ethnicity
private bool _address;   // address
#endregion
- #region public
/// <summary>
/// Runs OCR on a single image and converts the OCR output into an ID-card result.
/// </summary>
/// <param name="image">Image handed to the OCR backend.</param>
/// <returns>The result built by <c>ProcessIdCardInformation</c> from the OCR output.</returns>
/// <exception cref="Exception">
/// Thrown for any failure. The message is the original exception's message and the
/// original exception is available through <see cref="Exception.InnerException"/>.
/// </exception>
public IDCardRecogResult EvaluateOneImage(RawImage image)
{
    try
    {
        // After Dispose the backend reference is null; report that explicitly
        // instead of surfacing a NullReferenceException message.
        IOCRProcessCore ocrCore = _ocrProcessCore
            ?? throw new ObjectDisposedException(nameof(IDCardAnalyser));

        // OCR step: detection and text extraction.
        OCRResult ocrResult = ocrCore.EvaluateOneImage(image);

        // Reduce the OCR output to the fields returned to the caller.
        return ProcessIdCardInformation(ocrResult);
    }
    catch (Exception excep)
    {
        // Callers have always received a plain Exception carrying the original
        // message; keep that, but attach the original exception so its type and
        // stack trace are no longer lost.
        throw new Exception(excep.Message, excep);
    }
}
- #endregion
- #region constructor
/// <summary>
/// Creates an analyser and loads the OCR model for the requested backend.
/// </summary>
/// <param name="dir">Model directory passed to the backend's <c>LoadModel</c>.</param>
/// <param name="modelType">OCR backend to use; only <c>EnumOCRModel.PaddleOCR</c> is supported.</param>
/// <exception cref="NotSupportedException">
/// <paramref name="modelType"/> is anything other than <c>EnumOCRModel.PaddleOCR</c>.
/// </exception>
public IDCardAnalyser(string dir, EnumOCRModel modelType)
{
    // Reject unsupported backends before allocating anything.
    if (modelType != EnumOCRModel.PaddleOCR)
    {
        throw new NotSupportedException("Other models are not supported yet!");
    }

    _netDir = dir;

    IOCRProcessCore core = new PaddleOCRProcessCore();
    try
    {
        core.LoadModel(_netDir);
    }
    catch
    {
        // Construction is about to fail, so nobody else can ever dispose this
        // backend; release it here before propagating the load error.
        core.Dispose();
        throw;
    }

    _ocrProcessCore = core;
}
- #endregion
/// <summary>
/// Disposes the OCR backend if one is still held and clears the reference,
/// so calling this again does nothing.
/// </summary>
public void Dispose()
{
    if (_ocrProcessCore is null)
    {
        return;
    }

    _ocrProcessCore.Dispose();
    _ocrProcessCore = null;
}
/// <summary>
/// Builds an <see cref="IDCardRecogResult"/> from raw OCR output: ID number, the
/// birth date and gender derived from it, name, ethnicity and address.
/// </summary>
/// <param name="ocrResult">OCR output whose text blocks are analysed.</param>
/// <returns>
/// The populated result. <c>NumerStatus</c> is -1 when no ID number was found,
/// 1 when name, ethnicity and address were all found, 2 when only some of them
/// were found, and 3 when none of them were found.
/// </returns>
private IDCardRecogResult ProcessIdCardInformation(OCRResult ocrResult)
{
    // The flags are instance fields; clear them so the outcome of a previous
    // image can never leak into this result.
    _idNumber = false;
    _name = false;
    _nation = false;
    _address = false;

    IDCardRecogResult idInfor = new IDCardRecogResult();
    idInfor.TimeSpan = ocrResult.TimeAllProcessOneImage;

    List<string> blocks = new List<string>();
    foreach (var item in ocrResult.TextBlocks)
    {
        blocks.Add(item.Text ?? string.Empty);
    }

    // rawText keeps punctuation; strippedText is the punctuation-free working copy.
    string rawText = string.Concat(blocks);
    string strippedText = Regex.Replace(rawText, "[\\p{P}]", "");

    // Without an ID number nothing else is attempted.
    string idNumber = ExtractIdNumber(strippedText);
    if (string.IsNullOrEmpty(idNumber))
    {
        LogHelper.ErrorLog("IdNumber extraction failed:", string.Empty);
        idInfor.NumerStatus = -1;
        return idInfor;
    }

    _idNumber = true;
    LogHelper.InfoLog("IdNumber extraction successed:" + idNumber);
    idInfor.IdNumber = idNumber;
    strippedText = strippedText.Replace(idNumber, "");

    ExtractBirthdateAndGender(idNumber, out string birthdate, out string gender);
    idInfor.Gender = gender;
    idInfor.Birthdate = birthdate;
    LogHelper.InfoLog("Gender extraction successed:" + gender);
    LogHelper.InfoLog("Birthdate extraction successed:" + birthdate);

    // Drop the birth-date and ID-number blocks. The birthdate is only tested when
    // non-empty: Contains("") is true for every string and would empty the list.
    blocks.RemoveAll(block =>
        block.Contains("出生")
        || (!string.IsNullOrEmpty(birthdate) && block.Contains(birthdate))
        || block.Contains(idNumber));

    // Name
    string name = ExtractLabelledField("姓名", "Name", blocks, ref strippedText);
    if (name.Length > 0)
    {
        idInfor.Name = name;
        _name = true;
    }

    // Ethnicity
    string nation = ExtractLabelledField("民族", "Nation", blocks, ref strippedText);
    if (nation.Length > 0)
    {
        idInfor.Nation = nation;
        _nation = true;
    }

    // Address
    string address = ExtractAddress(rawText, strippedText, idNumber, blocks);
    if (address.Length > 0)
    {
        idInfor.Address = address;
        _address = true;
    }

    // Summarise which of the three optional fields were found.
    if (_name && _nation && _address)
    {
        idInfor.NumerStatus = 1;
        LogHelper.InfoLog("IDCardInfor extraction successful,NumerStatus: 1");
    }
    else if (_name || _nation || _address)
    {
        idInfor.NumerStatus = 2;
        LogHelper.InfoLog("Part IDCardInfor extraction successful, NumerStatus: 2");
    }
    else
    {
        idInfor.NumerStatus = 3;
        LogHelper.InfoLog("Part IDCardInfor extraction successful,NumerStatus: 3");
    }

    return idInfor;
}

/// <summary>
/// Extracts the value printed after <paramref name="keyword"/> from the first block
/// that contains the keyword, logging success or failure under <paramref name="logLabel"/>.
/// </summary>
/// <param name="keyword">Label text that precedes the value on the card.</param>
/// <param name="logLabel">Field name used in the log messages.</param>
/// <param name="blocks">Remaining OCR blocks; blocks carrying the keyword are removed on success.</param>
/// <param name="strippedText">Punctuation-free text; the label and its value are removed on success.</param>
/// <returns>The extracted value, or an empty string when nothing was found.</returns>
private static string ExtractLabelledField(string keyword, string logLabel, List<string> blocks, ref string strippedText)
{
    // Only the first block carrying the label is considered.
    string? labelled = blocks.FirstOrDefault(block => block.Contains(keyword));
    string value = labelled is null ? string.Empty : ExtractName(labelled, keyword);
    if (string.IsNullOrEmpty(value))
    {
        LogHelper.ErrorLog(logLabel + " extraction failed:", string.Empty);
        return string.Empty;
    }

    // Remove label and value as one unit. Removing the value on its own would also
    // delete it wherever else it occurs, e.g. ethnicity "汉" inside "武汉市".
    strippedText = strippedText.Replace(keyword + value, "");
    blocks.RemoveAll(block => block.Contains(keyword));
    LogHelper.InfoLog(logLabel + " extraction successful:" + value);
    return value;
}

/// <summary>
/// Extracts the text between the "住址" label and the "公民身份号码" label.
/// </summary>
/// <param name="rawText">Concatenated OCR text including punctuation.</param>
/// <param name="strippedText">Punctuation-free text with already extracted fields removed.</param>
/// <param name="idNumber">Extracted ID number, used as an end marker when the ID label is missing.</param>
/// <param name="blocks">Remaining OCR blocks.</param>
/// <returns>The address, or an empty string when it could not be extracted.</returns>
private static string ExtractAddress(string rawText, string strippedText, string idNumber, List<string> blocks)
{
    const string idLabel = "公民身份号码";

    if (!blocks.Any(block => block.Contains("住址")))
    {
        LogHelper.ErrorLog("Address extraction failed:", string.Empty);
        return string.Empty;
    }

    // When the ID label itself was not recognised, substitute it for the ID number
    // in the raw text so the pattern below still has an end marker.
    if (!strippedText.Contains(idLabel))
    {
        strippedText = rawText.Replace(idNumber, idLabel);
    }

    Match match = Regex.Match(strippedText, @"住址([\s\S]*?)公民身份号码");
    string address = match.Groups[1].Value.Trim().Replace(" ", "");
    if (address.Length == 0)
    {
        LogHelper.ErrorLog("Address extraction failed:", string.Empty);
        return string.Empty;
    }

    if (!ContainsProvinceMarker(address))
    {
        // The matched span has no province-level part; look for a block that
        // carries one and prepend it. The block list is only read here, never
        // modified, so enumeration cannot be invalidated.
        string? provinceBlock = blocks.FirstOrDefault(block => block.Contains("省"));
        if (provinceBlock is null)
        {
            LogHelper.ErrorLog("Address extraction failed:", string.Empty);
            return string.Empty;
        }

        address = provinceBlock + address;
    }

    LogHelper.InfoLog("Address extraction successful:" + address);
    return address;
}

/// <summary>
/// Reports whether <paramref name="address"/> contains a province-level marker
/// ("省", "自治区" or one of the four municipalities).
/// </summary>
private static bool ContainsProvinceMarker(string address)
{
    string[] markers = { "省", "北京市", "上海市", "天津市", "自治区", "重庆市" };
    return markers.Any(marker => address.Contains(marker));
}
/// <summary>
/// Returns the run of CJK ideographs (U+4E00–U+9FA5) and digits that immediately
/// follows <paramref name="searchKeyword"/> in <paramref name="input"/>.
/// </summary>
/// <param name="input">Text to search.</param>
/// <param name="searchKeyword">Label that precedes the value; matched literally.</param>
/// <returns>The captured value, or an empty string when there is no match.</returns>
private static string ExtractName(string input, string searchKeyword)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // Escape the keyword so regex metacharacters in it are matched literally
    // instead of changing (or breaking) the pattern.
    string pattern = Regex.Escape(searchKeyword ?? string.Empty) + @"([\u4e00-\u9fa5\d]+)";
    Match match = Regex.Match(input, pattern);
    return match.Success ? match.Groups[1].Value : string.Empty;
}
/// <summary>
/// Extracts the first 18-character ID number (17 digits followed by a digit,
/// 'x' or 'X') from <paramref name="input"/>.
/// </summary>
/// <param name="input">Text to search.</param>
/// <returns>The 18-character ID number, or an empty string when none is found.</returns>
private static string ExtractIdNumber(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return string.Empty;
    }

    // The pattern tolerates one extra trailing digit/x after the number, but only
    // capture group 1 (the 18 characters) is the ID. Returning the whole match
    // would hand a 19-character value to callers that require exactly 18.
    Match match = Regex.Match(input, @"(\d{17}[\dxX])[\dxX]?");
    return match.Success ? match.Groups[1].Value : string.Empty;
}
/// <summary>
/// Derives birth date and gender from an 18-character ID number.
/// </summary>
/// <param name="idNumber">ID number; anything that is not exactly 18 characters is ignored.</param>
/// <param name="birthdate">
/// Birth date formatted as "{year}年{month}月{day}日" without leading zeros, taken from
/// characters 6–13; empty when the number cannot be read.
/// </param>
/// <param name="gender">
/// "男" when the character at index 16 is an odd digit, "女" when it is even;
/// empty when the number cannot be read.
/// </param>
private static void ExtractBirthdateAndGender(string idNumber, out string birthdate, out string gender)
{
    birthdate = string.Empty;
    gender = string.Empty;

    if (idNumber is null || idNumber.Length != 18)
    {
        return;
    }

    // Characters 6-13 hold the date as yyyyMMdd; index 16 is the gender digit.
    if (!TryReadDigits(idNumber, 6, 4, out int year)
        || !TryReadDigits(idNumber, 10, 2, out int month)
        || !TryReadDigits(idNumber, 12, 2, out int day)
        || !TryReadDigits(idNumber, 16, 1, out int genderCode))
    {
        return;
    }

    birthdate = $"{year}年{month}月{day}日";
    gender = genderCode % 2 == 0 ? "女" : "男";
}

/// <summary>
/// Reads <paramref name="length"/> decimal digits of <paramref name="text"/> starting at
/// <paramref name="start"/> as a non-negative integer.
/// </summary>
/// <returns><c>false</c> when any character in the range is not a decimal digit.</returns>
private static bool TryReadDigits(string text, int start, int length, out int value)
{
    value = 0;
    for (int i = start; i < start + length; i++)
    {
        // char.IsDigit accepts every Unicode decimal digit, the same set the regex
        // class \d matches, so e.g. full-width digits are read instead of making
        // int.Parse throw.
        if (!char.IsDigit(text[i]))
        {
            value = 0;
            return false;
        }

        value = (value * 10) + (int)char.GetNumericValue(text[i]);
    }

    return true;
}
- }
- }
|