using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.InteropServices;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;

namespace YOLODetectProcessLib
{
    /// <summary>
    /// Base class for YOLO detection networks that run through ONNX Runtime and
    /// delegate box selection to the native <c>YOLOOutputPostProcessUtil.dll</c>.
    /// </summary>
    public abstract class InferNetOnnxDetectBasicYolov8 : IInferenceNetwork
    {
        #region dll import

        /// <summary>
        /// Post-processing flavour passed to the native library.
        /// </summary>
        public enum YoloType
        {
            yolov5,
            yolov8
        }

        /// <summary>
        /// Parameter struct consumed by the box-selection routine of the C++ DLL.
        /// Field order and packing are part of the native contract; do not reorder.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Pack = 16)]
        public struct YoloDetectInput
        {
            public YoloType YoloType;       // YOLO post-processing type; yolov5 and yolov8 are currently supported
            public IntPtr Alldetectresult;  // all raw results: (batch_size, allboxesnum, 5+classesnum), float32
            public IntPtr Outputresult;     // (batch_size, maxdet, 6), float32
            public IntPtr Errormsg;         // error message buffer
            public float Confthres;         // box confidence threshold, documented default 0.6
            // Confthres is the box confidence; a separate class confidence was added
            // (previously both used the same value).
            public float Clsconfthres;      // class confidence threshold, documented default 0.5
            public int Batchsize;           // batch size, documented default 1
            public int Allboxesnum;         // number of input boxes
            public int Classesnum;          // number of classes, background excluded
            public int Maxdet;              // maximum number of final results, documented default 10
            public int Errormsgmaxlen;      // maximum error message length (longer messages are not copied)
            public int Modelinputheight;    // model input height, used to drop small-area boxes, documented default 320
            public int Modelinputwidth;     // model input width, used to drop small-area boxes, documented default 320
            public float Minboxratio;       // boxes whose area / image area is below this ratio are removed, documented default 0.01

            // ApplyPostProcessToBBox: box filtering within a single class.
            public int Postprocesstopk;     // top-k count selected in ApplyPostProcessToBBox, documented default 5
            public float Ioufltth;          // IoU filter threshold, documented default 0.3
            public float Iosfltth;          // IoS filter threshold, documented default 0.3

            // FindBoxesToUnion: when two boxes overlap beyond a threshold and merging
            // them adds little area, the two boxes are merged.
            public float Unioniouth;        // union parameter, documented default 1.f
            public float Unioniosth;        // union parameter, documented default 1.f
            public float Unionuobth;        // union parameter, documented default 1.f

            // ApplyBoxClassFilter: several boxes on the same image; box filtering
            // across different classes.
            public float Ioufltthdiffcls;   // ApplyBoxClassFilter IoU filter threshold, documented default 0.3
            public float Iosfltthdiffcls;   // ApplyBoxClassFilter IoS filter threshold, documented default 0.3

            // ApplyPostProcessToBBox: enable IoU filtering, documented default true
            // (when the IoU of two boxes exceeds Ioufltth, the lower-scoring box is dropped).
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableioufilt;

            // ApplyPostProcessToBBox: enable IoS filtering, documented default false
            // (when the IoS of two boxes exceeds Iosfltth, the lower-scoring box is dropped).
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableiosfilt;

            // FindBoxesToUnion: enable box merging, documented default false.
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableunion;

            // ApplyBoxClassFilter: enable IoU filtering, documented default true
            // (when the IoU of two boxes exceeds the threshold, the lower-scoring box is dropped).
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableioufiltdiffcls;

            // ApplyBoxClassFilter: enable IoS filtering, documented default false
            // (when the IoS of two boxes exceeds the threshold, the lower-scoring box is dropped).
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableiosfiltdiffcls;
        }

        /// <summary>
        /// Native entry point that selects the boxes to keep from the raw network output.
        /// </summary>
        /// <param name="input">Parameters and pinned buffer addresses, passed by value.</param>
        /// <returns>The native success flag; <c>false</c> is treated as failure by the caller.</returns>
        [DllImport(@"YOLOOutputPostProcessUtil.dll", CallingConvention = CallingConvention.Cdecl)]
        [return: MarshalAs(UnmanagedType.I1)]
        public static extern bool SelectNeededBoxes(YoloDetectInput input);

        #endregion

        #region protected

        protected SessionOptions _sessionOption = null;
        protected volatile bool _modelLoaded;
        private InferenceSession _inferSession;
        private readonly object _sessLocker = new object();

        protected int[] _inputTensorShape = null;
        protected int[] _outputTensorShape = null;
        private FixedBufferOnnxValue _inputValue;
        private FixedBufferOnnxValue _outputValue;

        private int _modelInputH = 448;
        private int _modelInputW = 448;
        private int _modelInputC = 3;
        private string[] _inputTensorNames = new[] { "images" };
        private string[] _outputTensorNames = new[] { "output0" };

        // Pre-processing configuration handed to MoldedImage.
        protected readonly EnumResizeMode _resizeMode = EnumResizeMode.Warp;
        protected readonly EnumMeanValueType _meanValueType = EnumMeanValueType.None;
        protected readonly float _meanValueR = 0;
        protected readonly float _meanValueG = 0;
        protected readonly float _meanValueB = 0;
        protected readonly EnumScaleValueType _scaleValueType = EnumScaleValueType.ConstantScale;
        protected readonly float _scaleValueR = 255.0f;
        protected readonly float _scaleValueG = 255.0f;
        protected readonly float _scaleValueB = 255.0f;
        protected readonly EnumNormalizationType _normType = EnumNormalizationType.None;
        protected readonly EnumChannelOrder _channelOrder = EnumChannelOrder.RGB;
        protected readonly EnumAxisOrder _axisOrder = EnumAxisOrder.CHW;

        protected YoloType _yolotype = YoloType.yolov8;

        // Buffer the ONNX output tensor is bound to; filled by every Run call.
        private float[] _detectedResultData;
        protected MoldedImage _moldedImage = null;

        // Post-processing parameters copied into YoloDetectInput on every call.
        protected char[] _errormsg;
        protected float _confthres = 0.3f;
        protected float _clsconfthres = 0.4f;
        protected int _batchsize = 1;
        protected int _allboxesnum = 16660;
        protected int _classesnum = 4;
        protected int _maxdet = 50;
        protected int _errormsgmaxlen = 256;
        protected int _modelinputheight = 448;
        protected int _modelinputwidth = 448;
        protected float _minboxratio = 0.00001f;
        protected int _postprocesstopk = 20;
        protected bool _enableioufilt = true;
        protected bool _enableiosfilt = true;
        protected float _ioufltth = 0.3f;
        protected float _iosfltth = 0.3f;
        protected float[] _outputresultData;
        protected bool _enableunion = false;
        protected float _unioniouth = 1.0f;
        protected float _unioniosth = 1.0f;
        protected float _unionuobth = 1.0f;
        protected bool _enableioufiltdiffcls = false;
        protected bool _enableiosfiltdiffcls = false;
        protected float _ioufltthdiffcls = 0.01f;
        protected float _iosfltthdiffcls = 0.01f;

        #endregion

        /// <summary>
        /// Hash value of the network.
        /// </summary>
        public abstract string HashCode { get; }

        /// <summary>
        /// Network name.
        /// </summary>
        public abstract string NetworkName { get; }

        /// <summary>
        /// Whether the model has been loaded.
        /// </summary>
        public bool NetworkLoaded => _modelLoaded;

        /// <summary>
        /// Notifies subscribers that an error occurred during inference.
        /// </summary>
        // NOTE(review): generic type arguments look stripped elsewhere in this file;
        // confirm this delegate type matches the declaration in IInferenceNetwork.
        public event EventHandler NotifyError;

        /// <summary>
        /// Loads the model, validates its input/output tensors and binds the
        /// fixed input/output buffers. Does nothing if the model is already loaded.
        /// </summary>
        /// <param name="numCPU">Value used for both the inter-op and intra-op thread counts.</param>
        /// <param name="netDir">Directory passed to the network file reader.</param>
        public virtual void LoadNetwork(int numCPU, string netDir)
        {
            // Do not load twice.
            if (_modelLoaded)
            {
                return;
            }

            byte[] trainedNetwork = InferenceNetworkUtils.ReadNetworkDataFromFile(netDir, NetworkName, HashCode);

            _sessionOption = new SessionOptions();
            _sessionOption.InterOpNumThreads = numCPU;
            _sessionOption.IntraOpNumThreads = numCPU;

            lock (_sessLocker)
            {
                _inferSession = new InferenceSession(trainedNetwork, _sessionOption);
            }

            // Check the model's input/output tensor names and shapes.
            _inputTensorShape = new int[] { 1, _modelInputC, _modelInputH, _modelInputW };
            _outputTensorShape = new int[] { 1, _classesnum + 4, _allboxesnum };
            var inputTensorDimensions = new List<int[]> { _inputTensorShape };
            var outputTensorDimensions = new List<int[]> { _outputTensorShape };
            InferenceNetworkUtils.CheckOnnxModel(
                _inferSession,
                _inputTensorNames,
                _outputTensorNames,
                inputTensorDimensions,
                outputTensorDimensions);

            // Create the molded image whose data buffer backs the input tensor.
            _moldedImage = new MoldedImage(
                _modelInputH, _modelInputW, _modelInputC,
                _resizeMode,
                _meanValueType, _meanValueR, _meanValueG, _meanValueB,
                _scaleValueType, _scaleValueR, _scaleValueG, _scaleValueB,
                _normType, _channelOrder, _axisOrder);

            _detectedResultData = new float[_allboxesnum * (_classesnum + 4)];

            var tensorInput = new DenseTensor<float>(_moldedImage.DataBuffer, _inputTensorShape);
            var tensorOutput = new DenseTensor<float>(_detectedResultData, _outputTensorShape);
            _inputValue = FixedBufferOnnxValue.CreateFromTensor(tensorInput);
            _outputValue = FixedBufferOnnxValue.CreateFromTensor(tensorOutput);

            _modelLoaded = true;
        }

        /// <summary>
        /// Runs inference and post-processing on each image in turn.
        /// </summary>
        /// <param name="images">Images to process, one result array is produced per image.</param>
        /// <returns>
        /// Detected objects per image, or <c>null</c> if any step throws. In the
        /// failure case <see cref="NotifyError"/> is raised with the exception.
        /// </returns>
        public virtual IDetectedObject[][] Process(InferenceNetworkInputImage[] images)
        {
            try
            {
                int batchSize = images.Length;
                var results = new IDetectedObject[batchSize][];

                // The bound buffers are the same for every image.
                var inputValues = new[] { _inputValue };
                var outputValues = new[] { _outputValue };

                for (int ni = 0; ni < batchSize; ni++)
                {
                    _moldedImage.Process(images[ni]);

                    lock (_sessLocker)
                    {
                        _inferSession.Run(_inputTensorNames, inputValues, _outputTensorNames, outputValues);
                    }

                    // Post-processing.
                    var roi = images[ni].ROI;
                    results[ni] = DetectionPostProcess(
                        (int)(roi.Right - roi.Left),
                        (int)(roi.Bottom - roi.Top),
                        (int)roi.Left,
                        (int)roi.Top);
                }

                return results;
            }
            catch (Exception excep)
            {
                // Keep the null-on-failure contract, but surface the cause
                // instead of silently discarding it.
                NotifyError?.Invoke(this, new ErrorEventArgs(excep));
                return null;
            }
        }

        /// <summary>
        /// Passes the current raw network output to the native box selector and
        /// converts the selected boxes into the original image coordinate system.
        /// </summary>
        /// <param name="RoiWidth">Width of the ROI in the original image.</param>
        /// <param name="RoiHeight">Height of the ROI in the original image.</param>
        /// <param name="RoiLeft">Left offset of the ROI in the original image.</param>
        /// <param name="RoiTop">Top offset of the ROI in the original image.</param>
        /// <returns>The detected objects; empty when nothing passes the filters.</returns>
        /// <exception cref="InvalidOperationException">The network output buffer has not been created yet.</exception>
        public IDetectedObject[] DetectionPostProcess(int RoiWidth, int RoiHeight, int RoiLeft, int RoiTop)
        {
            if (_detectedResultData == null)
            {
                // Without this guard a null pointer would be handed to native code.
                throw new InvalidOperationException("The network must be loaded before post-processing.");
            }

            var results = new List<IDetectedObject>();

            _outputresultData = new float[_batchsize * _maxdet * 6];
            _errormsg = new char[_errormsgmaxlen];

            var yoloinput = new YoloDetectInput
            {
                YoloType = _yolotype,
                Confthres = _confthres,
                Clsconfthres = _clsconfthres,
                Batchsize = _batchsize,
                Allboxesnum = _allboxesnum,
                Classesnum = _classesnum,
                Maxdet = _maxdet,
                Errormsgmaxlen = _errormsgmaxlen,
                Modelinputheight = _modelinputheight,
                Modelinputwidth = _modelinputwidth,
                Minboxratio = _minboxratio,
                Postprocesstopk = _postprocesstopk,
                Enableioufilt = _enableioufilt,
                Enableiosfilt = _enableiosfilt,
                Ioufltth = _ioufltth,
                Iosfltth = _iosfltth,
                Enableunion = _enableunion,
                Unioniouth = _unioniouth,
                Unioniosth = _unioniosth,
                Unionuobth = _unionuobth,
                Enableioufiltdiffcls = _enableioufiltdiffcls,
                Enableiosfiltdiffcls = _enableiosfiltdiffcls,
                Ioufltthdiffcls = _ioufltthdiffcls,
                Iosfltthdiffcls = _iosfltthdiffcls
            };

            GCHandle hDetectResult = default(GCHandle);
            GCHandle hOutputResult = default(GCHandle);
            GCHandle hErrorMsg = default(GCHandle);
            bool ret;
            try
            {
                // Pin the managed buffers only for the duration of the native call.
                hDetectResult = GCHandle.Alloc(_detectedResultData, GCHandleType.Pinned);
                yoloinput.Alldetectresult = hDetectResult.AddrOfPinnedObject();

                hOutputResult = GCHandle.Alloc(_outputresultData, GCHandleType.Pinned);
                yoloinput.Outputresult = hOutputResult.AddrOfPinnedObject();

                // NOTE(review): this pins a UTF-16 char[]; confirm the character
                // width the native side writes before decoding this buffer.
                hErrorMsg = GCHandle.Alloc(_errormsg, GCHandleType.Pinned);
                yoloinput.Errormsg = hErrorMsg.AddrOfPinnedObject();

                ret = SelectNeededBoxes(yoloinput);
            }
            finally
            {
                // A pinned handle that is never freed keeps its array alive and
                // immovable forever, so release all of them on every path.
                if (hErrorMsg.IsAllocated)
                {
                    hErrorMsg.Free();
                }

                if (hOutputResult.IsAllocated)
                {
                    hOutputResult.Free();
                }

                if (hDetectResult.IsAllocated)
                {
                    hDetectResult.Free();
                }
            }

            if (!ret)
            {
                NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("Failed at calling DetectionPostProcess")));
            }

            // Only a single image is supported.
            if (_batchsize != 1)
            {
                NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("batchsize must be 1")));
            }

            float[] outs = _outputresultData;

            // Factors mapping model-input coordinates onto the ROI.
            float scaleW = (float)RoiWidth / _modelInputW;
            float scaleH = (float)RoiHeight / _modelInputH;

            for (int ni = 0; ni < _maxdet; ni++)
            {
                // Each output row is [label, conf, left, top, right, bottom].
                int offset = ni * 6;
                int label = (int)outs[offset];
                float conf = outs[offset + 1];
                float left = outs[offset + 2] * _modelinputwidth;
                float top = outs[offset + 3] * _modelinputheight;
                float right = outs[offset + 4] * _modelinputwidth;
                float bottom = outs[offset + 5] * _modelinputheight;

                // Rows with zero confidence or a zero right edge are skipped.
                if (conf == 0.0f || (int)right == 0)
                {
                    continue;
                }

                // Map into the ROI first and keep the box inside the ROI bounds.
                float leftRoi = ClampToRoi(left * scaleW, RoiWidth);
                float rightRoi = ClampToRoi(right * scaleW, RoiWidth);
                float topRoi = ClampToRoi(top * scaleH, RoiHeight);
                float bottomRoi = ClampToRoi(bottom * scaleH, RoiHeight);

                // Then translate into the original image coordinate system.
                float leftOrig = leftRoi + RoiLeft;
                float rightOrig = rightRoi + RoiLeft;
                float topOrig = topRoi + RoiTop;
                float bottomOrig = bottomRoi + RoiTop;

                Rect rect = new Rect(
                    (int)leftOrig,
                    (int)topOrig,
                    (int)rightOrig - (int)leftOrig,
                    (int)bottomOrig - (int)topOrig);
                results.Add(new DetectedObject(label, conf, rect));
            }

            return results.ToArray();
        }

        /// <summary>
        /// Releases the inference session, the bound tensor values, the molded
        /// image and the session options. Safe to call more than once.
        /// </summary>
        public virtual void Dispose()
        {
            DoDispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Finalizer.
        /// </summary>
        ~InferNetOnnxDetectBasicYolov8()
        {
            DoDispose(false);
        }

        #region private

        /// <summary>
        /// Limits a coordinate to the range [0, <paramref name="upper"/>].
        /// </summary>
        private static float ClampToRoi(float value, float upper)
        {
            return Math.Min(Math.Max(value, 0), upper);
        }

        /// <summary>
        /// Shared cleanup for <see cref="Dispose"/> and the finalizer.
        /// </summary>
        /// <param name="disposing">
        /// <c>true</c> when called from <see cref="Dispose"/>; <c>false</c> on the
        /// finalizer thread, where other managed objects must not be touched.
        /// </param>
        private void DoDispose(bool disposing)
        {
            if (!disposing)
            {
                return;
            }

            // Report "not loaded" from now on so LoadNetwork can run again.
            _modelLoaded = false;

            lock (_sessLocker)
            {
                _inferSession?.Dispose();
                _inferSession = null;
            }

            _inputValue?.Dispose();
            _inputValue = null;
            _outputValue?.Dispose();
            _outputValue = null;
            _moldedImage?.Dispose();
            _moldedImage = null;

            // Released after the session that was created from it.
            _sessionOption?.Dispose();
            _sessionOption = null;
        }

        #endregion
    }
}