// cooper.sun / Algorithm-Repo
							using System;
using System.Collections.Generic;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System.Runtime.InteropServices;
using System.IO;
using System.Runtime.InteropServices;

namespace YOLODetectProcessLib
{
    public abstract class InferNetOnnxDetectBasicYolov8 : IInferenceNetwork
    {
        #region dll import

        /// <summary>
        /// Selects which YOLO output format the native post-processing should assume.
        /// The value is passed to the native DLL inside <see cref="YoloDetectInput"/>,
        /// so the member order (and therefore the numeric values) must not change.
        /// </summary>
        public enum YoloType
        {
            yolov5,
            yolov8
        }

        /// <summary>
        /// Parameter struct handed to the C++ DLL that selects (filters) the raw detection results.
        /// The field order and the layout attribute define the native memory layout; do not reorder.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Pack = 16)]
        public struct YoloDetectInput
        {
            public YoloType YoloType; // YOLO post-processing type; yolov5 and yolov8 are currently supported
            public IntPtr Alldetectresult; // All raw input results: (batch_size, allboxesnum, 5+classesnum), float32. NOTE(review): LoadNetwork in this file shapes the model output as (1, classesnum + 4, allboxesnum) - confirm the layout the DLL expects per YoloType
            public IntPtr Outputresult; // (batch_size, maxdet, 6), float32
            public IntPtr Errormsg; // Error message buffer
            public float Confthres; // Box confidence threshold, default 0.6
            // Confthres is the box confidence; a separate class confidence was added (previously the two were treated as the same value)
            public float Clsconfthres; // Class confidence threshold, default 0.5
            public int Batchsize; // Batch size, default 1
            public int Allboxesnum; // Number of all input boxes
            public int Classesnum; // Number of classes, excluding the background class
            public int Maxdet; // Maximum number of final output results, default 10
            public int Errormsgmaxlen; // Maximum error message length (a longer message is not copied)
            public int Modelinputheight; // Model input height, used to remove boxes with a small area, default 320
            public int Modelinputwidth; // Model input width, used to remove boxes with a small area, default 320
            public float Minboxratio; // Boxes that are too small are removed: ratio of box area to image area, default 0.01; boxes below this value are dropped
            // In ApplyPostProcessToBBox: box filtering within a single class
            public int Postprocesstopk; // Number of top-k boxes selected in ApplyPostProcessToBBox, default 5
            public float Ioufltth; // IoU filter threshold, default 0.3
            public float Iosfltth; // IoS filter threshold, default 0.3
            // In FindBoxesToUnion: when two boxes overlap beyond a certain degree and merging them adds little area, the two boxes are merged
            public float Unioniouth; // Union parameter, default 1.f
            public float Unioniosth; // Union parameter, default 1.f
            public float Unionuobth; // Union parameter, default 1.f
            // In ApplyBoxClassFilter: several boxes on the same image; box filtering across different classes
            public float Ioufltthdiffcls; // In ApplyBoxClassFilter: IoU filter threshold, default 0.3
            public float Iosfltthdiffcls; // In ApplyBoxClassFilter: IoS filter threshold, default 0.3
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableioufilt; // In ApplyPostProcessToBBox: whether IoU filtering is enabled, default true (when the IoU of two boxes exceeds Ioufltth, the lower-scoring box is discarded)
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableiosfilt; // In ApplyPostProcessToBBox: whether IoS filtering is enabled, default false (when the IoS of two boxes exceeds Iosfltth, the lower-scoring box is discarded)
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableunion; // In FindBoxesToUnion: whether box merging is enabled, default false
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableioufiltdiffcls; // In ApplyBoxClassFilter: whether IoU filtering is enabled, default true (when the IoU of two boxes exceeds the threshold, the lower-scoring box is discarded)
            [MarshalAs(UnmanagedType.I1)]
            public bool Enableiosfiltdiffcls; // In ApplyBoxClassFilter: whether IoS filtering is enabled, default false (when the IoS of two boxes exceeds the threshold, the lower-scoring box is discarded)
        };

        /// <summary>
        /// P/Invoke entry point into YOLOOutputPostProcessUtil.dll (cdecl). The parameter
        /// struct is passed by value and the result is marshalled as a 1-byte boolean.
        /// </summary>
        /// <param name="input">Thresholds, sizes and pointers to the input/output/error buffers.</param>
        /// <returns>The caller in this file treats <c>false</c> as a failed post-processing call.</returns>
        [DllImport(@"YOLOOutputPostProcessUtil.dll", CallingConvention = CallingConvention.Cdecl)]
        [return: MarshalAs(UnmanagedType.I1)]
        public static extern bool SelectNeededBoxes(YoloDetectInput input);

        #endregion

        #region protected

        // --- ONNX Runtime session state ---
        // Session options created in LoadNetwork (inter-op/intra-op thread counts).
        SessionOptions _sessionOption = null;
        // Set to true at the end of LoadNetwork; exposed through NetworkLoaded.
        protected volatile bool _modelLoaded;
        private InferenceSession _inferSession;
        // Taken around creation, Run and Dispose of _inferSession.
        private object _sessLocker = new object();
        // Set in LoadNetwork to { 1, C, H, W } and { 1, _classesnum + 4, _allboxesnum } respectively.
        protected int[] _inputTensorShape = null;
        protected int[] _outputTensorShape = null;
        // Fixed-buffer values wrapping the molded image buffer and _detectedResultData.
        private FixedBufferOnnxValue _inputValue;
        private FixedBufferOnnxValue _outputValue;
        // Model input dimensions: used for the input tensor shape, the MoldedImage and the ROI scale factors.
        private int _modelInputH = 448;
        private int _modelInputW = 448;
        private int _modelInputC = 3;
        private string[] _inputTensorNames = new[] { "images" };
        private string[] _outputTensorNames = new[] { "output0" };

        // --- Pre-processing settings, passed to the MoldedImage constructor in LoadNetwork ---
        protected readonly EnumResizeMode _resizeMode = EnumResizeMode.Warp;
        protected readonly EnumMeanValueType _meanValueType = EnumMeanValueType.None;
        protected readonly float _meanValueR = 0;
        protected readonly float _meanValueG = 0;
        protected readonly float _meanValueB = 0;
        protected readonly EnumScaleValueType _scaleValueType = EnumScaleValueType.ConstantScale;
        protected readonly float _scaleValueR = 255.0f;
        protected readonly float _scaleValueG = 255.0f;
        protected readonly float _scaleValueB = 255.0f;
        protected readonly EnumNormalizationType _normType = EnumNormalizationType.None;
        protected readonly EnumChannelOrder _channelOrder = EnumChannelOrder.RGB;
        protected readonly EnumAxisOrder _axisOrder = EnumAxisOrder.CHW;

        // --- Post-processing state; the values below are copied into YoloDetectInput in DetectionPostProcess ---
        protected YoloType _yolotype = YoloType.yolov8;
        // Raw model output buffer (backs the output tensor); pinned and handed to the native DLL.
        private float[] _detectedResultData;
        protected MoldedImage _moldedImage = null;
        // Error message buffer, reallocated on every DetectionPostProcess call.
        protected char[] _errormsg;
        protected float _confthres = 0.3f;
        protected float _clsconfthres = 0.4f;
        protected int _batchsize = 1;
        protected int _allboxesnum = 16660;
        protected int _classesnum = 4;
        protected int _maxdet = 50;
        protected int _errormsgmaxlen = 256;
        // Passed to the native DLL and multiplied with the returned box coordinates.
        // NOTE(review): kept separately from _modelInputH/_modelInputW (both 448 here) - confirm they must stay in sync.
        protected int _modelinputheight = 448;
        protected int _modelinputwidth = 448;
        protected float _minboxratio = 0.00001f;
        protected int _postprocesstopk = 20;
        protected bool _enableioufilt = true;
        protected bool _enableiosfilt = true;
        protected float _ioufltth = 0.3f;
        protected float _iosfltth = 0.3f;
        // Native output buffer of _batchsize * _maxdet * 6 floats, reallocated on every DetectionPostProcess call.
        protected float[] _outputresultData;
        protected bool _enableunion = false;
        protected float _unioniouth = 1.0f;
        protected float _unioniosth = 1.0f;
        protected float _unionuobth = 1.0f;
        protected bool _enableioufiltdiffcls = false;
        protected bool _enableiosfiltdiffcls = false;
        protected float _ioufltthdiffcls = 0.01f;
        protected float _iosfltthdiffcls = 0.01f;

        #endregion


        /// <summary>
        /// Hash value of the network; passed to the network file reader in LoadNetwork.
        /// </summary>
        public abstract string HashCode { get; }

        /// <summary>
        /// Name of the network; passed to the network file reader in LoadNetwork.
        /// </summary>
        public abstract string NetworkName { get; }

        /// <summary>
        /// Whether the model has been loaded.
        /// </summary>
        public bool NetworkLoaded => _modelLoaded;

        /// <summary>
        /// Notifies subscribers that an error occurred during inference.
        /// </summary>
        public event EventHandler<ErrorEventArgs> NotifyError;


        /// <summary>
        /// Loads the ONNX model, validates its input/output tensors and allocates the
        /// fixed input/output buffers used by <see cref="Process"/>. Does nothing when the
        /// model is already loaded.
        /// </summary>
        /// <param name="numCPU">Thread count applied to both the inter-op and intra-op settings of the session.</param>
        /// <param name="netDir">Directory handed to the network file reader together with <see cref="NetworkName"/> and <see cref="HashCode"/>.</param>
        /// <remarks>
        /// Everything is built into locals first and only published to the fields once all
        /// steps succeeded. If any step throws, the resources created by this call are
        /// disposed and the exception is rethrown, so a failed load leaves no half-initialised
        /// state behind and can safely be retried.
        /// </remarks>
        public virtual void LoadNetwork(int numCPU, string netDir)
        {
            // Do not load twice.
            if (_modelLoaded)
            {
                return;
            }

            byte[] trainedNetwork = InferenceNetworkUtils.ReadNetworkDataFromFile(netDir, NetworkName, HashCode);

            SessionOptions sessionOption = null;
            InferenceSession inferSession = null;
            MoldedImage moldedImage = null;
            FixedBufferOnnxValue inputValue = null;
            FixedBufferOnnxValue outputValue = null;
            int[] inputTensorShape;
            int[] outputTensorShape;
            float[] detectedResultData;

            try
            {
                sessionOption = new SessionOptions();
                sessionOption.InterOpNumThreads = numCPU;
                sessionOption.IntraOpNumThreads = numCPU;
                inferSession = new InferenceSession(trainedNetwork, sessionOption);

                // Check the model's input/output tensor names and shapes.
                inputTensorShape = new int[] { 1, _modelInputC, _modelInputH, _modelInputW };
                outputTensorShape = new int[] { 1, _classesnum + 4, _allboxesnum };

                var inputTensorDimensions = new List<int[]> { inputTensorShape };
                var outputTensorDimensions = new List<int[]> { outputTensorShape };
                InferenceNetworkUtils.CheckOnnxModel(inferSession, _inputTensorNames, _outputTensorNames, inputTensorDimensions, outputTensorDimensions);

                // Create the molded image whose data buffer backs the input tensor.
                moldedImage = new MoldedImage(_modelInputH, _modelInputW, _modelInputC, _resizeMode, _meanValueType,
                    _meanValueR, _meanValueG, _meanValueB, _scaleValueType, _scaleValueR, _scaleValueG, _scaleValueB,
                    _normType, _channelOrder, _axisOrder);
                detectedResultData = new float[_allboxesnum * (_classesnum + 4)];
                var tensorInput = new DenseTensor<float>(moldedImage.DataBuffer, inputTensorShape);
                var tensorOutput = new DenseTensor<float>(detectedResultData, outputTensorShape);
                inputValue = FixedBufferOnnxValue.CreateFromTensor(tensorInput);
                outputValue = FixedBufferOnnxValue.CreateFromTensor(tensorOutput);
            }
            catch
            {
                // Release whatever this call managed to create, in reverse order of creation.
                outputValue?.Dispose();
                inputValue?.Dispose();
                moldedImage?.Dispose();
                inferSession?.Dispose();
                sessionOption?.Dispose();
                throw;
            }

            // Publish the fully constructed state.
            // Options left over from an earlier load are released before being replaced.
            _sessionOption?.Dispose();
            _sessionOption = sessionOption;
            lock (_sessLocker)
            {
                _inferSession = inferSession;
            }
            _inputTensorShape = inputTensorShape;
            _outputTensorShape = outputTensorShape;
            _moldedImage = moldedImage;
            _detectedResultData = detectedResultData;
            _inputValue = inputValue;
            _outputValue = outputValue;
            _modelLoaded = true;
        }

        /// <summary>
        /// Runs inference on each input image in turn: the image is molded into the shared
        /// input buffer, the session is run, and the raw output is post-processed using the
        /// image's ROI.
        /// </summary>
        /// <param name="images">Images to process.</param>
        /// <returns>
        /// One array of detected objects per input image, in input order; <c>null</c> when an
        /// exception occurred. In that case the exception is reported through
        /// <see cref="NotifyError"/> instead of being silently discarded.
        /// </returns>
        public virtual IDetectedObject[][] Process(InferenceNetworkInputImage[] images)
        {
            try
            {
                int batchSize = images.Length;
                IDetectedObject[][] results = new IDetectedObject[batchSize][];

                // The fixed-buffer values are the same for every image, so wrap them once.
                var inputValues = new[] { _inputValue };
                var outputValues = new[] { _outputValue };

                for (int ni = 0; ni < batchSize; ni++)
                {
                    _moldedImage.Process(images[ni]);

                    lock (_sessLocker)
                    {
                        _inferSession.Run(_inputTensorNames, inputValues, _outputTensorNames, outputValues);
                    }

                    // Post-processing: ROI width, height, left and top of the current image.
                    results[ni] = DetectionPostProcess((int)(images[ni].ROI.Right - images[ni].ROI.Left),
                        (int)(images[ni].ROI.Bottom - images[ni].ROI.Top),
                        (int)images[ni].ROI.Left,
                        (int)images[ni].ROI.Top);
                }

                return results;
            }
            catch (Exception excep)
            {
                // Keep the "return null on failure" contract, but tell subscribers what went wrong.
                NotifyError?.Invoke(this, new ErrorEventArgs(excep));
                return null;
            }
        }

        /// <summary>
        /// Passes the raw model output to the native box selection and converts the selected
        /// boxes from model-input coordinates back to the original image coordinate system.
        /// </summary>
        /// <param name="RoiWidth">Width of the ROI the model input was taken from.</param>
        /// <param name="RoiHeight">Height of the ROI the model input was taken from.</param>
        /// <param name="RoiLeft">Left offset of the ROI in the original image.</param>
        /// <param name="RoiTop">Top offset of the ROI in the original image.</param>
        /// <returns>
        /// The detected objects; output rows with a zero confidence or a zero right edge are skipped.
        /// A failed native call or an unsupported batch size is reported through
        /// <see cref="NotifyError"/>; the output buffer is still read afterwards.
        /// </returns>
        public IDetectedObject[] DetectionPostProcess(int RoiWidth, int RoiHeight, int RoiLeft, int RoiTop)
        {
            List<IDetectedObject> results = new List<IDetectedObject>();

            _outputresultData = new float[_batchsize * _maxdet * 6];
            _errormsg = new char[_errormsgmaxlen];

            GCHandle hdetectresult = default(GCHandle);
            GCHandle houtputresult = default(GCHandle);
            GCHandle herrormsg = default(GCHandle);
            bool ret;
            try
            {
                // Pin the managed buffers only for the duration of the native call.
                hdetectresult = GCHandle.Alloc(_detectedResultData, GCHandleType.Pinned);
                houtputresult = GCHandle.Alloc(_outputresultData, GCHandleType.Pinned);
                herrormsg = GCHandle.Alloc(_errormsg, GCHandleType.Pinned);

                YoloDetectInput yoloinput = new YoloDetectInput();
                yoloinput.YoloType = _yolotype;
                yoloinput.Alldetectresult = hdetectresult.AddrOfPinnedObject();
                yoloinput.Outputresult = houtputresult.AddrOfPinnedObject();
                yoloinput.Errormsg = herrormsg.AddrOfPinnedObject();
                yoloinput.Confthres = _confthres;
                yoloinput.Clsconfthres = _clsconfthres;
                yoloinput.Batchsize = _batchsize;
                yoloinput.Allboxesnum = _allboxesnum;
                yoloinput.Classesnum = _classesnum;
                yoloinput.Maxdet = _maxdet;
                yoloinput.Errormsgmaxlen = _errormsgmaxlen;
                yoloinput.Modelinputheight = _modelinputheight;
                yoloinput.Modelinputwidth = _modelinputwidth;
                yoloinput.Minboxratio = _minboxratio;
                yoloinput.Postprocesstopk = _postprocesstopk;
                yoloinput.Enableioufilt = _enableioufilt;
                yoloinput.Enableiosfilt = _enableiosfilt;
                yoloinput.Ioufltth = _ioufltth;
                yoloinput.Iosfltth = _iosfltth;
                yoloinput.Enableunion = _enableunion;
                yoloinput.Unioniouth = _unioniouth;
                yoloinput.Unioniosth = _unioniosth;
                yoloinput.Unionuobth = _unionuobth;
                yoloinput.Enableioufiltdiffcls = _enableioufiltdiffcls;
                yoloinput.Enableiosfiltdiffcls = _enableiosfiltdiffcls;
                yoloinput.Ioufltthdiffcls = _ioufltthdiffcls;
                yoloinput.Iosfltthdiffcls = _iosfltthdiffcls;

                ret = SelectNeededBoxes(yoloinput);
            }
            finally
            {
                // Every pinned handle must be freed, otherwise each call leaks three handles
                // and keeps the arrays pinned forever.
                if (herrormsg.IsAllocated)
                {
                    herrormsg.Free();
                }
                if (houtputresult.IsAllocated)
                {
                    houtputresult.Free();
                }
                if (hdetectresult.IsAllocated)
                {
                    hdetectresult.Free();
                }
            }

            if (!ret)
            {
                NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("Failed at calling DetectionPostProcess")));
            }

            float[] outs = _outputresultData;
            // Only a single image is supported.
            if (_batchsize != 1)
            {
                NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("batchsize must be 1")));
            }

            // Scale factors from model-input size to ROI size (identical for every box).
            float scaleW = (float)RoiWidth / _modelInputW;
            float scaleH = (float)RoiHeight / _modelInputH;

            for (int ni = 0; ni < _maxdet; ni++)
            {
                // Output row layout: [label, conf, left, top, right, bottom].
                int row = ni * 6;
                int label = (int)outs[row];
                float conf = outs[row + 1];

                // The native coordinates are multiplied by the model input size
                // (presumably they are normalised to [0, 1] - confirm against the DLL).
                float left = outs[row + 2] * _modelinputwidth;
                float top = outs[row + 3] * _modelinputheight;
                float right = outs[row + 4] * _modelinputwidth;
                float bottom = outs[row + 5] * _modelinputheight;

                // Skip rows with a zero confidence or a zero right edge.
                if (conf == 0.0f || (int)right == 0)
                {
                    continue;
                }

                // Convert to ROI coordinates and clamp to the ROI bounds.
                float leftRoi = Math.Min(Math.Max(left * scaleW, 0), RoiWidth);
                float rightRoi = Math.Min(Math.Max(right * scaleW, 0), RoiWidth);
                float topRoi = Math.Min(Math.Max(top * scaleH, 0), RoiHeight);
                float bottomRoi = Math.Min(Math.Max(bottom * scaleH, 0), RoiHeight);

                // Shift into the original image coordinate system.
                float leftOrig = leftRoi + RoiLeft;
                float rightOrig = rightRoi + RoiLeft;
                float topOrig = topRoi + RoiTop;
                float bottomOrig = bottomRoi + RoiTop;

                Rect rect = new Rect((int)leftOrig, (int)topOrig, (int)rightOrig - (int)leftOrig, (int)bottomOrig - (int)topOrig);
                DetectedObject detectedObject = new DetectedObject(label, conf, rect);
                results.Add(detectedObject);
            }
            return results.ToArray();
        }

        /// <summary>
        /// Releases the resources held by this instance and suppresses finalization,
        /// since the cleanup the finalizer would perform has already been done.
        /// </summary>
        public virtual void Dispose()
        {
            DoDispose();
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Finalizer: performs the same cleanup as <see cref="Dispose"/> when the instance
        /// was not disposed explicitly.
        /// </summary>
        ~InferNetOnnxDetectBasicYolov8()
        {
            DoDispose();
        }

        #region private

        /// <summary>
        /// Releases the inference session, its options, the fixed tensor buffers and the
        /// molded image, and marks the model as not loaded. Safe to call more than once:
        /// every reference is cleared after it has been disposed.
        /// </summary>
        private void DoDispose()
        {
            // Mark as unloaded first so NetworkLoaded no longer reports a usable model
            // and LoadNetwork is able to load again afterwards.
            _modelLoaded = false;

            lock (_sessLocker)
            {
                _inferSession?.Dispose();
                _inferSession = null;
            }

            _inputValue?.Dispose();
            _inputValue = null;
            _outputValue?.Dispose();
            _outputValue = null;

            // Clear the reference so a second call does not dispose the image twice.
            _moldedImage?.Dispose();
            _moldedImage = null;

            // The session options were previously never released.
            _sessionOption?.Dispose();
            _sessionOption = null;
        }

        #endregion
    }
}