using System;
using System.Collections.Generic;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using System.Runtime.InteropServices;
using System.IO;
using System.Runtime.InteropServices;
namespace YOLODetectProcessLib
{
public abstract class InferNetOnnxDetectBasicYolov8 : IInferenceNetwork
{
#region dll import
/// <summary>
/// Identifies which YOLO post-processing variant the native box selector should apply.
/// The value is passed to native code through <c>YoloDetectInput.YoloType</c>.
/// </summary>
public enum YoloType
{
    /// <summary>YOLOv5 post-processing.</summary>
    yolov5 = 0,

    /// <summary>YOLOv8 post-processing.</summary>
    yolov8 = 1
}
/// <summary>
/// Parameter block handed by value to the native <c>SelectNeededBoxes</c> routine
/// (the original comment describes it as the struct of contour-filtering parameters in the C++ DLL).
/// The layout is sequential with Pack = 16, so field order and field types are part of the
/// marshalled layout and must not be rearranged.
/// The "default" values quoted in the field comments are carried over from the original comments;
/// the values actually sent are whatever the caller assigns before the call.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 16)]
public struct YoloDetectInput
{
public YoloType YoloType; // YOLO post-processing type; yolov5 and yolov8 are currently supported
public IntPtr Alldetectresult; // all raw detections (input): (batch_size, allboxesnum, 5+classesnum), float32
public IntPtr Outputresult; // selected detections (output): (batch_size, maxdet, 6), float32
public IntPtr Errormsg; // error message buffer
public float Confthres; // box confidence threshold, default 0.6
// Confthres is the box confidence; a separate class confidence was added (previously the two were always identical)
public float Clsconfthres; // class confidence threshold, default 0.5
public int Batchsize; // batch size, default 1
public int Allboxesnum; // number of input boxes
public int Classesnum; // number of classes, excluding the background class
public int Maxdet; // maximum number of results in the final output, default 10
public int Errormsgmaxlen; // maximum error message length (a longer message is not copied)
public int Modelinputheight; // model input height, used to remove boxes with a small area, default 320
public int Modelinputwidth; // model input width, used to remove boxes with a small area, default 320
public float Minboxratio; // boxes that are too small are removed: box area as a fraction of the image area, default 0.01; boxes below this value are dropped
// Used in ApplyPostProcessToBBox to filter boxes within a single class
public int Postprocesstopk; // number of top-k boxes selected in ApplyPostProcessToBBox, default 5
public float Ioufltth; // IoU filter threshold, default 0.3
public float Iosfltth; // IoS filter threshold, default 0.3
// In FindBoxesToUnion: two boxes are merged when their overlap exceeds a certain degree and merging adds little area
public float Unioniouth; // union parameter, default 1.f
public float Unioniosth; // union parameter, default 1.f
public float Unionuobth; // union parameter, default 1.f
// In ApplyBoxClassFilter: several boxes on the same image; used to filter boxes of different classes
public float Ioufltthdiffcls; // IoU filter threshold in ApplyBoxClassFilter, default 0.3
public float Iosfltthdiffcls; // IoS filter threshold in ApplyBoxClassFilter, default 0.3
[MarshalAs(UnmanagedType.I1)]
public bool Enableioufilt; // ApplyPostProcessToBBox: enable IoU filtering, default true (when the IoU of two boxes exceeds Ioufltth, the lower-scoring box is discarded)
[MarshalAs(UnmanagedType.I1)]
public bool Enableiosfilt; // ApplyPostProcessToBBox: enable IoS filtering, default false (when the IoS of two boxes exceeds Iosfltth, the lower-scoring box is discarded)
[MarshalAs(UnmanagedType.I1)]
public bool Enableunion; // FindBoxesToUnion: enable box merging, default false
[MarshalAs(UnmanagedType.I1)]
public bool Enableioufiltdiffcls; // ApplyBoxClassFilter: enable IoU filtering, default true (when the IoU of two boxes exceeds the IoU threshold, the lower-scoring box is discarded)
[MarshalAs(UnmanagedType.I1)]
public bool Enableiosfiltdiffcls; // ApplyBoxClassFilter: enable IoS filtering, default false (when the IoS of two boxes exceeds the IoS threshold, the lower-scoring box is discarded)
};
/// <summary>
/// P/Invoke declaration for the box-selection routine exported by YOLOOutputPostProcessUtil.dll
/// (cdecl calling convention). The parameter struct is passed by value.
/// </summary>
/// <param name="input">
/// Filled-in parameter block. Its IntPtr fields are raw addresses, so the memory they point to
/// has to stay at a fixed address for the duration of the call (this class passes pinned arrays).
/// </param>
/// <returns>
/// A one-byte native boolean. The caller in this class treats <c>false</c> as a failed call.
/// </returns>
[DllImport(@"YOLOOutputPostProcessUtil.dll", CallingConvention = CallingConvention.Cdecl)]
[return: MarshalAs(UnmanagedType.I1)]
public static extern bool SelectNeededBoxes(YoloDetectInput input);
#endregion
#region protected
// ---- ONNX Runtime session state ----
private SessionOptions _sessionOption;
private InferenceSession _inferSession;
private object _sessLocker = new object();   // guards creation, use and disposal of _inferSession
protected volatile bool _modelLoaded;          // set once LoadNetwork has completed

// ---- Tensor names, shapes and fixed buffers ----
private string[] _inputTensorNames = { "images" };
private string[] _outputTensorNames = { "output0" };
protected int[] _inputTensorShape;
protected int[] _outputTensorShape;
private FixedBufferOnnxValue _inputValue;
private FixedBufferOnnxValue _outputValue;
private float[] _detectedResultData;           // raw network output, backing store of the output tensor

// ---- Model input geometry (used for tensor shapes and for box rescaling) ----
private int _modelInputH = 448;
private int _modelInputW = 448;
private int _modelInputC = 3;

// ---- Image pre-processing configuration handed to MoldedImage ----
protected MoldedImage _moldedImage;
protected readonly EnumResizeMode _resizeMode = EnumResizeMode.Warp;
protected readonly EnumMeanValueType _meanValueType = EnumMeanValueType.None;
protected readonly float _meanValueR = 0f, _meanValueG = 0f, _meanValueB = 0f;
protected readonly EnumScaleValueType _scaleValueType = EnumScaleValueType.ConstantScale;
protected readonly float _scaleValueR = 255f, _scaleValueG = 255f, _scaleValueB = 255f;
protected readonly EnumNormalizationType _normType = EnumNormalizationType.None;
protected readonly EnumChannelOrder _channelOrder = EnumChannelOrder.RGB;
protected readonly EnumAxisOrder _axisOrder = EnumAxisOrder.CHW;

// ---- Native post-processing: buffers ----
protected float[] _outputresultData;           // selected boxes written by the native routine
protected char[] _errormsg;                    // error message buffer handed to the native routine

// ---- Native post-processing: general parameters ----
protected YoloType _yolotype = YoloType.yolov8;
protected float _confthres = 0.3f;
protected float _clsconfthres = 0.4f;
protected int _batchsize = 1;
protected int _allboxesnum = 16660;
protected int _classesnum = 4;
protected int _maxdet = 50;
protected int _errormsgmaxlen = 256;
protected int _modelinputheight = 448;
protected int _modelinputwidth = 448;
protected float _minboxratio = 0.00001f;

// ---- Native post-processing: filtering within one class ----
protected int _postprocesstopk = 20;
protected bool _enableioufilt = true;
protected bool _enableiosfilt = true;
protected float _ioufltth = 0.3f;
protected float _iosfltth = 0.3f;

// ---- Native post-processing: box merging ----
protected bool _enableunion;
protected float _unioniouth = 1f;
protected float _unioniosth = 1f;
protected float _unionuobth = 1f;

// ---- Native post-processing: filtering across different classes ----
protected bool _enableioufiltdiffcls;
protected bool _enableiosfiltdiffcls;
protected float _ioufltthdiffcls = 0.01f;
protected float _iosfltthdiffcls = 0.01f;
#endregion
/// <summary>
/// Hash value of the network; supplied by the concrete subclass.
/// </summary>
public abstract string HashCode { get; }

/// <summary>
/// Name of the network; supplied by the concrete subclass.
/// </summary>
public abstract string NetworkName { get; }

/// <summary>
/// Whether the model has been loaded.
/// </summary>
public bool NetworkLoaded
{
    get { return _modelLoaded; }
}

/// <summary>
/// Raised to tell subscribers that an error occurred during inference.
/// </summary>
public event EventHandler NotifyError;
/// <summary>
/// Loads the ONNX model, validates its input/output tensors and allocates the fixed
/// input/output buffers used for inference. Does nothing when a model is already loaded.
/// If any step fails, everything created so far is released and the exception is rethrown,
/// so a later retry starts from a clean state.
/// </summary>
/// <param name="numCPU">Thread count assigned to both the inter-op and intra-op session settings.</param>
/// <param name="netDir">
/// Directory passed to <c>InferenceNetworkUtils.ReadNetworkDataFromFile</c> together with
/// <see cref="NetworkName"/> and <see cref="HashCode"/>.
/// </param>
public virtual void LoadNetwork(int numCPU, string netDir)
{
    // Never load twice.
    if (_modelLoaded)
    {
        return;
    }

    byte[] trainedNetwork = InferenceNetworkUtils.ReadNetworkDataFromFile(netDir, NetworkName, HashCode);

    try
    {
        _sessionOption = new SessionOptions
        {
            InterOpNumThreads = numCPU,
            IntraOpNumThreads = numCPU
        };

        lock (_sessLocker)
        {
            _inferSession = new InferenceSession(trainedNetwork, _sessionOption);
        }

        // Check the model's input/output tensor names and dimensions.
        _inputTensorShape = new int[] { 1, _modelInputC, _modelInputH, _modelInputW };
        _outputTensorShape = new int[] { 1, _classesnum + 4, _allboxesnum };
        var inputTensorDimensions = new List<int[]> { _inputTensorShape };
        var outputTensorDimensions = new List<int[]> { _outputTensorShape };
        InferenceNetworkUtils.CheckOnnxModel(_inferSession, _inputTensorNames, _outputTensorNames, inputTensorDimensions, outputTensorDimensions);

        // Create the pre-processing buffer (moldedImage).
        _moldedImage = new MoldedImage(_modelInputH, _modelInputW, _modelInputC, _resizeMode, _meanValueType,
            _meanValueR, _meanValueG, _meanValueB, _scaleValueType, _scaleValueR, _scaleValueG, _scaleValueB,
            _normType, _channelOrder, _axisOrder);
        _detectedResultData = new float[_allboxesnum * (_classesnum + 4)];

        // Wrap the managed buffers once so every Run call reuses the same memory.
        // NOTE(review): assumes MoldedImage.DataBuffer is a float buffer — confirm against MoldedImage.
        var tensorInput = new DenseTensor<float>(_moldedImage.DataBuffer, _inputTensorShape);
        var tensorOutput = new DenseTensor<float>(_detectedResultData, _outputTensorShape);
        _inputValue = FixedBufferOnnxValue.CreateFromTensor(tensorInput);
        _outputValue = FixedBufferOnnxValue.CreateFromTensor(tensorOutput);

        _modelLoaded = true;
    }
    catch
    {
        // A partially built session would otherwise be leaked and then overwritten on the next attempt.
        DoDispose();
        _sessionOption?.Dispose();
        _sessionOption = null;
        throw;
    }
}
/// <summary>
/// Runs inference and post-processing on each image in turn.
/// </summary>
/// <param name="images">Images to process; each one supplies the ROI used to map boxes back to the original image.</param>
/// <returns>
/// One array of detected objects per input image, in input order.
/// Returns <c>null</c> when any step throws; the exception is reported through <see cref="NotifyError"/>.
/// </returns>
public virtual IDetectedObject[][] Process(InferenceNetworkInputImage[] images)
{
    try
    {
        int batchSize = images.Length;
        var results = new IDetectedObject[batchSize][];

        // The fixed-buffer wrappers are the same for every image, so build the argument arrays once.
        var inputValues = new[] { _inputValue };
        var outputValues = new[] { _outputValue };

        for (int ni = 0; ni < batchSize; ni++)
        {
            var image = images[ni];

            // NOTE(review): pre-processing and post-processing use buffers shared by the whole
            // instance but run outside the lock; only the Run call itself is serialized.
            _moldedImage.Process(image);
            lock (_sessLocker)
            {
                _inferSession.Run(_inputTensorNames, inputValues, _outputTensorNames, outputValues);
            }

            // Post-processing: convert the raw output into boxes in original-image coordinates.
            var roi = image.ROI;
            results[ni] = DetectionPostProcess(
                (int)(roi.Right - roi.Left),
                (int)(roi.Bottom - roi.Top),
                (int)roi.Left,
                (int)roi.Top);
        }

        return results;
    }
    catch (Exception excep)
    {
        // Keep the null-on-failure contract, but surface the cause instead of dropping it silently.
        NotifyError?.Invoke(this, new ErrorEventArgs(excep));
        return null;
    }
}
/// <summary>
/// Passes the raw network output to the native box selector and converts the selected boxes
/// into detected objects expressed in original-image coordinates.
/// </summary>
/// <param name="RoiWidth">Width of the ROI in the original image; boxes are scaled to it and clamped within it.</param>
/// <param name="RoiHeight">Height of the ROI in the original image; boxes are scaled to it and clamped within it.</param>
/// <param name="RoiLeft">X offset of the ROI in the original image; added to every box.</param>
/// <param name="RoiTop">Y offset of the ROI in the original image; added to every box.</param>
/// <returns>The boxes whose confidence and right edge are both non-zero; may be empty.</returns>
/// <exception cref="InvalidOperationException">The raw output buffer has not been allocated yet.</exception>
public IDetectedObject[] DetectionPostProcess(int RoiWidth, int RoiHeight, int RoiLeft, int RoiTop)
{
    if (_detectedResultData == null)
    {
        // Without this guard the native routine would be handed a null input pointer.
        throw new InvalidOperationException("The model must be loaded before post-processing can run.");
    }

    var results = new List<IDetectedObject>();

    // Fresh buffers on every call; both stay reachable through the protected fields afterwards.
    _outputresultData = new float[_batchsize * _maxdet * 6];
    _errormsg = new char[_errormsgmaxlen];

    GCHandle hdetectresult = default(GCHandle);
    GCHandle houtputresult = default(GCHandle);
    GCHandle herrormsg = default(GCHandle);
    bool ret;
    try
    {
        // Pin the arrays so their addresses stay valid while native code reads and writes them.
        hdetectresult = GCHandle.Alloc(_detectedResultData, GCHandleType.Pinned);
        houtputresult = GCHandle.Alloc(_outputresultData, GCHandleType.Pinned);
        herrormsg = GCHandle.Alloc(_errormsg, GCHandleType.Pinned);

        YoloDetectInput yoloinput = BuildPostProcessInput(
            hdetectresult.AddrOfPinnedObject(),
            houtputresult.AddrOfPinnedObject(),
            herrormsg.AddrOfPinnedObject());

        ret = SelectNeededBoxes(yoloinput);
    }
    finally
    {
        // Every pinned handle must be freed, otherwise the arrays stay pinned forever.
        if (hdetectresult.IsAllocated)
        {
            hdetectresult.Free();
        }
        if (houtputresult.IsAllocated)
        {
            houtputresult.Free();
        }
        if (herrormsg.IsAllocated)
        {
            herrormsg.Free();
        }
    }

    if (!ret)
    {
        NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("Failed at calling DetectionPostProcess")));
    }

    // Only a single image is supported.
    if (_batchsize != 1)
    {
        NotifyError?.Invoke(this, new ErrorEventArgs(new Exception("batchsize must be 1")));
    }

    float[] outs = _outputresultData;

    // Factors that take model-input coordinates to ROI coordinates; identical for every box.
    float scaleW = (float)RoiWidth / _modelInputW;
    float scaleH = (float)RoiHeight / _modelInputH;

    for (int ni = 0; ni < _maxdet; ni++)
    {
        // Each result occupies six floats: label, confidence, left, top, right, bottom.
        // NOTE(review): the box values are multiplied by the model input size, so they are
        // presumably normalized by the native routine — confirm against the DLL.
        int offset = ni * 6;
        int label = (int)outs[offset];
        float conf = outs[offset + 1];
        float left = outs[offset + 2] * _modelinputwidth;
        float top = outs[offset + 3] * _modelinputheight;
        float right = outs[offset + 4] * _modelinputwidth;
        float bottom = outs[offset + 5] * _modelinputheight;

        // Slots the native routine left untouched are still zero; skip them.
        if (conf == 0.0f || (int)right == 0)
        {
            continue;
        }

        // Scale into the ROI and never leave the ROI bounds.
        float leftRoi = ClampToRange(left * scaleW, RoiWidth);
        float rightRoi = ClampToRange(right * scaleW, RoiWidth);
        float topRoi = ClampToRange(top * scaleH, RoiHeight);
        float bottomRoi = ClampToRange(bottom * scaleH, RoiHeight);

        // Shift from ROI coordinates to original-image coordinates.
        float leftOrig = leftRoi + RoiLeft;
        float rightOrig = rightRoi + RoiLeft;
        float topOrig = topRoi + RoiTop;
        float bottomOrig = bottomRoi + RoiTop;

        Rect rect = new Rect((int)leftOrig, (int)topOrig, (int)rightOrig - (int)leftOrig, (int)bottomOrig - (int)topOrig);
        results.Add(new DetectedObject(label, conf, rect));
    }

    return results.ToArray();
}

// Fills the native parameter block from the instance settings and the three pinned buffer addresses.
private YoloDetectInput BuildPostProcessInput(IntPtr allDetections, IntPtr selectedBoxes, IntPtr errorMessage)
{
    return new YoloDetectInput
    {
        YoloType = _yolotype,
        Alldetectresult = allDetections,
        Outputresult = selectedBoxes,
        Errormsg = errorMessage,
        Confthres = _confthres,
        Clsconfthres = _clsconfthres,
        Batchsize = _batchsize,
        Allboxesnum = _allboxesnum,
        Classesnum = _classesnum,
        Maxdet = _maxdet,
        Errormsgmaxlen = _errormsgmaxlen,
        Modelinputheight = _modelinputheight,
        Modelinputwidth = _modelinputwidth,
        Minboxratio = _minboxratio,
        Postprocesstopk = _postprocesstopk,
        Enableioufilt = _enableioufilt,
        Enableiosfilt = _enableiosfilt,
        Ioufltth = _ioufltth,
        Iosfltth = _iosfltth,
        Enableunion = _enableunion,
        Unioniouth = _unioniouth,
        Unioniosth = _unioniosth,
        Unionuobth = _unionuobth,
        Enableioufiltdiffcls = _enableioufiltdiffcls,
        Enableiosfiltdiffcls = _enableiosfiltdiffcls,
        Ioufltthdiffcls = _ioufltthdiffcls,
        Iosfltthdiffcls = _iosfltthdiffcls
    };
}

// Limits a coordinate to the closed range [0, upper].
private static float ClampToRange(float value, int upper)
{
    return Math.Min(Math.Max(value, 0), upper);
}
/// <summary>
/// Releases the resources held by this instance by calling DoDispose, then tells the
/// garbage collector that the finalizer no longer needs to run.
/// </summary>
public virtual void Dispose()
{
DoDispose();
GC.SuppressFinalize(this);
}
/// <summary>
/// Finalizer: last-chance cleanup for instances that were never disposed explicitly.
/// </summary>
~InferNetOnnxDetectBasicYolov8()
{
    try
    {
        DoDispose();
    }
    catch
    {
        // An exception escaping a finalizer terminates the process, and the objects being
        // released may already have been finalized themselves; cleanup here is best effort.
    }
}
#region private
// Releases the session, its options, the fixed tensor buffers and the pre-processing buffer.
// Every reference is cleared after disposal, so calling this more than once is harmless.
private void DoDispose()
{
    // The instance is unusable from here on; NetworkLoaded must stop reporting true.
    _modelLoaded = false;

    lock (_sessLocker)
    {
        _inferSession?.Dispose();
        _inferSession = null;
    }

    _inputValue?.Dispose();
    _inputValue = null;

    _outputValue?.Dispose();
    _outputValue = null;

    // Cleared so a second call cannot dispose the same object twice.
    _moldedImage?.Dispose();
    _moldedImage = null;

    // The session options wrap a native handle as well and were previously never released.
    _sessionOption?.Dispose();
    _sessionOption = null;
}
#endregion
}
}