如何在 ML.Net 中为 yolo v3 或 v4 onnx 模型实现 post-processing
How to implement post-processing for YOLO v3 or v4 ONNX models in ML.NET
我按照这篇 Microsoft 教程操作,没有遇到任何问题。但我想把模型换成 YOLO v3 或 v4。我从 onnx/models 获取了 YOLOv4 ONNX 模型,并且能够得到该模型的全部三个浮点输出数组,但问题出在后处理(post-processing)上:我无法从这些输出中得到正确的边界框。
为了与 YOLOv4 兼容,我修改了 Microsoft 教程源代码中的所有相关内容,例如锚点、步幅、输出网格大小以及一些函数等,但仍然无法得到正确的结果。
我对照 Python 实现检查了我的全部代码,但仍然不知道问题出在哪里。
有没有人有相关链接,或者知道如何使用 ML.NET 在 C# 中实现 YOLO v3 或 v4 ONNX 模型?
任何帮助都将不胜感激。
我认为直接将微软的教程从 YOLO v2 移植到 v3 是不可能的,因为它依赖于每个模型的输入和输出。
附带说明一下,我在 this GitHub repo: 'YOLOv3MLNet' 中移植了另一个 YOLO v3 模型到 ML.Net。它包含一个功能齐全的 ML.Net 管道。
我还在此处提供了此答案的代码:
回到您的模型,我将以 YOLO v3(在 onnx/models 存储库中可用)为例。可以找到模型的一个很好的解释 here。
第一个建议是使用 Netron 查看模型。这样做,您将看到输入层和输出层。他们还在 onnx/models 文档中描述了这些层。
Netron's yolov3-10 screenshot
(我在 Netron 中看到,这个特定的 YOLO v3 模型还通过非极大值抑制(Non-Maximum Suppression,NMS)步骤完成了部分后处理。)
- 输入图层名称:
input_1
、image_shape
- 输出图层名称:
yolonms_layer_1/ExpandDims_1:0
、yolonms_layer_1/ExpandDims_3:0
、yolonms_layer_1/concat_2:0
根据模型文档,输入形状为:
Resized image (1x3x416x416); original image size (1x2), which is [image.size[1], image.size[0]]
我们首先需要定义ML.Net输入和输出类如下:
/// <summary>
/// ML.NET input row: the source bitmap together with its width and height,
/// each exposed under an explicit column name.
/// </summary>
public class YoloV3BitmapData
{
    /// <summary>
    /// Source image, exposed as the "bitmap" column and declared with an
    /// image type of 416 x 416.
    /// </summary>
    [ColumnName("bitmap")]
    [ImageType(416, 416)]
    public Bitmap Image { get; set; }

    /// <summary>
    /// Width of <see cref="Image"/> as a float, exposed as the "width" column.
    /// </summary>
    [ColumnName("width")]
    public float ImageWidth
    {
        get { return Image.Width; }
    }

    /// <summary>
    /// Height of <see cref="Image"/> as a float, exposed as the "height" column.
    /// </summary>
    [ColumnName("height")]
    public float ImageHeight
    {
        get { return Image.Height; }
    }
}
/// <summary>
/// Output row holding the three output columns of the YOLO v3 ONNX model.
/// </summary>
public class YoloV3Prediction
{
/// <summary>
/// Number of candidate boxes: ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647.
/// </summary>
public const int YoloV3BboxPredictionCount = 10_647;
/// <summary>
/// Values of the "yolonms_layer_1/ExpandDims_1:0" output column.
/// Per the model documentation: boxes, shape (1 x n_candidates x 4),
/// the coordinates of all anchor boxes.
/// </summary>
[ColumnName("yolonms_layer_1/ExpandDims_1:0")]
public float[] Boxes { get; set; }
/// <summary>
/// Values of the "yolonms_layer_1/ExpandDims_3:0" output column.
/// Per the model documentation: scores, shape (1 x 80 x n_candidates),
/// the scores of all anchor boxes per class.
/// </summary>
[ColumnName("yolonms_layer_1/ExpandDims_3:0")]
public float[] Scores { get; set; }
/// <summary>
/// Values of the "yolonms_layer_1/concat_2:0" output column.
/// Per the model documentation: selected indices, shape (nbox x 3),
/// each triple being (batch_index, class_index, box_index).
/// </summary>
[ColumnName("yolonms_layer_1/concat_2:0")]
public int[] Concat { get; set; }
}
然后我们创建 ML.Net 管道并加载预测引擎:
// Scoring pipeline, built stage by stage and chained at the end.

// Stage 1: resize the "bitmap" column to 416 x 416 (IsoPad) into "input_1".
var resizeStage = mlContext.Transforms.ResizeImages(
    inputColumnName: "bitmap",
    outputColumnName: "input_1",
    imageWidth: 416,
    imageHeight: 416,
    resizing: ResizingKind.IsoPad);

// Stage 2: extract pixels as floats, scaled by 1/255.
var pixelStage = mlContext.Transforms.ExtractPixels(
    outputColumnName: "input_1",
    outputAsFloatArray: true,
    scaleImage: 1f / 255f);

// Stage 3: build the "image_shape" input from the "height" and "width" columns.
var shapeStage = mlContext.Transforms.Concatenate("image_shape", "height", "width");

// Stage 4: score with the ONNX model. The shape of "input_1" is passed
// explicitly through the shape dictionary.
var onnxInputShapes = new Dictionary<string, int[]>()
{
    { "input_1", new[] { 1, 3, 416, 416 } }
};
var onnxInputs = new[]
{
    "input_1",
    "image_shape"
};
var onnxOutputs = new[]
{
    "yolonms_layer_1/ExpandDims_1:0",
    "yolonms_layer_1/ExpandDims_3:0",
    "yolonms_layer_1/concat_2:0"
};
var onnxStage = mlContext.Transforms.ApplyOnnxModel(
    shapeDictionary: onnxInputShapes,
    inputColumnNames: onnxInputs,
    outputColumnNames: onnxOutputs,
    modelFile: @"D:\yolov3-10.onnx");

var pipeline = resizeStage
    .Append(pixelStage)
    .Append(shapeStage)
    .Append(onnxStage);

// Fit on an empty list to obtain the input data schema.
var emptyInput = mlContext.Data.LoadFromEnumerable(new List<YoloV3BitmapData>());
var model = pipeline.Fit(emptyInput);

// Create the prediction engine.
var predictionEngine = mlContext.Model.CreatePredictionEngine<YoloV3BitmapData, YoloV3Prediction>(model);
注意:我们需要定义 shapeDictionary 参数,因为输入形状在模型中没有完全定义。
根据模型文档,输出形状为:
The model has 3 outputs. boxes: (1x'n_candidates'x4), the coordinates of all anchor boxes, scores: (1x80x'n_candidates'), the scores of all anchor boxes per class, indices: ('nbox'x3), selected indices from the boxes tensor. The selected index format is (batch_index, class_index, box_index).
下面的函数可以帮您处理这些结果,进一步的微调(fine-tune)就留给您了。
/// <summary>
/// Converts the raw model outputs into a list of labelled detections.
/// </summary>
/// <param name="prediction">
/// Raw model outputs. <c>Concat</c> is read as consecutive
/// (batch_index, class_index, box_index) triples, <c>Boxes</c> as 4 floats
/// per candidate box, and <c>Scores</c> as one row of
/// <see cref="YoloV3Prediction.YoloV3BboxPredictionCount"/> floats per class.
/// </param>
/// <param name="categories">Class labels, indexed by class index.</param>
/// <returns>
/// One <c>YoloV3Result</c> per triple in <c>Concat</c>; an empty list when
/// <c>Concat</c> is null or empty.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="prediction"/> is null, or <paramref name="categories"/> is
/// null while there are results to label.
/// </exception>
/// <exception cref="ArgumentException">
/// <c>Boxes</c> or <c>Scores</c> is null or does not have the expected length.
/// </exception>
public IReadOnlyList<YoloV3Result> GetResults(YoloV3Prediction prediction, string[] categories)
{
    if (prediction == null)
    {
        throw new ArgumentNullException(nameof(prediction));
    }

    if (prediction.Concat == null || prediction.Concat.Length == 0)
    {
        return new List<YoloV3Result>();
    }

    if (categories == null)
    {
        throw new ArgumentNullException(nameof(categories));
    }

    const int boxCount = YoloV3Prediction.YoloV3BboxPredictionCount;

    if (prediction.Boxes == null || prediction.Boxes.Length != boxCount * 4)
    {
        throw new ArgumentException(
            "Boxes must contain exactly " + (boxCount * 4) + " values (4 per candidate box).",
            nameof(prediction));
    }

    if (prediction.Scores == null || prediction.Scores.Length != boxCount * categories.Length)
    {
        throw new ArgumentException(
            "Scores must contain exactly " + (boxCount * categories.Length) + " values (one per candidate box and category).",
            nameof(prediction));
    }

    // Concat size is 'nbox' x 3 (batch_index, class_index, box_index).
    int resultCount = prediction.Concat.Length / 3;
    var results = new List<YoloV3Result>(resultCount);

    for (int i = 0; i < resultCount; i++)
    {
        // Read the triple by direct indexing instead of Skip/Take/ToArray,
        // which allocated a new array on every iteration.
        // Element [offset + 0] is the batch index, which is not needed here.
        int offset = i * 3;
        int classIndex = prediction.Concat[offset + 1];
        int boxIndex = prediction.Concat[offset + 2];

        string label = categories[classIndex];

        int boxOffset = boxIndex * 4;
        var bbox = new float[]
        {
            prediction.Boxes[boxOffset],
            prediction.Boxes[boxOffset + 1],
            prediction.Boxes[boxOffset + 2],
            prediction.Boxes[boxOffset + 3],
        };

        // Scores are laid out class-major: one row of boxCount values per class.
        float score = prediction.Scores[boxIndex + classIndex * boxCount];

        results.Add(new YoloV3Result(bbox, label, score));
    }

    return results;
}
此版本的模型共有 80 个类别(类别列表的链接请参阅模型的 GitHub 文档)。
您可以像这样使用上面的内容:
// Load the image. The Image returned by FromFile is disposed explicitly;
// previously it was wrapped in a Bitmap and never released.
string imageName = "dog_cat.jpg";
using (var source = Image.FromFile(Path.Combine(imageFolder, imageName)))
using (var bitmap = new Bitmap(source))
{
    // Predict.
    var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap });
    var results = GetResults(predict, classesNames);

    // Draw predictions. The brush and font are created once and disposed,
    // instead of being re-created (and the font leaked) for every result.
    using (var g = Graphics.FromImage(bitmap))
    using (var fillBrush = new SolidBrush(Color.FromArgb(50, Color.Red)))
    using (var labelFont = new Font("Arial", 12))
    {
        foreach (var result in results)
        {
            // BBox is read in (y1, x1, y2, x2) order.
            var y1 = result.BBox[0];
            var x1 = result.BBox[1];
            var y2 = result.BBox[2];
            var x2 = result.BBox[3];

            g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
            g.FillRectangle(fillBrush, x1, y1, x2 - x1, y2 - y1);
            g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"),
                labelFont, Brushes.Blue, new PointF(x1, y1));
        }
    }

    // Build "<name>_processed<ext>". Path.ChangeExtension would insert an
    // extra period and produce "dog_cat._processed.jpg".
    string outputName = Path.GetFileNameWithoutExtension(imageName) + "_processed" + Path.GetExtension(imageName);
    bitmap.Save(Path.Combine(imageOutputFolder, outputName));
}
您可以找到 result example here.
我按照这篇 Microsoft 教程操作,没有遇到任何问题。但我想把模型换成 YOLO v3 或 v4。我从 onnx/models 获取了 YOLOv4 ONNX 模型,并且能够得到该模型的全部三个浮点输出数组,但问题出在后处理(post-processing)上:我无法从这些输出中得到正确的边界框。
为了与 YOLOv4 兼容,我修改了 Microsoft 教程源代码中的所有相关内容,例如锚点、步幅、输出网格大小以及一些函数等,但仍然无法得到正确的结果。我对照 Python 实现检查了我的全部代码,但仍然不知道问题出在哪里。有没有人有相关链接,或者知道如何使用 ML.NET
在 C# 中实现 YOLO v3 或 v4 ONNX 模型?任何帮助都将不胜感激。
我认为直接将微软的教程从 YOLO v2 移植到 v3 是不可能的,因为它依赖于每个模型的输入和输出。
附带说明一下,我在 this GitHub repo: 'YOLOv3MLNet' 中移植了另一个 YOLO v3 模型到 ML.Net。它包含一个功能齐全的 ML.Net 管道。
我还在此处提供了此答案的代码:
回到您的模型,我将以 YOLO v3(在 onnx/models 存储库中可用)为例。可以找到模型的一个很好的解释 here。
第一个建议是使用 Netron 查看模型。这样做,您将看到输入层和输出层。他们还在 onnx/models 文档中描述了这些层。
Netron's yolov3-10 screenshot
(我在 Netron 中看到,这个特定的 YOLO v3 模型还通过非极大值抑制(Non-Maximum Suppression,NMS)步骤完成了部分后处理。)
- 输入图层名称:
input_1
、image_shape
- 输出图层名称:
yolonms_layer_1/ExpandDims_1:0
、yolonms_layer_1/ExpandDims_3:0
、yolonms_layer_1/concat_2:0
根据模型文档,输入形状为:
Resized image (1x3x416x416); original image size (1x2), which is [image.size[1], image.size[0]]
我们首先需要定义ML.Net输入和输出类如下:
/// <summary>
/// ML.NET input row: the source bitmap together with its width and height,
/// each exposed under an explicit column name.
/// </summary>
public class YoloV3BitmapData
{
    /// <summary>
    /// Source image, exposed as the "bitmap" column and declared with an
    /// image type of 416 x 416.
    /// </summary>
    [ColumnName("bitmap")]
    [ImageType(416, 416)]
    public Bitmap Image { get; set; }

    /// <summary>
    /// Width of <see cref="Image"/> as a float, exposed as the "width" column.
    /// </summary>
    [ColumnName("width")]
    public float ImageWidth
    {
        get { return Image.Width; }
    }

    /// <summary>
    /// Height of <see cref="Image"/> as a float, exposed as the "height" column.
    /// </summary>
    [ColumnName("height")]
    public float ImageHeight
    {
        get { return Image.Height; }
    }
}
/// <summary>
/// Output row holding the three output columns of the YOLO v3 ONNX model.
/// </summary>
public class YoloV3Prediction
{
/// <summary>
/// Number of candidate boxes: ((52 x 52) + (26 x 26) + (13 x 13)) x 3 = 10,647.
/// </summary>
public const int YoloV3BboxPredictionCount = 10_647;
/// <summary>
/// Values of the "yolonms_layer_1/ExpandDims_1:0" output column.
/// Per the model documentation: boxes, shape (1 x n_candidates x 4),
/// the coordinates of all anchor boxes.
/// </summary>
[ColumnName("yolonms_layer_1/ExpandDims_1:0")]
public float[] Boxes { get; set; }
/// <summary>
/// Values of the "yolonms_layer_1/ExpandDims_3:0" output column.
/// Per the model documentation: scores, shape (1 x 80 x n_candidates),
/// the scores of all anchor boxes per class.
/// </summary>
[ColumnName("yolonms_layer_1/ExpandDims_3:0")]
public float[] Scores { get; set; }
/// <summary>
/// Values of the "yolonms_layer_1/concat_2:0" output column.
/// Per the model documentation: selected indices, shape (nbox x 3),
/// each triple being (batch_index, class_index, box_index).
/// </summary>
[ColumnName("yolonms_layer_1/concat_2:0")]
public int[] Concat { get; set; }
}
然后我们创建 ML.Net 管道并加载预测引擎:
// Scoring pipeline, built stage by stage and chained at the end.

// Stage 1: resize the "bitmap" column to 416 x 416 (IsoPad) into "input_1".
var resizeStage = mlContext.Transforms.ResizeImages(
    inputColumnName: "bitmap",
    outputColumnName: "input_1",
    imageWidth: 416,
    imageHeight: 416,
    resizing: ResizingKind.IsoPad);

// Stage 2: extract pixels as floats, scaled by 1/255.
var pixelStage = mlContext.Transforms.ExtractPixels(
    outputColumnName: "input_1",
    outputAsFloatArray: true,
    scaleImage: 1f / 255f);

// Stage 3: build the "image_shape" input from the "height" and "width" columns.
var shapeStage = mlContext.Transforms.Concatenate("image_shape", "height", "width");

// Stage 4: score with the ONNX model. The shape of "input_1" is passed
// explicitly through the shape dictionary.
var onnxInputShapes = new Dictionary<string, int[]>()
{
    { "input_1", new[] { 1, 3, 416, 416 } }
};
var onnxInputs = new[]
{
    "input_1",
    "image_shape"
};
var onnxOutputs = new[]
{
    "yolonms_layer_1/ExpandDims_1:0",
    "yolonms_layer_1/ExpandDims_3:0",
    "yolonms_layer_1/concat_2:0"
};
var onnxStage = mlContext.Transforms.ApplyOnnxModel(
    shapeDictionary: onnxInputShapes,
    inputColumnNames: onnxInputs,
    outputColumnNames: onnxOutputs,
    modelFile: @"D:\yolov3-10.onnx");

var pipeline = resizeStage
    .Append(pixelStage)
    .Append(shapeStage)
    .Append(onnxStage);

// Fit on an empty list to obtain the input data schema.
var emptyInput = mlContext.Data.LoadFromEnumerable(new List<YoloV3BitmapData>());
var model = pipeline.Fit(emptyInput);

// Create the prediction engine.
var predictionEngine = mlContext.Model.CreatePredictionEngine<YoloV3BitmapData, YoloV3Prediction>(model);
注意:我们需要定义 shapeDictionary 参数,因为输入形状在模型中没有完全定义。
根据模型文档,输出形状为:
The model has 3 outputs. boxes: (1x'n_candidates'x4), the coordinates of all anchor boxes, scores: (1x80x'n_candidates'), the scores of all anchor boxes per class, indices: ('nbox'x3), selected indices from the boxes tensor. The selected index format is (batch_index, class_index, box_index).
下面的函数可以帮您处理这些结果,进一步的微调(fine-tune)就留给您了。
/// <summary>
/// Converts the raw model outputs into a list of labelled detections.
/// </summary>
/// <param name="prediction">
/// Raw model outputs. <c>Concat</c> is read as consecutive
/// (batch_index, class_index, box_index) triples, <c>Boxes</c> as 4 floats
/// per candidate box, and <c>Scores</c> as one row of
/// <see cref="YoloV3Prediction.YoloV3BboxPredictionCount"/> floats per class.
/// </param>
/// <param name="categories">Class labels, indexed by class index.</param>
/// <returns>
/// One <c>YoloV3Result</c> per triple in <c>Concat</c>; an empty list when
/// <c>Concat</c> is null or empty.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="prediction"/> is null, or <paramref name="categories"/> is
/// null while there are results to label.
/// </exception>
/// <exception cref="ArgumentException">
/// <c>Boxes</c> or <c>Scores</c> is null or does not have the expected length.
/// </exception>
public IReadOnlyList<YoloV3Result> GetResults(YoloV3Prediction prediction, string[] categories)
{
    if (prediction == null)
    {
        throw new ArgumentNullException(nameof(prediction));
    }

    if (prediction.Concat == null || prediction.Concat.Length == 0)
    {
        return new List<YoloV3Result>();
    }

    if (categories == null)
    {
        throw new ArgumentNullException(nameof(categories));
    }

    const int boxCount = YoloV3Prediction.YoloV3BboxPredictionCount;

    if (prediction.Boxes == null || prediction.Boxes.Length != boxCount * 4)
    {
        throw new ArgumentException(
            "Boxes must contain exactly " + (boxCount * 4) + " values (4 per candidate box).",
            nameof(prediction));
    }

    if (prediction.Scores == null || prediction.Scores.Length != boxCount * categories.Length)
    {
        throw new ArgumentException(
            "Scores must contain exactly " + (boxCount * categories.Length) + " values (one per candidate box and category).",
            nameof(prediction));
    }

    // Concat size is 'nbox' x 3 (batch_index, class_index, box_index).
    int resultCount = prediction.Concat.Length / 3;
    var results = new List<YoloV3Result>(resultCount);

    for (int i = 0; i < resultCount; i++)
    {
        // Read the triple by direct indexing instead of Skip/Take/ToArray,
        // which allocated a new array on every iteration.
        // Element [offset + 0] is the batch index, which is not needed here.
        int offset = i * 3;
        int classIndex = prediction.Concat[offset + 1];
        int boxIndex = prediction.Concat[offset + 2];

        string label = categories[classIndex];

        int boxOffset = boxIndex * 4;
        var bbox = new float[]
        {
            prediction.Boxes[boxOffset],
            prediction.Boxes[boxOffset + 1],
            prediction.Boxes[boxOffset + 2],
            prediction.Boxes[boxOffset + 3],
        };

        // Scores are laid out class-major: one row of boxCount values per class.
        float score = prediction.Scores[boxIndex + classIndex * boxCount];

        results.Add(new YoloV3Result(bbox, label, score));
    }

    return results;
}
此版本的模型共有 80 个类别(类别列表的链接请参阅模型的 GitHub 文档)。
您可以像这样使用上面的内容:
// Load the image. The Image returned by FromFile is disposed explicitly;
// previously it was wrapped in a Bitmap and never released.
string imageName = "dog_cat.jpg";
using (var source = Image.FromFile(Path.Combine(imageFolder, imageName)))
using (var bitmap = new Bitmap(source))
{
    // Predict.
    var predict = predictionEngine.Predict(new YoloV3BitmapData() { Image = bitmap });
    var results = GetResults(predict, classesNames);

    // Draw predictions. The brush and font are created once and disposed,
    // instead of being re-created (and the font leaked) for every result.
    using (var g = Graphics.FromImage(bitmap))
    using (var fillBrush = new SolidBrush(Color.FromArgb(50, Color.Red)))
    using (var labelFont = new Font("Arial", 12))
    {
        foreach (var result in results)
        {
            // BBox is read in (y1, x1, y2, x2) order.
            var y1 = result.BBox[0];
            var x1 = result.BBox[1];
            var y2 = result.BBox[2];
            var x2 = result.BBox[3];

            g.DrawRectangle(Pens.Red, x1, y1, x2 - x1, y2 - y1);
            g.FillRectangle(fillBrush, x1, y1, x2 - x1, y2 - y1);
            g.DrawString(result.Label + " " + result.Confidence.ToString("0.00"),
                labelFont, Brushes.Blue, new PointF(x1, y1));
        }
    }

    // Build "<name>_processed<ext>". Path.ChangeExtension would insert an
    // extra period and produce "dog_cat._processed.jpg".
    string outputName = Path.GetFileNameWithoutExtension(imageName) + "_processed" + Path.GetExtension(imageName);
    bitmap.Save(Path.Combine(imageOutputFolder, outputName));
}
您可以找到 result example here.