ML.Net Rounding Float32 Results to 0 or 1
using System.Drawing;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Image;

namespace OnnxTest;

public static class Program
{
    public static void Main(string[] args)
    {
        var tags = File.ReadLines(@"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
        var imageLocation = @"C:\Users\da3ds\Pictures\image.jpg";
        var modelLocation = @"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";

        MLContext mlContext = new MLContext();

        Console.WriteLine("Read model");
        Console.WriteLine($"Model location: {modelLocation}");
        Console.WriteLine(
            $"Default parameters: image size=({InputModel.imageWidth},{InputModel.imageHeight})");
        Console.WriteLine($"Images location: {imageLocation}");
        Console.WriteLine("");
        Console.WriteLine("=====Identify the objects in the images=====");
        Console.WriteLine("");

        // Create IDataView from empty list to obtain input data schema
        var data = new InputModel { ImagePath = imageLocation };

        // Define scoring pipeline
        var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
        var outputs = predictionEngine.Predict(data);

        var outputMapped = tags.Zip(outputs.Scores).Select(t => new { Tag = t.First, f = t.Second })
            .ToDictionary(a => a.Tag, a => a.f);
        var outputTags = outputMapped.Where(a => Math.Abs(a.Value - 1) < 0.00001f).Select(a => a.Key).OrderBy(a => a)
            .ToList();
    }

    private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
    {
        var estimator = mlContext.Transforms.LoadImages(InputModel.ModelInput, "", nameof(InputModel.ImagePath))
            .Append(mlContext.Transforms.ResizeImages(InputModel.ModelInput, InputModel.imageWidth,
                InputModel.imageHeight, InputModel.ModelInput, ImageResizingEstimator.ResizingKind.IsoPad))
            .Append(mlContext.Transforms.ExtractPixels(InputModel.ModelInput, InputModel.ModelInput))
            .Append(mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
                modelLocation));

        var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
        // Fit scoring pipeline
        var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
        return predictionEngine;
    }

    class InputModel
    {
        public const int imageHeight = 512;
        public const int imageWidth = 512;
        // input tensor name
        public const string ModelInput = "input_1:0";

        public string ImagePath { get; set; }

        [ColumnName(ModelInput)]
        [ImageType(imageHeight, imageWidth)]
        public Bitmap Image { get; set; }
    }

    class OutputModel
    {
        // output tensor name
        public const string ModelOutput = "Identity:0";

        [ColumnName(ModelOutput)]
        public float[] Scores { get; set; }
    }
}
I wrote a very simple test program to try to get output that matches the Python project, just in C#, so that I can use it effectively in an ASP.Net API (and I also just like C#). The original Python works, even after I modified it to use onnxruntime instead of the Keras the model originated from. It produces a float[9176] of scores from 0 to 1 that lines up with the list of tags in tags.txt, and those scores decide whether each tag should be applied to the given image.
This is a multi-label classification problem from TensorFlow. I used the object detection sample to get here, and it returns results that are... correct, but not quite. For whatever reason, every score comes back rounded to 0 or 1.
I'm new to ML and have very little ML.Net knowledge, so I figured I'd spend my first question in a long while on this and hope someone can shed some light on it for me.
OK, new day. I traced the code path of the Python project and put together an MVP. In doing so, I ended up with a few things to compare against.
import os

import onnxruntime
import skimage.transform
import tensorflow as tf


def main():
    # disable CUDA acceleration for simplicity in running the test
    # (you need drivers, an nvidia gpu, etc. for that)
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    image_path = r'C:\Users\da3ds\Pictures\image.jpg'
    model_path = r'C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx'

    # load tags
    tags_path = r'C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt'
    with open(tags_path, 'r') as tags_stream:
        tags = [tag for tag in (tag.strip() for tag in tags_stream) if tag]

    # create inference session
    model = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])
    width = model.get_inputs()[0].shape[1]   # 512
    height = model.get_inputs()[0].shape[2]  # 512

    image_raw = tf.io.read_file(image_path)
    image = tf.io.decode_png(image_raw, channels=3)
    image = tf.image.resize(image, size=(width, height), method=tf.image.ResizeMethod.AREA, preserve_aspect_ratio=True)
    image = image.numpy()  # EagerTensor to np.array

    # shift the resized image onto a centered 512x512 canvas
    image_width = image.shape[0]
    image_height = image.shape[1]
    t = skimage.transform.AffineTransform(translation=(-image_width * 0.5, -image_height * 0.5))
    t += skimage.transform.AffineTransform(translation=(width * 0.5, height * 0.5))
    image = skimage.transform.warp(image, t.inverse, output_shape=(width, height), order=1, mode='edge')
    # at this point all widths and heights are probably 512

    # normalize the image
    image = image / 255.0

    # build the input shape of Vector<1, 512, 512, 3>
    image_shape = image.shape
    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))

    onnx_result = model.run(None, {'input_1:0': image})
    # onnx_result is 2 arrays deep for some reason
    # 1 would make sense, as it can handle batches
    onnx_result = onnx_result[0][0]

    # print a nice result
    for i, tag in enumerate(tags):
        print(f'({onnx_result[i]:05.3f}) {tag}')


if __name__ == '__main__':
    main()
Conveniently, while doing this I made a mistake with one of the defaults that reproduced the ML.Net result exactly: (not) normalizing the image. Feeding the network raw 0-255 pixel values instead of 0-1 saturates its outputs, which is almost certainly why every score came back as 0 or 1 and looked like rounding. I didn't know how to do that normalization inside the ML.Net pipeline, so I built the array with Magick.Net and fed it to ML.Net directly.
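For what it's worth, ML.Net's ExtractPixels does expose scaleImage and interleavePixelColors parameters, so the normalization could probably have stayed inside the original pipeline. Below is a minimal, untested sketch against the first code block's InputModel/OutputModel (the method name is made up for illustration); note that ResizeImages with IsoPad still wouldn't be an exact match for the Python AREA-resize-and-pad preprocessing:

    private static PredictionEngine<InputModel, OutputModel> GetNormalizingPredictionEngine(MLContext mlContext, string modelLocation)
    {
        // Untested sketch: load and resize in the pipeline, then let ExtractPixels do the
        // 0-1 scaling (scaleImage: 1/255f) and emit interleaved RGB values
        var estimator = mlContext.Transforms.LoadImages(InputModel.ModelInput, "", nameof(InputModel.ImagePath))
            .Append(mlContext.Transforms.ResizeImages(InputModel.ModelInput, InputModel.imageWidth,
                InputModel.imageHeight, InputModel.ModelInput, ImageResizingEstimator.ResizingKind.IsoPad))
            .Append(mlContext.Transforms.ExtractPixels(InputModel.ModelInput, InputModel.ModelInput,
                interleavePixelColors: true, scaleImage: 1 / 255f))
            .Append(mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
                modelLocation));

        var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
        return mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
    }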
Here is the final code I ended up with, doing the preprocessing with Magick.Net:
using ImageMagick;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace OnnxTest;

public static class Program
{
    public static void Main(string[] args)
    {
        var tags = File.ReadLines(@"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
        var imageLocation = @"C:\Users\da3ds\Pictures\image.jpg";
        var modelLocation = @"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";

        MLContext mlContext = new MLContext(seed: 0);

        Console.WriteLine("Read model");
        Console.WriteLine($"Model location: {modelLocation}");
        Console.WriteLine(
            $"Default parameters: image size=({InputModel.Width},{InputModel.Height})");
        Console.WriteLine($"Images location: {imageLocation}");
        Console.WriteLine("");
        Console.WriteLine("=====Identify the objects in the images=====");
        Console.WriteLine("");

        // Preprocess the image outside of ML.Net and feed the raw float tensor in
        var data = new InputModel { Data = GetImage(imageLocation) };

        // Define scoring pipeline
        var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
        var output = predictionEngine.Predict(data);

        // Pair each score with its tag and keep everything above the threshold
        var outputMapped = tags.Zip(output.Scores).Select(t => new { Tag = t.First, f = t.Second })
            .ToDictionary(a => a.Tag, a => a.f);
        var outputTags = outputMapped.Where(a => a.Value > 0.80f).Select(a => (Tag: a.Key, Score: a.Value))
            .ToList();

        foreach (var tag in outputTags)
        {
            Console.WriteLine($"({tag.Score:P1}) {tag.Tag}");
        }
    }

    private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
    {
        var transformer = GetBasicTransformer(mlContext, modelLocation);

        // Create the prediction engine from the fitted transformer
        var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
        return predictionEngine;
    }

    private static ITransformer GetBasicTransformer(MLContext mlContext, string modelLocation)
    {
        // The pipeline is now just the ONNX model; fit on an empty list to obtain the input schema
        var estimator = mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
            modelLocation);
        var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
        return transformer;
    }

    public static float[] GetImage(string imagePath)
    {
        using var mImage = new MagickImage(imagePath);
        mImage.Quality = 100;
        mImage.BackgroundColor = new MagickColor(0, 0, 0);
        mImage.HasAlpha = false;

        // "512>x512>" only shrinks images larger than 512, preserving aspect ratio;
        // Extent then pads to exactly 512x512, centered on a black background
        mImage.Resize(new MagickGeometry($"{InputModel.Width}>x{InputModel.Height}>"));
        mImage.Extent(InputModel.Width, InputModel.Height, Gravity.Center, new MagickColor(0, 0, 0));

        // Channel values come back interleaved per pixel (RGB here); dividing by 255
        // scales them to 0-1, the step the first attempt was missing (assumes the Q8 build)
        var pixels = mImage.GetPixels();
        var array = pixels.ToArray();
        var data = new float[InputModel.Width * InputModel.Height * InputModel.Channels];
        for (var index = 0; index < array.Length; index++)
        {
            data[index] = array[index] / 255.0f;
        }

        return data;
    }

    class InputModel
    {
        public const int Width = 512;
        public const int Height = 512;
        public const int Channels = 3;

        // input tensor name
        public const string ModelInput = "input_1:0";

        // shape (1, 512, 512, 3), matching the input the model expects
        [ColumnName(ModelInput)]
        [VectorType(1, Width, Height, Channels)]
        public float[] Data { get; set; }
    }

    class OutputModel
    {
        // output tensor name
        public const string ModelOutput = "Identity:0";

        [ColumnName(ModelOutput)]
        public float[] Scores { get; set; }
    }
}
Obviously the final... final code won't be this MVP, but this was a test. I'm leaving this here as a record of my effort in case anyone else runs into a similar problem. At the very least, it shows my debugging steps and some sample code. Thanks for being my rubber duck.
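Since the end goal is an ASP.Net API, one more note: PredictionEngine isn't thread-safe, so the usual pattern for web apps is PredictionEnginePool from the Microsoft.Extensions.ML package. Below is a rough, untested sketch of how that wiring might look, assuming the fitted transformer is saved to disk first and InputModel/OutputModel are made public top-level classes; the file name and model name are placeholders:

    // 1) After fitting the transformer, persist it as an ML.Net model file.
    //    The schema comes from the same empty IDataView used to fit the pipeline.
    var emptyData = mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>());
    mlContext.Model.Save(transformer, emptyData.Schema, "deepdanbooru.zip");

    // 2) In the ASP.Net app's Program.cs, register a pooled engine
    //    (requires the Microsoft.Extensions.ML NuGet package).
    builder.Services.AddPredictionEnginePool<InputModel, OutputModel>()
        .FromFile(modelName: "DeepDanbooru", filePath: "deepdanbooru.zip", watchForChanges: false);

    // 3) A controller or minimal API handler can then take
    //    PredictionEnginePool<InputModel, OutputModel> as a dependency and call:
    //    var output = pool.Predict(modelName: "DeepDanbooru",
    //        example: new InputModel { Data = GetImage(path) });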