ML.Net Rounding Float32 Results to 0 or 1

using System.Drawing;
using Microsoft.ML;
using Microsoft.ML.Data;
using Microsoft.ML.Transforms.Image;

namespace OnnxTest;

public static class Program
{
    public static void Main(string[] args)
    {
        var tags = File.ReadLines(@"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
        var imageLocation = @"C:\Users\da3ds\Pictures\image.jpg";
        var modelLocation = @"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";
        MLContext mlContext = new MLContext();
        
        Console.WriteLine("Read model");
        Console.WriteLine($"Model location: {modelLocation}");
        Console.WriteLine(
            $"Default parameters: image size=({InputModel.imageWidth},{InputModel.imageHeight})");
        Console.WriteLine($"Images location: {imageLocation}");
        Console.WriteLine("");
        Console.WriteLine("=====Identify the objects in the images=====");
        Console.WriteLine("");

        // Single input record pointing at the image on disk
        var data = new InputModel { ImagePath = imageLocation };
        // Build the scoring pipeline and wrap it in a prediction engine
        var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
        var outputs = predictionEngine.Predict(data);

        var outputMapped = tags.Zip(outputs.Scores).Select(t => new { Tag = t.First, f = t.Second })
            .ToDictionary(a => a.Tag, a => a.f);

        // Every score comes back as exactly 0 or 1 (the problem), so just keep the 1s
        var outputTags = outputMapped.Where(a => Math.Abs(a.Value - 1) < 0.00001f).Select(a => a.Key).OrderBy(a => a)
            .ToList();
    }

    private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
    {
        var estimator = mlContext.Transforms.LoadImages(InputModel.ModelInput, "", nameof(InputModel.ImagePath))
            .Append(mlContext.Transforms.ResizeImages(InputModel.ModelInput, InputModel.imageWidth,
                InputModel.imageHeight, InputModel.ModelInput, ImageResizingEstimator.ResizingKind.IsoPad))
            .Append(mlContext.Transforms.ExtractPixels(InputModel.ModelInput, InputModel.ModelInput))
            .Append(mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
                modelLocation));

        // Fit against an empty IDataView just to obtain the input data schema
        var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));

        var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
        return predictionEngine;
    }

    class InputModel
    {
        public const int imageHeight = 512;
        public const int imageWidth = 512;
        // input tensor name
        public const string ModelInput = "input_1:0";

        public string ImagePath { get; set; }
        [ColumnName(ModelInput)]
        [ImageType(imageHeight, imageWidth)]
        public Bitmap Image { get; set; }
    }

    class OutputModel
    {
        // output tensor name
        public const string ModelOutput = "Identity:0";

        [ColumnName(ModelOutput)]
        public float[] Scores { get; set; }
    }
}

I wrote a very simple test program to try to get output that matches the Python project, just in C#, so that I can use it effectively in an ASP.Net API (and I also just like C#). The original Python works, even after I modified it to use onnxruntime instead of the Keras the model originated in. It gives a float[9176] of scores between 0 and 1 that line up with the list of tags in tags.txt, which determines whether each tag should be applied to a given image.

This is a TensorFlow multi-label classification problem. I got here by working from the object detection sample, and it returns results that are... correct, but not. For whatever reason, every score is being rounded to 0 or 1.

I'm new to ML and know even less about ML.Net, so I figured I'd finally spend my first question here on this and hope someone can shed some light on it for me.

Okay, new day. I traced the Python project's code path and made an MVP. In doing so, I found a few things worth looking at.

import os

import onnxruntime
import skimage.transform
import tensorflow as tf


def main():
    # disable CUDA acceleration for simplicity in running the test
    # you need drivers, an nvidia gpu, etc. for that
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

    image_path = r'C:\Users\da3ds\Pictures\image.jpg'
    model_path = r'C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx'

    # load tags
    tags_path = r'C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt'
    with open(tags_path, 'r') as tags_stream:
        tags = [tag for tag in (tag.strip() for tag in tags_stream) if tag]

    # create inference session
    model = onnxruntime.InferenceSession(model_path, providers=['CPUExecutionProvider'])

    width = model.get_inputs()[0].shape[1]  # 512
    height = model.get_inputs()[0].shape[2]  # 512

    image_raw = tf.io.read_file(image_path)
    # decode_png also handles JPEG data; TF dispatches on the actual file contents
    image = tf.io.decode_png(image_raw, channels=3)

    image = tf.image.resize(image, size=(width, height), method=tf.image.ResizeMethod.AREA, preserve_aspect_ratio=True)
    image = image.numpy()  # EagerTensor to np.array

    image_width = image.shape[0]
    image_height = image.shape[1]

    t = skimage.transform.AffineTransform(translation=(-image_width * 0.5, -image_height * 0.5))
    t += skimage.transform.AffineTransform(translation=(width * 0.5, height * 0.5))

    image = skimage.transform.warp(image, t.inverse, output_shape=(width, height), order=1, mode='edge')

    # at this point all widths and heights are probably 512
    
    # normalize the image
    image = image / 255.0

    image_shape = image.shape
    # build the input shape of Vector<1, 512, 512, 3>
    image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))
    onnx_result = model.run(None, {'input_1:0': image})
    # onnx_result is nested two arrays deep for some reason;
    # one level would make sense, since the model can handle batches
    onnx_result = onnx_result[0][0]

    # print a nice result
    for i, tag in enumerate(tags):
        print(f'({onnx_result[i]:05.3f}) {tag}')


if __name__ == '__main__':
    main()

Conveniently, in doing so I made a mistake with one of the defaults that produced exactly the same results as ML.Net: (not) normalizing the image. I didn't know how to do that in the ML.Net pipeline, so I built the array with Magick.NET and fed it to ML.Net directly.
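For what it's worth, ML.Net's ExtractPixels does expose offsetImage/scaleImage parameters, so the normalization could probably have stayed inside the original image pipeline instead of dropping down to a raw float array. A minimal, untested sketch of that idea, keeping the column names from the first attempt above (interleavePixelColors: true is my assumption about the channels-last (1, 512, 512, 3) layout the model expects):

var estimator = mlContext.Transforms.LoadImages(InputModel.ModelInput, "", nameof(InputModel.ImagePath))
    .Append(mlContext.Transforms.ResizeImages(InputModel.ModelInput, InputModel.imageWidth,
        InputModel.imageHeight, InputModel.ModelInput, ImageResizingEstimator.ResizingKind.IsoPad))
    // scaleImage: 1 / 255f is the normalization step that was missing
    .Append(mlContext.Transforms.ExtractPixels(InputModel.ModelInput, InputModel.ModelInput,
        interleavePixelColors: true, scaleImage: 1 / 255f))
    .Append(mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
        modelLocation));

I haven't verified this end to end; the Magick.NET route below is what I actually tested.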

Here is the final code:

using ImageMagick;
using Microsoft.ML;
using Microsoft.ML.Data;

namespace OnnxTest;

public static class Program
{
    public static void Main(string[] args)
    {
        var tags = File.ReadLines(@"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\tags.txt");
        var imageLocation = @"C:\Users\da3ds\Pictures\image.jpg";
        var modelLocation = @"C:\Users\da3ds\Downloads\deepdanbooru-v3-20211112-sgd-e28\model-resnet-custom_v3.onnx";
        MLContext mlContext = new MLContext(seed: 0);
        
        Console.WriteLine("Read model");
        Console.WriteLine($"Model location: {modelLocation}");
        Console.WriteLine(
            $"Default parameters: image size=({InputModel.Width},{InputModel.Height})");
        Console.WriteLine($"Images location: {imageLocation}");
        Console.WriteLine("");
        Console.WriteLine("=====Identify the objects in the images=====");
        Console.WriteLine("");

        // Preprocess the image into a normalized float array (the model's input tensor)
        var data = new InputModel { Data = GetImage(imageLocation) };
        // Build the scoring pipeline and wrap it in a prediction engine
        var predictionEngine = GetPredictionEngine(mlContext, modelLocation);
        var output = predictionEngine.Predict(data);

        var outputMapped = tags.Zip(output.Scores).Select(t => new { Tag = t.First, f = t.Second })
            .ToDictionary(a => a.Tag, a => a.f);

        var outputTags = outputMapped.Where(a => a.Value > 0.80f).Select(a => (Tag: a.Key, Score: a.Value))
            .ToList();
        foreach (var tag in outputTags)
        {
            Console.WriteLine($"({tag.Score:P1}) {tag.Tag}");
        }
    }

    private static PredictionEngine<InputModel, OutputModel> GetPredictionEngine(MLContext mlContext, string modelLocation)
    {
        var transformer = GetBasicTransformer(mlContext, modelLocation);

        // Wrap the fitted transformer in a prediction engine for single-example scoring
        var predictionEngine = mlContext.Model.CreatePredictionEngine<InputModel, OutputModel>(transformer);
        return predictionEngine;
    }

    private static ITransformer GetBasicTransformer(MLContext mlContext, string modelLocation)
    {
        var estimator = mlContext.Transforms.ApplyOnnxModel(OutputModel.ModelOutput, InputModel.ModelInput,
                modelLocation);

        var transformer = estimator.Fit(mlContext.Data.LoadFromEnumerable(Array.Empty<InputModel>()));
        return transformer;
    }

    public static float[] GetImage(string imagePath)
    {
        using var mImage = new MagickImage(imagePath);
        mImage.Quality = 100;
        mImage.BackgroundColor = new MagickColor(0, 0, 0);
        mImage.HasAlpha = false;
        // Shrink to fit within 512x512 (only if larger), preserving aspect ratio,
        // then pad to exactly 512x512 with black, centered (roughly mirroring the Python preprocessing)
        mImage.Resize(new MagickGeometry($"{InputModel.Width}>x{InputModel.Height}>"));
        mImage.Extent(InputModel.Width, InputModel.Height, Gravity.Center, new MagickColor(0, 0, 0));
        // Flatten the interleaved RGB pixels to a float array and normalize to 0-1
        // (assumes the Q8 build of Magick.NET, where channel values are 0-255)
        var pixels = mImage.GetPixels();
        var array = pixels.ToArray();
        var data = new float[InputModel.Width * InputModel.Height * InputModel.Channels];
        for (var index = 0; index < array.Length; index++)
        {
            data[index] = array[index] / 255.0f;
        }

        return data;
    }

    class InputModel
    {
        public const int Width = 512;
        public const int Height = 512;
        public const int Channels = 3;
        public const string ModelInput = "input_1:0";

        [ColumnName(ModelInput)]
        [VectorType(1, Width, Height, Channels)]
        public float[] Data { get; set; }
    }

    class OutputModel
    {
        // output tensor name
        public const string ModelOutput = "Identity:0";

        [ColumnName(ModelOutput)]
        public float[] Scores { get; set; }
    }
}

Obviously the final... final code won't be this MVP, but this was a test. I'm leaving this here as a record of my effort in case anyone else runs into a similar problem. At the very least, it shows my debugging steps and some sample code. Thanks for being my rubber duck.
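One last hedged note for the eventual ASP.Net version: a PredictionEngine is not thread-safe, so the usual pattern in a web app is PredictionEnginePool from the Microsoft.Extensions.ML package. A minimal sketch, assuming the fitted pipeline has first been saved with mlContext.Model.Save to a file I'm calling onnx-pipeline.zip (the file name and model name are illustrative, and InputModel/OutputModel would need to be public):

// Program.cs of the ASP.NET Core app
using Microsoft.Extensions.ML;

var builder = WebApplication.CreateBuilder(args);
builder.Services.AddPredictionEnginePool<InputModel, OutputModel>()
    .FromFile(modelName: "deepdanbooru", filePath: "onnx-pipeline.zip", watchForChanges: false);
var app = builder.Build();

// Endpoints can then take PredictionEnginePool<InputModel, OutputModel> as a dependency
// and call pool.Predict(modelName: "deepdanbooru", example: input) per request.
app.Run();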