Microsoft ML.Net SDCA 回归训练器找不到输入列数据

Microsoft ML.Net SDCA Regression Trainer Can't Find Input Column Data

我决定尝试掌握 Microsoft 的新 ML.Net library

我正在尝试用手头的一些演示数据做一个自己版本的 taxi fare(出租车费)示例,但它总是抛出一个错误,说找不到我的其中一列。

这是我的代码。

using Microsoft.ML;
using System;

namespace MLTest
{
    /// <summary>
    /// Console sample that fits an SDCA regression model on a CSV training file,
    /// scores a CSV testing file, and prints the resulting regression metrics.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates the text-loader settings used for both the training and the testing file:
        /// comma-separated, with a header row, quoted fields, whitespace trimming, and no sparse rows.
        /// </summary>
        /// <returns>A fresh options instance for each call, so the two loads do not share state.</returns>
        /// <remarks>
        /// No Columns are specified on these options. The follow-up in this post attributes the
        /// "column not found" error to the loaded data having no column types specified.
        /// </remarks>
        private static Microsoft.ML.Data.TextLoader.Options CreateCsvLoaderOptions()
        {
            return new Microsoft.ML.Data.TextLoader.Options()
            {
                Separators = new char[] { ',' },
                HasHeader = true,
                AllowQuoting = true,
                TrimWhitespace = true,
                AllowSparse = false
            };
        }

        /// <summary>
        /// Loads the data, builds the transform + trainer pipeline, fits it, and reports metrics.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Verbatim strings: in a regular literal "\T" is an invalid escape sequence (CS1009),
            // and "\a" / "\f" would be turned into control characters instead of path text.
            string fileLocTraining = @"C:\Temp\a49673f6-c073-4731-b00d-922e4313826ftraining.txt";
            string fileLocTesting = @"C:\Temp\f6bd1704-2d78-4b6f-901c-def4cecea638testing.txt";

            MLContext mlContext = new MLContext(0);

            IDataView trainingDataView = mlContext.Data.LoadFromTextFile(fileLocTraining, CreateCsvLoaderOptions());
            IDataView testingDataView = mlContext.Data.LoadFromTextFile(fileLocTesting, CreateCsvLoaderOptions());

            // Copy the target column to "Label", normalize the three inputs in place,
            // then combine them into the single "Features" column the trainer reads.
            var pipeline = mlContext.Transforms.CopyColumns("Label", "PCT_DIABETES_ADULTS13")
                .Append(mlContext.Transforms.NormalizeMeanVariance("PCH_RECFAC_09_14"))
                .Append(mlContext.Transforms.NormalizeMeanVariance("PCH_RECFACPTH_09_14"))
                .Append(mlContext.Transforms.NormalizeMeanVariance("PCT_DIABETES_ADULTS08"))
                .Append(mlContext.Transforms.Concatenate("Features", new string[] { "PCH_RECFAC_09_14", "PCH_RECFACPTH_09_14", "PCT_DIABETES_ADULTS08" }));

            var trainer = mlContext.Regression.Trainers.Sdca(new Microsoft.ML.Trainers.SdcaRegressionTrainer.Options()
            {
                LabelColumnName = "Label",
                FeatureColumnName = "Features"
            });

            // Append returns a new estimator chain, so the result has to be captured.
            var trainingPipeline = pipeline.Append(trainer);
            var trainedModel = trainingPipeline.Fit(trainingDataView);

            // Score the held-out file and evaluate the "Score" column against "Label".
            IDataView predictions = trainedModel.Transform(testingDataView);
            var metrics = mlContext.Regression.Evaluate(predictions, labelColumnName: "Label", scoreColumnName: "Score");

            Console.WriteLine($"*Metrics for {trainer.ToString()} regression model");
            Console.WriteLine(string.Empty);
            Console.WriteLine($"*LossFn:        {metrics.LossFunction:0.##}");
            Console.WriteLine($"*R2 Score:      {metrics.RSquared:0.##}");
            Console.WriteLine($"*Absolute loss: {metrics.MeanAbsoluteError:#.##}");
            Console.WriteLine($"*Squared loss:  {metrics.MeanSquaredError:#.##}");
            Console.WriteLine($"*RMS loss:      {metrics.RootMeanSquaredError:#.##}");
        }
    }
}

这是我的训练数据:

"PCH_RECFAC_09_14","PCH_RECFACPTH_09_14","PCT_DIABETES_ADULTS08","State","PCT_DIABETES_ADULTS13"
"171.4759665630000023","170.35666635199999745","99.0","VT","113.2000000000000004"
"-933.5367823810000094","-1139.57172938600001097","1861.7999999999999914","GA","2017.1999999999999974"
"29.1010194630000001","9.9002080500000003","30.5","DE","35.0"
"-167.8067765570000007","-236.87467502700001471","451.0999999999999965","MT","523.1000000000000028"
"-1498.4938905129999974","-1469.44056142600000057","825.2999999999999946","MI","943.7999999999999959"
"415.2291004140000053","424.05151568300002718","527.8000000000000042","NY","597.1000000000000025"
"-258.5561990550000000","-309.17482086900000954","874.7000000000000037","AL","1037.7999999999999973"
"-926.7641660469999960","-850.90679617099999136","898.0999999999999944","IL","1071.5999999999999991"
"-1074.4047619060000027","-1082.9095524559999993","696.0999999999999997","WV","766.900000000000003"
"-230.055210490000003","-278.38893846099999238","376.9000000000000028","ID","406.4999999999999990"
"-204.1902273029999988","-815.70881217100001012","2510.7999999999999941","TX","2640.5999999999999894"
"-1161.7043315749999929","-1355.53681784899999911","1079.5999999999999956","NC","1232.8999999999999983"
"78.7183300220000005","37.91106390300000234","116.2999999999999993","MA","126.8000000000000000"
"-268.1089583949999992","-321.57691234899998861","239.2999999999999962","MD","265.5000000000000007"
"-405.7849735820000025","-506.98501040599999381","563.1000000000000000","SC","660.9000000000000027"
"-705.3722863439999958","-768.52692485499999770","657.9999999999999979","MN","770.6999999999999977"
"-375.9511803369999965","-513.45842248599999286","441.2000000000000007","CA","508.9000000000000001"
"220.885885677999998","189.43248436999999054","608.3999999999999955","WI","698.8999999999999994"
"-199.1347947649999924","-311.44753966100000708","767.3999999999999997","LA","825.199999999999999"
"-992.7746890779999904","-1055.50188427299999511","929.1999999999999910","IN","1107.8999999999999987"
"-883.1533289950000014","-999.50961415399995717","1120.3999999999999997","TN","1310.9000000000000000"
"-749.076479076999998","-788.46602512299997671","1020.3999999999999902","KS","1142.2999999999999993"
"148.966976209000002","-14.91930678100006021","218.8999999999999972","UT","241.0999999999999990"
"182.4915824919999989","186.41089870600000035","81.4000000000000007","NH","98.7999999999999997"
"589.9071328560000117","463.30462715499996500","873.5999999999999952","OK","1002.9999999999999997"
"-926.4009669510000067","-912.01494010899998550","940.7999999999999960","OH","1093.8999999999999976"

正如您从我的代码中看到的那样,我正在尝试根据 "PCH_RECFAC_09_14"、"PCH_RECFACPTH_09_14" 和 "PCT_DIABETES_ADULTS08" 预测 "PCT_DIABETES_ADULTS13"。问题是当我尝试创建训练模型时系统似乎找不到 "PCT_DIABETES_ADULTS13" 列。

有谁知道我哪里出了问题以及我该如何解决这个问题?

修改所有以 pipeline.Append(...) 开头的行。Append(...) 方法的返回类型不是 void,而是返回一个 IEstimator<ITransformer>。您必须将返回值重新赋给 pipeline 变量。将所有

pipeline.Append(...)

改为

pipeline = pipeline.Append(...)

我自己在尝试改编 SentimentAnalysis 示例时,也是吃了亏才学到这一点的。

错误的原因是:我在调用 LoadFromTextFile 加载数据时,没有指定各列的类型。

这可以通过执行类似于以下的操作来解决

// Explicit schema for the CSV: each entry gives a column's name, its data kind,
// and its column index in the file (four numeric columns plus the text "State" column).
Microsoft.ML.Data.TextLoader.Column[] columnSchema =
{
    new Microsoft.ML.Data.TextLoader.Column("PCH_RECFAC_09_14", Microsoft.ML.Data.DataKind.Single, 0),
    new Microsoft.ML.Data.TextLoader.Column("PCH_RECFACPTH_09_14", Microsoft.ML.Data.DataKind.Single, 1),
    new Microsoft.ML.Data.TextLoader.Column("PCT_DIABETES_ADULTS08", Microsoft.ML.Data.DataKind.Single, 2),
    new Microsoft.ML.Data.TextLoader.Column("State", Microsoft.ML.Data.DataKind.String, 3),
    new Microsoft.ML.Data.TextLoader.Column("PCT_DIABETES_ADULTS13", Microsoft.ML.Data.DataKind.Single, 4)
};

// Load the training file using that schema; the named arguments spell out what
// each loader setting is instead of relying on a run of positional booleans.
IDataView trainingDataView = mlContext.Data.LoadFromTextFile(
    fileLocTraining,
    columnSchema,
    separatorChar: ',',
    hasHeader: true,
    allowQuoting: true,
    trimWhitespace: true,
    allowSparse: false);