找不到列 'PredictedLabel'
Could not find column 'PredictedLabel'
我正在使用 ML.NET 尝试对一些数据进行聚类,但遇到了标题中的异常:“找不到列 'PredictedLabel'”。可是我的预测类(class)中确实有这一列!
/// <summary>
/// Row type used to read clustering results back out of a transformed data view
/// (it is the type argument given to <c>CreateEnumerable</c> in the snippets that follow).
/// </summary>
public class ClusterPrediction
{
    /// <summary>Label value carried alongside the prediction.</summary>
    public uint Label { get; set; }

    // Predicted label from the trainer.
    // [ColumnName("PredictedLabel")] <-- doesn't make a difference whether this attribute is present or not
    public uint PredictedLabel { get; set; }

    /// <summary>Score vector; initialised to an empty array.</summary>
    public float[] Score { get; set; } = new float[] { };

    /// <summary>Feature vector; initialised to an empty array.</summary>
    public float[] Features { get; set; } = new float[] { };
}
以下是我设置管道的方式:
// Build a schema definition from the input row type and rewrite the column type of
// its first five entries as vectors of length InputCombinedClusterData.LOOKBACK.
// NOTE(review): clusterSchema is not passed to LoadFromEnumerable anywhere in this
// snippet — confirm whether it was meant to be supplied as the schema argument.
var clusterSchema = SchemaDefinition.Create(typeof(InputCombinedClusterData));
for (int i = 0; i < 5; i++)
{
var col = clusterSchema[i];
// Keep the existing element type; only the vector length is replaced.
var itemType = ((VectorDataViewType)col.ColumnType).ItemType;
col.ColumnType = new VectorDataViewType(itemType, InputCombinedClusterData.LOOKBACK);
}
var trainingData = mlContext.Data.LoadFromEnumerable(data.TrainingData);
var options = new KMeansTrainer.Options
{
// will tune this later
NumberOfClusters = 15,
OptimizationTolerance = 1e-9f,
MaximumNumberOfIterations = 10000,
};
// Placeholder from the original post: the actual column names were elided.
var fundamentalCols = new List<string> { tons of columns };
string featuresColumnName = "Features";
// The value assigned to `pipeline` here consists of the two one-hot encoding steps only.
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("Sector")
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Industry"));
foreach (var col in fundamentalCols)
{
// NOTE(review): the value returned by Append is discarded. Append does not modify
// `pipeline` in place, so this normalization step never becomes part of it.
pipeline.Append(mlContext.Transforms.NormalizeMinMax(col));
}
// NOTE(review): same problem on the next two statements — the results of Append are
// thrown away, so neither the Concatenate step nor the KMeans trainer ends up in `pipeline`.
pipeline.Append(mlContext.Transforms.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume", "Sector", "Industry", {lots more columns}));
pipeline.Append(mlContext.Clustering.Trainers.KMeans(options));
// Train the model.
var model = pipeline.Fit(trainingData);
// test the model
var testData = mlContext.Data.LoadFromEnumerable(data.TestingData);
// Run the model on test data set
var transformedTestData = model.Transform(testData);
// Goes BOOM here: this is where "Could not find column 'PredictedLabel'" is thrown.
// Presumably the fitted model contains no KMeans step (see the notes above), so the
// transformed data has no PredictedLabel column for ClusterPrediction to bind to — confirm.
var predictions = mlContext.Data.CreateEnumerable<ClusterPrediction>(
transformedTestData, reuseRowObject: false).ToList();
我还有另一个聚类训练器,它可以正常工作,但使用的列要少得多。我不明白:既然两个训练器使用的是同一个预测类,为什么上面那个列更多的训练器会因为找不到 PredictedLabel 列而报错?下面是可以正常工作的那个,供参考:
// Build a schema definition from the input row type and rewrite the column type of
// its first five entries as vectors of length InputPriceClusterData.LOOKBACK.
// NOTE(review): clusterSchema is not passed to LoadFromEnumerable anywhere in this
// snippet — confirm whether it was meant to be supplied as the schema argument.
var clusterSchema = SchemaDefinition.Create(typeof(InputPriceClusterData));
for (int i = 0; i < 5; i++)
{
var col = clusterSchema[i];
// Keep the existing element type; only the vector length is replaced.
var itemType = ((VectorDataViewType)col.ColumnType).ItemType;
col.ColumnType = new VectorDataViewType(itemType, InputPriceClusterData.LOOKBACK);
}
var trainingData = mlContext.Data.LoadFromEnumerable(data.TrainingData);
var options = new KMeansTrainer.Options
{
NumberOfClusters = 15,
OptimizationTolerance = 1e-9f,
MaximumNumberOfIterations = 10000
};
// Define the pipeline: concatenate the five listed columns into "Features", then append
// the KMeans trainer. Append is chained on the expression itself, so the value stored in
// `pipeline` is the chain that includes the trainer.
string featuresColumnName = "Features";
var pipeline = mlContext.Transforms
.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume")
.Append(mlContext.Clustering.Trainers.KMeans(options));
// Train the model.
var model = pipeline.Fit(trainingData);
// test the model
var testData = mlContext.Data.LoadFromEnumerable(data.TestingData);
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list of ClusterPrediction rows.
var predictions = mlContext.Data.CreateEnumerable<ClusterPrediction>(
transformedTestData, reuseRowObject: false).ToList();
对于前面那个出错的训练器,我遗漏了什么?是 Append 的顺序有问题吗?
说来有点惭愧,管道上的 .Append
方法并不会像 someIList.Sort()
那样就地修改对象,而是返回一个新的管道,必须把返回值重新赋给变量(或者链式调用)。我还发现可以把所有列对一次性批量传入,一起做归一化。这样一次就解决了所有问题:
string featuresColumnName = "Features";

// Pair every fundamental column with itself (input name == output name) so that
// all of them can be handed to a single NormalizeMinMax call.
var colPairs = fundamentalCols.ConvertAll(name => new InputOutputColumnPair(name, name));

// Each Append is chained on the result of the previous call, and the final chain
// is what gets stored in `pipeline`.
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("Sector")
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("Industry"))
    .Append(mlContext.Transforms.NormalizeMinMax(colPairs.ToArray()))
    .Append(mlContext.Transforms.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume", "Sector", "Industry", {boatload of columns}))
    .Append(mlContext.Clustering.Trainers.KMeans(options));
我正在使用 ML.NET 尝试对一些数据进行聚类,但遇到了标题中的异常:“找不到列 'PredictedLabel'”。可是我的预测类(class)中确实有这一列!
/// <summary>
/// Row type used to read clustering results back out of a transformed data view
/// (it is the type argument given to <c>CreateEnumerable</c> in the snippets that follow).
/// </summary>
public class ClusterPrediction
{
    /// <summary>Label value carried alongside the prediction.</summary>
    public uint Label { get; set; }

    // Predicted label from the trainer.
    // [ColumnName("PredictedLabel")] <-- doesn't make a difference whether this attribute is present or not
    public uint PredictedLabel { get; set; }

    /// <summary>Score vector; initialised to an empty array.</summary>
    public float[] Score { get; set; } = new float[] { };

    /// <summary>Feature vector; initialised to an empty array.</summary>
    public float[] Features { get; set; } = new float[] { };
}
以下是我设置管道的方式:
// Build a schema definition from the input row type and rewrite the column type of
// its first five entries as vectors of length InputCombinedClusterData.LOOKBACK.
// NOTE(review): clusterSchema is not passed to LoadFromEnumerable anywhere in this
// snippet — confirm whether it was meant to be supplied as the schema argument.
var clusterSchema = SchemaDefinition.Create(typeof(InputCombinedClusterData));
for (int i = 0; i < 5; i++)
{
var col = clusterSchema[i];
// Keep the existing element type; only the vector length is replaced.
var itemType = ((VectorDataViewType)col.ColumnType).ItemType;
col.ColumnType = new VectorDataViewType(itemType, InputCombinedClusterData.LOOKBACK);
}
var trainingData = mlContext.Data.LoadFromEnumerable(data.TrainingData);
var options = new KMeansTrainer.Options
{
// will tune this later
NumberOfClusters = 15,
OptimizationTolerance = 1e-9f,
MaximumNumberOfIterations = 10000,
};
// Placeholder from the original post: the actual column names were elided.
var fundamentalCols = new List<string> { tons of columns };
string featuresColumnName = "Features";
// The value assigned to `pipeline` here consists of the two one-hot encoding steps only.
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("Sector")
.Append(mlContext.Transforms.Categorical.OneHotEncoding("Industry"));
foreach (var col in fundamentalCols)
{
// NOTE(review): the value returned by Append is discarded. Append does not modify
// `pipeline` in place, so this normalization step never becomes part of it.
pipeline.Append(mlContext.Transforms.NormalizeMinMax(col));
}
// NOTE(review): same problem on the next two statements — the results of Append are
// thrown away, so neither the Concatenate step nor the KMeans trainer ends up in `pipeline`.
pipeline.Append(mlContext.Transforms.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume", "Sector", "Industry", {lots more columns}));
pipeline.Append(mlContext.Clustering.Trainers.KMeans(options));
// Train the model.
var model = pipeline.Fit(trainingData);
// test the model
var testData = mlContext.Data.LoadFromEnumerable(data.TestingData);
// Run the model on test data set
var transformedTestData = model.Transform(testData);
// Goes BOOM here: this is where "Could not find column 'PredictedLabel'" is thrown.
// Presumably the fitted model contains no KMeans step (see the notes above), so the
// transformed data has no PredictedLabel column for ClusterPrediction to bind to — confirm.
var predictions = mlContext.Data.CreateEnumerable<ClusterPrediction>(
transformedTestData, reuseRowObject: false).ToList();
我还有另一个聚类训练器,它可以正常工作,但使用的列要少得多。我不明白:既然两个训练器使用的是同一个预测类,为什么上面那个列更多的训练器会因为找不到 PredictedLabel 列而报错?下面是可以正常工作的那个,供参考:
// Build a schema definition from the input row type and rewrite the column type of
// its first five entries as vectors of length InputPriceClusterData.LOOKBACK.
// NOTE(review): clusterSchema is not passed to LoadFromEnumerable anywhere in this
// snippet — confirm whether it was meant to be supplied as the schema argument.
var clusterSchema = SchemaDefinition.Create(typeof(InputPriceClusterData));
for (int i = 0; i < 5; i++)
{
var col = clusterSchema[i];
// Keep the existing element type; only the vector length is replaced.
var itemType = ((VectorDataViewType)col.ColumnType).ItemType;
col.ColumnType = new VectorDataViewType(itemType, InputPriceClusterData.LOOKBACK);
}
var trainingData = mlContext.Data.LoadFromEnumerable(data.TrainingData);
var options = new KMeansTrainer.Options
{
NumberOfClusters = 15,
OptimizationTolerance = 1e-9f,
MaximumNumberOfIterations = 10000
};
// Define the pipeline: concatenate the five listed columns into "Features", then append
// the KMeans trainer. Append is chained on the expression itself, so the value stored in
// `pipeline` is the chain that includes the trainer.
string featuresColumnName = "Features";
var pipeline = mlContext.Transforms
.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume")
.Append(mlContext.Clustering.Trainers.KMeans(options));
// Train the model.
var model = pipeline.Fit(trainingData);
// test the model
var testData = mlContext.Data.LoadFromEnumerable(data.TestingData);
// Run the model on test data set.
var transformedTestData = model.Transform(testData);
// Convert IDataView object to a list of ClusterPrediction rows.
var predictions = mlContext.Data.CreateEnumerable<ClusterPrediction>(
transformedTestData, reuseRowObject: false).ToList();
对于前面那个出错的训练器,我遗漏了什么?是 Append 的顺序有问题吗?
说来有点惭愧,管道上的 .Append
方法并不会像 someIList.Sort()
那样就地修改对象,而是返回一个新的管道,必须把返回值重新赋给变量(或者链式调用)。我还发现可以把所有列对一次性批量传入,一起做归一化。这样一次就解决了所有问题:
string featuresColumnName = "Features";

// Pair every fundamental column with itself (input name == output name) so that
// all of them can be handed to a single NormalizeMinMax call.
var colPairs = fundamentalCols.ConvertAll(name => new InputOutputColumnPair(name, name));

// Each Append is chained on the result of the previous call, and the final chain
// is what gets stored in `pipeline`.
var pipeline = mlContext.Transforms.Categorical.OneHotEncoding("Sector")
    .Append(mlContext.Transforms.Categorical.OneHotEncoding("Industry"))
    .Append(mlContext.Transforms.NormalizeMinMax(colPairs.ToArray()))
    .Append(mlContext.Transforms.Concatenate(featuresColumnName, "Open", "High", "Low", "Close", "Volume", "Sector", "Industry", {boatload of columns}))
    .Append(mlContext.Clustering.Trainers.KMeans(options));