System.ArgumentOutOfRangeException: '未找到特征列 'Feature'(参数 'schema')'
System.ArgumentOutOfRangeException: 'Features column 'Feature' not found (Parameter 'schema')'
我在训练模型时遇到了问题。我有一批 HTTP 请求,希望能够判断某个请求是否来自机器人。为了训练模型,我准备了一组如下类型的对象:
/// <summary>
/// One HTTP request sample. Instances are loaded into an <c>IDataView</c> via
/// <c>LoadFromEnumerable</c>, so each public property becomes a column of the same name.
/// </summary>
public class Request
{
/// <summary>Requested path; featurized into the "UrlF" column by the pipeline.</summary>
public string Url { get; set; }
/// <summary>User-agent string; featurized into the "UserAgentF" column by the pipeline.</summary>
public string UserAgent { get; set; }
/// <summary>Training label: <c>true</c> when the request came from a bot (used as <c>labelColumnName</c>).</summary>
public bool IsBot { get; set; }
}
预测结果类如下:
/// <summary>
/// Output type of the prediction engine created with
/// <c>CreatePredictionEngine&lt;Request, IsBotPrediction&gt;</c>.
/// </summary>
public class IsBotPrediction
{
/// <summary>Predicted label; bound to the "PredictedLabel" output column through the attribute below.</summary>
[ColumnName("PredictedLabel")]
public bool Prediction { get; set; }
/// <summary>
/// Bound by name to the "Score" output column.
/// NOTE(review): presumably the classifier's raw, uncalibrated score - confirm against the trainer docs.
/// </summary>
public float Score { get; set; }
}
为了这个例子,我创建了一个硬编码数据列表:
// Hard-coded sample set used only for this example.
// Element order is kept as-is because the data is fed to the trainer in this order.
List<Request> trainingData = new List<Request>()
{
    new Request() { Url = "/wp-admin",   UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/backoffice", UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/hack",       UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/login",      UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/dashboard",  UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/humans.txt", UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/admin",      UserAgent = "a bot", IsBot = true },
};
为了训练模型,我使用了以下代码:
// Wrap the in-memory list as an IDataView; columns come from the Request properties.
IDataView mlData = mlContext.Data.LoadFromEnumerable(trainingData);
// Data preparation: featurize the two text columns, concatenate them into "Features",
// min-max normalize "Features" in place, then add a cache checkpoint.
var dataPrepPipeline = mlContext
.Transforms
.Text
.FeaturizeText("UrlF", "Url")
.Append(mlContext.Transforms.Text.FeaturizeText("UserAgentF", "UserAgent"))
.Append(mlContext.Transforms.Concatenate("Features", "UrlF", "UserAgentF"))
.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
.AppendCacheCheckpoint(mlContext);
// Fit only the preparation steps; despite its name, prepPipeline is the fitted transformer.
var prepPipeline = dataPrepPipeline.Fit(mlData);
// The trainer is configured on its own, not appended to dataPrepPipeline.
var trainer = mlContext
.BinaryClassification
.Trainers
.AveragedPerceptron(labelColumnName: "IsBot", numberOfIterations: 10, featureColumnName: "Features");
var preprocessedData = prepPipeline.Transform(mlData);
// NOTE: trainedModel is the result of fitting the trainer alone on already-transformed data.
// The fitted preparation steps (prepPipeline) are not part of it, so it expects its input
// to already carry the "Features" column.
ITransformer trainedModel = trainer.Fit(preprocessedData);
模型训练看起来成功了。但是当我尝试创建预测引擎时:
// This is the call reported to throw: the engine feeds raw Request rows (Url, UserAgent, IsBot)
// into trainedModel, which was fitted on preprocessed data and therefore looks for a feature
// column that Request does not provide.
var predEngine = mlContext.Model.CreatePredictionEngine<Request, IsBotPrediction>(trainedModel);
我得到以下异常:
System.ArgumentOutOfRangeException: 'Features column 'Feature' not found (Parameter 'schema')'
你能帮我看看这是什么意思吗?
这很可能是因为训练器是在已经转换过的数据上单独拟合的:trainedModel 只包含训练器本身,并不包含生成 Features 列的数据预处理步骤,而预测引擎接收的是原始的 Request 对象,其中没有这一列。
以下设置应该有效。
// Data preparation: featurize both text columns, concatenate them into "Features",
// min-max normalize "Features" in place, then add a cache checkpoint for the trainer.
var dataPrepPipeline = mlContext.Transforms.Text.FeaturizeText("UrlF", "Url")
    .Append(mlContext.Transforms.Text.FeaturizeText("UserAgentF", "UserAgent"))
    .Append(mlContext.Transforms.Concatenate("Features", "UrlF", "UserAgentF"))
    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
    .AppendCacheCheckpoint(mlContext);

// Fit the preparation steps on their own so they can be saved as a separate artifact.
var dataPrepModel = dataPrepPipeline.Fit(mlData);
var dataPrepDataView = dataPrepModel.Transform(mlData); // transformed view, kept for inspection

// Chain the trainer onto the preparation steps so the fitted model accepts raw Request rows.
var pipeline = dataPrepPipeline.Append(
    mlContext.BinaryClassification.Trainers.AveragedPerceptron(
        labelColumnName: "IsBot",
        numberOfIterations: 10,
        featureColumnName: "Features"));

// Model.Save expects the schema of the data the transformer CONSUMES. Both transformers
// below take the raw mlData as input, so mlData.Schema is passed (not the schema of their
// transformed output, which would be returned as a wrong input schema on Load).
mlContext.Model.Save(dataPrepModel, mlData.Schema, "./dataprep.zip");

var model = pipeline.Fit(mlData);
var modelDataView = model.Transform(mlData); // scored training data, kept for inspection/evaluation
mlContext.Model.Save(model, mlData.Schema, "./model.zip");

// model contains preparation + trainer, so the engine can be fed Request objects directly.
var predEngine = mlContext.Model.CreatePredictionEngine<Request, IsBotPrediction>(model);
我在训练模型时遇到了问题。我有一批 HTTP 请求,希望能够判断某个请求是否来自机器人。为了训练模型,我准备了一组如下类型的对象:
/// <summary>
/// One HTTP request sample. Instances are loaded into an <c>IDataView</c> via
/// <c>LoadFromEnumerable</c>, so each public property becomes a column of the same name.
/// </summary>
public class Request
{
/// <summary>Requested path; featurized into the "UrlF" column by the pipeline.</summary>
public string Url { get; set; }
/// <summary>User-agent string; featurized into the "UserAgentF" column by the pipeline.</summary>
public string UserAgent { get; set; }
/// <summary>Training label: <c>true</c> when the request came from a bot (used as <c>labelColumnName</c>).</summary>
public bool IsBot { get; set; }
}
预测结果类如下:
/// <summary>
/// Output type of the prediction engine created with
/// <c>CreatePredictionEngine&lt;Request, IsBotPrediction&gt;</c>.
/// </summary>
public class IsBotPrediction
{
/// <summary>Predicted label; bound to the "PredictedLabel" output column through the attribute below.</summary>
[ColumnName("PredictedLabel")]
public bool Prediction { get; set; }
/// <summary>
/// Bound by name to the "Score" output column.
/// NOTE(review): presumably the classifier's raw, uncalibrated score - confirm against the trainer docs.
/// </summary>
public float Score { get; set; }
}
为了这个例子,我创建了一个硬编码数据列表:
// Hard-coded sample set used only for this example.
// Element order is kept as-is because the data is fed to the trainer in this order.
List<Request> trainingData = new List<Request>()
{
    new Request() { Url = "/wp-admin",   UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/backoffice", UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/hack",       UserAgent = "a bot", IsBot = true },
    new Request() { Url = "/login",      UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/dashboard",  UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/humans.txt", UserAgent = "a bot", IsBot = false },
    new Request() { Url = "/admin",      UserAgent = "a bot", IsBot = true },
};
为了训练模型,我使用了以下代码:
// Wrap the in-memory list as an IDataView; columns come from the Request properties.
IDataView mlData = mlContext.Data.LoadFromEnumerable(trainingData);
// Data preparation: featurize the two text columns, concatenate them into "Features",
// min-max normalize "Features" in place, then add a cache checkpoint.
var dataPrepPipeline = mlContext
.Transforms
.Text
.FeaturizeText("UrlF", "Url")
.Append(mlContext.Transforms.Text.FeaturizeText("UserAgentF", "UserAgent"))
.Append(mlContext.Transforms.Concatenate("Features", "UrlF", "UserAgentF"))
.Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
.AppendCacheCheckpoint(mlContext);
// Fit only the preparation steps; despite its name, prepPipeline is the fitted transformer.
var prepPipeline = dataPrepPipeline.Fit(mlData);
// The trainer is configured on its own, not appended to dataPrepPipeline.
var trainer = mlContext
.BinaryClassification
.Trainers
.AveragedPerceptron(labelColumnName: "IsBot", numberOfIterations: 10, featureColumnName: "Features");
var preprocessedData = prepPipeline.Transform(mlData);
// NOTE: trainedModel is the result of fitting the trainer alone on already-transformed data.
// The fitted preparation steps (prepPipeline) are not part of it, so it expects its input
// to already carry the "Features" column.
ITransformer trainedModel = trainer.Fit(preprocessedData);
模型训练看起来成功了。但是当我尝试创建预测引擎时:
// This is the call reported to throw: the engine feeds raw Request rows (Url, UserAgent, IsBot)
// into trainedModel, which was fitted on preprocessed data and therefore looks for a feature
// column that Request does not provide.
var predEngine = mlContext.Model.CreatePredictionEngine<Request, IsBotPrediction>(trainedModel);
我得到以下异常:
System.ArgumentOutOfRangeException: 'Features column 'Feature' not found (Parameter 'schema')'
你能帮我看看这是什么意思吗?
这很可能是因为训练器是在已经转换过的数据上单独拟合的:trainedModel 只包含训练器本身,并不包含生成 Features 列的数据预处理步骤,而预测引擎接收的是原始的 Request 对象,其中没有这一列。
以下设置应该有效。
// Data preparation: featurize both text columns, concatenate them into "Features",
// min-max normalize "Features" in place, then add a cache checkpoint for the trainer.
var dataPrepPipeline = mlContext.Transforms.Text.FeaturizeText("UrlF", "Url")
    .Append(mlContext.Transforms.Text.FeaturizeText("UserAgentF", "UserAgent"))
    .Append(mlContext.Transforms.Concatenate("Features", "UrlF", "UserAgentF"))
    .Append(mlContext.Transforms.NormalizeMinMax("Features", "Features"))
    .AppendCacheCheckpoint(mlContext);

// Fit the preparation steps on their own so they can be saved as a separate artifact.
var dataPrepModel = dataPrepPipeline.Fit(mlData);
var dataPrepDataView = dataPrepModel.Transform(mlData); // transformed view, kept for inspection

// Chain the trainer onto the preparation steps so the fitted model accepts raw Request rows.
var pipeline = dataPrepPipeline.Append(
    mlContext.BinaryClassification.Trainers.AveragedPerceptron(
        labelColumnName: "IsBot",
        numberOfIterations: 10,
        featureColumnName: "Features"));

// Model.Save expects the schema of the data the transformer CONSUMES. Both transformers
// below take the raw mlData as input, so mlData.Schema is passed (not the schema of their
// transformed output, which would be returned as a wrong input schema on Load).
mlContext.Model.Save(dataPrepModel, mlData.Schema, "./dataprep.zip");

var model = pipeline.Fit(mlData);
var modelDataView = model.Transform(mlData); // scored training data, kept for inspection/evaluation
mlContext.Model.Save(model, mlData.Schema, "./model.zip");

// model contains preparation + trainer, so the engine can be fed Request objects directly.
var predEngine = mlContext.Model.CreatePredictionEngine<Request, IsBotPrediction>(model);