MLR 重采样为多标签分类创建单类问题
MLR resampling creates oneclass problems for multilabel classification
我正在尝试使用交叉验证来评估某些 MLR 分类器的多标签分类性能。
我尝试过使用 MLR 的 resample 方法,也尝试过传入自己的子集,但这两种情况下都会抛出错误(根据我的发现,当用于训练的子集中某个标签只包含单一取值时,就会出现这种情况)。
下面是发生此问题的一个小例子:
# Minimal reproduction: a logistic-regression base learner wrapped in a
# multilabel classifier chain.
learner <- mlr::makeLearner("classif.logreg")
learner <- makeMultilabelClassifierChainsWrapper(learner)

# Seven rows with two numeric attributes and two logical label columns.
# Each label is TRUE in exactly one row (lab1 in row 3, lab2 in row 2).
data <- data.frame(
  attr1 = c(1, 2, 2, 1, 2, 1, 2),
  attr2 = c(2, 1, 2, 2, 1, 2, 1),
  lab1 = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
  lab2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)
)
task <- mlr::makeMultilabelTask(data = data, target = c("lab1", "lab2"))
这里有两种出错的方法:
1.
# Approach 1: 3-fold cross-validation of the chained learner on the task.
rDesc <- makeResampleDesc(method = "CV", iters = 3)
resample(learner = learner, task = task, resampling = rDesc)
2.
# Approach 2: train on a logical subset that drops rows 2 and 3, i.e. the only
# rows where lab2 / lab1 are TRUE, so both labels are constant in the subset.
model <- mlr::train(
  learner,
  task,
  subset = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
)
错误信息:
Error in checkLearnerBeforeTrain(task, learner, weights): Task 'lab1' is a one-class-problem, but learner 'classif.logreg' does not support that!
由于 MLR 中没有支持单类(one-class)分类的学习器( https://mlr.mlr-org.com/articles/tutorial/integrated_learners.html ),而手动拆分数据又可能相当麻烦(尤其是对于像 reutersk500 这样的数据集),我为二分类学习器创建了一个包装器:如果给定任务只有单个目标类别,它将始终返回该类别的唯一取值;如果有多个类别,则使用被包装的学习器:
(此代码将成为存储库 https://github.com/lychanl/ChainsOfClassification 的一部分)
# Wrap a classification learner so that it also declares the "oneclass"
# property.
#
# learner: the learner to wrap; it is passed through
#   checkLearner(type = "classif") first.
# Returns the object built by mlr::makeBaseWrapper() with learner subclass
#   "OneClassWrapper" and model subclass "OneClassWrapperModel", whose
#   properties are those of `learner` plus "oneclass".
makeOneClassWrapper <- function(learner) {
  learner <- checkLearner(learner, type = "classif")
  id <- paste("classif.oneClassWrapper", getLearnerId(learner), sep = ".")
  packs <- getLearnerPackages(learner)
  type <- getLearnerType(learner)
  x <- mlr::makeBaseWrapper(id, type, learner, packs, makeParamSet(),
    learner.subclass = c("OneClassWrapper"),
    model.subclass = c("OneClassWrapperModel"))
  x$type <- "classif"
  # union() rather than c(): no duplicated "oneclass" entry when the wrapped
  # learner already lists that property.
  x$properties <- union(learner$properties, "oneclass")
  x
}
# Train method for OneClassWrapper learners.
#
# When the task description lists at most one class level, no model is fitted:
# the stored payload just records the task's `positive` value. Otherwise the
# wrapped learner is trained on the task and its model is stored.
# In both cases the payload gets class "OneClassWrapperModel" and is returned
# wrapped by makeChainModel() using the learner's model subclass.
trainLearner.OneClassWrapper <- function(.learner, .task, .subset = NULL, .weights = NULL, ...) {
  single_class <- length(getTaskDesc(.task)$class.levels) <= 1
  payload <- if (single_class) {
    list(oneclass = TRUE, value = .task$task.desc$positive)
  } else {
    fitted <- train(.learner$next.learner, .task, .subset, .weights)
    list(oneclass = FALSE, model = fitted)
  }
  class(payload) <- "OneClassWrapperModel"
  makeChainModel(next.model = payload, cl = c(.learner$model.subclass))
}
# Predict method for OneClassWrapper learners.
#
# One-class models: repeats the stored value (coerced to logical) once per row
# of `.newdata`. Otherwise: predicts with the stored model and extracts either
# the response or the probabilities for class "TRUE", depending on the
# learner's predict.type. The result is always passed through as.factor().
# NOTE(review): as.factor() is also applied to the probability output; confirm
# that this is what callers expect for predict.type other than "response".
predictLearner.OneClassWrapper <- function(.learner, .model, .newdata, ...) {
  inner <- mlr::getLearnerModel(.model, more.unwrap = FALSE)

  # Constant prediction when training saw only a single class.
  if (inner$oneclass) {
    constant <- as.logical(rep(inner$value, nrow(.newdata)))
    return(as.factor(constant))
  }

  prediction <- predict(inner$model, newdata = .newdata)
  result <- if (.learner$predict.type == "response") {
    getPredictionResponse(prediction)
  } else {
    getPredictionProbabilities(prediction, cl = "TRUE")
  }
  as.factor(result)
}
# Properties of a OneClassWrapper are exactly those stored on the wrapper
# object itself (set in makeOneClassWrapper).
getLearnerProperties.OneClassWrapper <- function(.learner) {
  .learner$properties
}
# A one-class model is never a failure model; otherwise the answer is
# delegated to the stored inner model.
isFailureModel.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    return(FALSE)
  }
  isFailureModel(inner$model)
}
# Failure message: empty string for one-class models, otherwise whatever the
# stored inner model reports.
getFailureModelMsg.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    ""
  } else {
    getFailureModelMsg(inner$model)
  }
}
# Failure dump: empty string for one-class models, otherwise whatever the
# stored inner model reports.
getFailureModelDump.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    ""
  } else {
    getFailureModelDump(inner$model)
  }
}
# Register the wrapper's S3 methods with their generics.
# The class argument must be the bare class name: the previous "<...>" form
# was a literal template placeholder and registered the methods for classes
# that never occur. predictLearner is registered too, so that prediction does
# not depend on the method being found by name from the calling environment.
registerS3method("trainLearner", "OneClassWrapper",
  trainLearner.OneClassWrapper)
registerS3method("predictLearner", "OneClassWrapper",
  predictLearner.OneClassWrapper)
registerS3method("getLearnerProperties", "OneClassWrapper",
  getLearnerProperties.OneClassWrapper)
registerS3method("isFailureModel", "OneClassWrapperModel",
  isFailureModel.OneClassWrapperModel)
registerS3method("getFailureModelMsg", "OneClassWrapperModel",
  getFailureModelMsg.OneClassWrapperModel)
registerS3method("getFailureModelDump", "OneClassWrapperModel",
  getFailureModelDump.OneClassWrapperModel)
我正在尝试使用交叉验证来评估某些 MLR 分类器的多标签分类性能。
我尝试过使用 MLR 的 resample 方法,也尝试过传入自己的子集,但这两种情况下都会抛出错误(根据我的发现,当用于训练的子集中某个标签只包含单一取值时,就会出现这种情况)。
下面是发生此问题的一个小例子:
# Minimal reproduction: a logistic-regression base learner wrapped in a
# multilabel classifier chain.
learner <- mlr::makeLearner("classif.logreg")
learner <- makeMultilabelClassifierChainsWrapper(learner)

# Seven rows with two numeric attributes and two logical label columns.
# Each label is TRUE in exactly one row (lab1 in row 3, lab2 in row 2).
data <- data.frame(
  attr1 = c(1, 2, 2, 1, 2, 1, 2),
  attr2 = c(2, 1, 2, 2, 1, 2, 1),
  lab1 = c(FALSE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE),
  lab2 = c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE)
)
task <- mlr::makeMultilabelTask(data = data, target = c("lab1", "lab2"))
这里有两种出错的方法:
1.
# Approach 1: 3-fold cross-validation of the chained learner on the task.
rDesc <- makeResampleDesc(method = "CV", iters = 3)
resample(learner = learner, task = task, resampling = rDesc)
2.
# Approach 2: train on a logical subset that drops rows 2 and 3, i.e. the only
# rows where lab2 / lab1 are TRUE, so both labels are constant in the subset.
model <- mlr::train(
  learner,
  task,
  subset = c(TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE)
)
错误信息:
Error in checkLearnerBeforeTrain(task, learner, weights): Task 'lab1' is a one-class-problem, but learner 'classif.logreg' does not support that!
由于 MLR 中没有支持单类(one-class)分类的学习器( https://mlr.mlr-org.com/articles/tutorial/integrated_learners.html ),而手动拆分数据又可能相当麻烦(尤其是对于像 reutersk500 这样的数据集),我为二分类学习器创建了一个包装器:如果给定任务只有单个目标类别,它将始终返回该类别的唯一取值;如果有多个类别,则使用被包装的学习器:
(此代码将成为存储库 https://github.com/lychanl/ChainsOfClassification 的一部分)
# Wrap a classification learner so that it also declares the "oneclass"
# property.
#
# learner: the learner to wrap; it is passed through
#   checkLearner(type = "classif") first.
# Returns the object built by mlr::makeBaseWrapper() with learner subclass
#   "OneClassWrapper" and model subclass "OneClassWrapperModel", whose
#   properties are those of `learner` plus "oneclass".
makeOneClassWrapper <- function(learner) {
  learner <- checkLearner(learner, type = "classif")
  id <- paste("classif.oneClassWrapper", getLearnerId(learner), sep = ".")
  packs <- getLearnerPackages(learner)
  type <- getLearnerType(learner)
  x <- mlr::makeBaseWrapper(id, type, learner, packs, makeParamSet(),
    learner.subclass = c("OneClassWrapper"),
    model.subclass = c("OneClassWrapperModel"))
  x$type <- "classif"
  # union() rather than c(): no duplicated "oneclass" entry when the wrapped
  # learner already lists that property.
  x$properties <- union(learner$properties, "oneclass")
  x
}
# Train method for OneClassWrapper learners.
#
# When the task description lists at most one class level, no model is fitted:
# the stored payload just records the task's `positive` value. Otherwise the
# wrapped learner is trained on the task and its model is stored.
# In both cases the payload gets class "OneClassWrapperModel" and is returned
# wrapped by makeChainModel() using the learner's model subclass.
trainLearner.OneClassWrapper <- function(.learner, .task, .subset = NULL, .weights = NULL, ...) {
  single_class <- length(getTaskDesc(.task)$class.levels) <= 1
  payload <- if (single_class) {
    list(oneclass = TRUE, value = .task$task.desc$positive)
  } else {
    fitted <- train(.learner$next.learner, .task, .subset, .weights)
    list(oneclass = FALSE, model = fitted)
  }
  class(payload) <- "OneClassWrapperModel"
  makeChainModel(next.model = payload, cl = c(.learner$model.subclass))
}
# Predict method for OneClassWrapper learners.
#
# One-class models: repeats the stored value (coerced to logical) once per row
# of `.newdata`. Otherwise: predicts with the stored model and extracts either
# the response or the probabilities for class "TRUE", depending on the
# learner's predict.type. The result is always passed through as.factor().
# NOTE(review): as.factor() is also applied to the probability output; confirm
# that this is what callers expect for predict.type other than "response".
predictLearner.OneClassWrapper <- function(.learner, .model, .newdata, ...) {
  inner <- mlr::getLearnerModel(.model, more.unwrap = FALSE)

  # Constant prediction when training saw only a single class.
  if (inner$oneclass) {
    constant <- as.logical(rep(inner$value, nrow(.newdata)))
    return(as.factor(constant))
  }

  prediction <- predict(inner$model, newdata = .newdata)
  result <- if (.learner$predict.type == "response") {
    getPredictionResponse(prediction)
  } else {
    getPredictionProbabilities(prediction, cl = "TRUE")
  }
  as.factor(result)
}
# Properties of a OneClassWrapper are exactly those stored on the wrapper
# object itself (set in makeOneClassWrapper).
getLearnerProperties.OneClassWrapper <- function(.learner) {
  .learner$properties
}
# A one-class model is never a failure model; otherwise the answer is
# delegated to the stored inner model.
isFailureModel.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    return(FALSE)
  }
  isFailureModel(inner$model)
}
# Failure message: empty string for one-class models, otherwise whatever the
# stored inner model reports.
getFailureModelMsg.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    ""
  } else {
    getFailureModelMsg(inner$model)
  }
}
# Failure dump: empty string for one-class models, otherwise whatever the
# stored inner model reports.
getFailureModelDump.OneClassWrapperModel <- function(model) {
  inner <- mlr::getLearnerModel(model, more.unwrap = FALSE)
  if (inner$oneclass) {
    ""
  } else {
    getFailureModelDump(inner$model)
  }
}
# Register the wrapper's S3 methods with their generics.
# The class argument must be the bare class name: the previous "<...>" form
# was a literal template placeholder and registered the methods for classes
# that never occur. predictLearner is registered too, so that prediction does
# not depend on the method being found by name from the calling environment.
registerS3method("trainLearner", "OneClassWrapper",
  trainLearner.OneClassWrapper)
registerS3method("predictLearner", "OneClassWrapper",
  predictLearner.OneClassWrapper)
registerS3method("getLearnerProperties", "OneClassWrapper",
  getLearnerProperties.OneClassWrapper)
registerS3method("isFailureModel", "OneClassWrapperModel",
  isFailureModel.OneClassWrapperModel)
registerS3method("getFailureModelMsg", "OneClassWrapperModel",
  getFailureModelMsg.OneClassWrapperModel)
registerS3method("getFailureModelDump", "OneClassWrapperModel",
  getFailureModelDump.OneClassWrapperModel)