如何使用mlr自定义分类模型

How to use mlr to customize a classification model

我正在研究 mlr,并尝试定制自己的分类模型。我使用的是 https://mlr-org.github.io/mlr-tutorial/release/html/create_learner/index.html#classification 中的示例。这是我的代码:

library(mlr)
library(MASS)
# Constructor for the "classif.lda" learner: describes a MASS-backed linear
# discriminant analysis classifier through makeRLearnerClassif().
#
# Hyperparameters declared in the parameter set:
#   method         - one of "moment", "mle", "mve", "t" (default "moment").
#   nu             - numeric, lower bound 2; carries the requirement
#                    method == "t".
#   tol            - numeric, lower bound 0 (default 1e-4).
#   predict.method - one of "plug-in", "predictive", "debiased" (default
#                    "plug-in"); declared with when = "predict".
#   CV             - logical (default FALSE), declared as not tunable.
#
# Takes no arguments and returns the result of makeRLearnerClassif().
makeRLearner.classif.lda <- function() {
  lda_params <- makeParamSet(
    makeDiscreteLearnerParam(
      id = "method",
      default = "moment",
      values = c("moment", "mle", "mve", "t")
    ),
    makeNumericLearnerParam(
      id = "nu",
      lower = 2,
      requires = quote(method == "t")
    ),
    makeNumericLearnerParam(id = "tol", default = 1e-4, lower = 0),
    makeDiscreteLearnerParam(
      id = "predict.method",
      values = c("plug-in", "predictive", "debiased"),
      default = "plug-in",
      when = "predict"
    ),
    makeLogicalLearnerParam(id = "CV", default = FALSE, tunable = FALSE)
  )

  lda_properties <- c("twoclass", "multiclass", "numerics", "factors", "prob")

  makeRLearnerClassif(
    cl = "classif.lda",
    package = "MASS",
    par.set = lda_params,
    properties = lda_properties,
    name = "Linear Discriminant Analysis",
    short.name = "lda",
    note = "Learner param 'predict.method' maps to 'method' in predict.lda."
  )
}
# Training function for the "classif.lda" learner.
#
# Arguments:
#   .learner - learner object; not referenced in the body.
#   .task    - task from which the model formula and the data are obtained.
#   .subset  - forwarded to getTaskData() together with the task.
#   .weights - accepted (default NULL) but not used in the body.
#   ...      - forwarded unchanged to MASS::lda().
#
# Returns the object produced by MASS::lda() directly; it is NOT wrapped in a
# list with a `learner.model` element.
trainLearner.classif.lda = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)
  # The fit stores this call expression verbatim in its `call` element, so the
  # local name `f` and the inline getTaskData() call show up in the result.
  MASS::lda(f, data = getTaskData(.task, .subset), ...)
}
# Prediction function for the "classif.lda" learner.
#
# Arguments:
#   .learner       - object whose `predict.type` element selects the output.
#   .model         - object whose `learner.model` element holds the fitted
#                    model handed to predict().
#   .newdata       - passed to predict() as `newdata`.
#   predict.method - passed to predict() as `method` (default "plug-in").
#   ...            - forwarded unchanged to predict().
#
# Returns the `class` element of the prediction when
# `.learner$predict.type == "response"`, otherwise the `posterior` element.
predictLearner.classif.lda <- function(.learner, .model, .newdata, predict.method = "plug-in", ...) {
  fitted_model <- .model$learner.model
  prediction <- predict(
    fitted_model,
    newdata = .newdata,
    method = predict.method,
    ...
  )

  wants_labels <- .learner$predict.type == "response"
  if (wants_labels) {
    prediction$class
  } else {
    prediction$posterior
  }
}
# Experiment script: fit on a random two thirds of iris and attempt to predict
# for a random sixth by calling the learner functions directly.
data(iris)
# The two index vectors are sampled independently of each other.
train <- sample(seq_len(nrow(iris)), nrow(iris) / 1.5)
test <- sample(seq_len(nrow(iris)), nrow(iris) / 6)
task <- makeClassifTask(data = iris, target = "Species")
lrn <- makeRLearner.classif.lda()
# `tr` is whatever trainLearner.classif.lda() returns (the bare fitted model).
tr <- trainLearner.classif.lda(.learner = lrn, .task = task, .subset = train)
# NOTE: this is the call that raises
#   no applicable method for 'predict' applied to an object of class "NULL"
# `tr` has no `learner.model` element, so `.model$learner.model` is NULL inside
# predictLearner.classif.lda(). `.newdata` also receives the index vector
# `test`, not rows of `iris`.
pred <- predictLearner.classif.lda(.learner = lrn, .model = tr, .newdata = test)

我刚刚从网站上复制并粘贴了三个函数。但是我遇到了以下错误:

Error in UseMethod("predict") : 
  no applicable method for 'predict' applied to an object of class "NULL"

我发现我的 tr 里没有 $learner.model,而它本应存在,并会被传入预测函数。我的 tr 内容如下:

> str(tr)
List of 10
 $ prior  : Named num [1:3] 0.38 0.3 0.32
  ..- attr(*, "names")= chr [1:3] "setosa" "versicolor" "virginica"
 $ counts : Named int [1:3] 38 30 32
  ..- attr(*, "names")= chr [1:3] "setosa" "versicolor" "virginica"
 $ means  : num [1:3, 1:4] 5.02 5.94 6.65 3.47 2.83 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:3] "setosa" "versicolor" "virginica"
  .. ..$ : chr   [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
 $ scaling: num [1:4, 1:2] 0.869 1.384 -2.214 -2.954 0.157 ...
  ..- attr(*, "dimnames")=List of 2
  .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
  .. ..$ : chr [1:2] "LD1" "LD2"
 $ lev    : chr [1:3] "setosa" "versicolor" "virginica"
 $ svd    : num [1:2] 41.78 2.91
 $ N      : int 100
 $ call   : language lda(formula = f, data = getTaskData(.task, .subset))
 $ terms  :Classes 'terms', 'formula'  language Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
  .. ..- attr(*, "variables")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
  .. ..- attr(*, "factors")= int [1:5, 1:4] 0 1 0 0 0 0 0 1 0 0 ...
  .. .. ..- attr(*, "dimnames")=List of 2
  .. .. .. ..$ : chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
  .. .. .. ..$ : chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
  .. ..- attr(*, "term.labels")= chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width"
  .. ..- attr(*, "order")= int [1:4] 1 1 1 1
  .. ..- attr(*, "intercept")= int 1
  .. ..- attr(*, "response")= int 1
  .. ..- attr(*, ".Environment")=<environment: 0x00000000213a8150> 
  .. ..- attr(*, "predvars")= language list(Species, Sepal.Length, Sepal.Width, Petal.Length, Petal.Width)
  .. ..- attr(*, "dataClasses")= Named chr [1:5] "factor" "numeric" "numeric" "numeric" ...
  .. .. ..- attr(*, "names")= chr [1:5] "Species" "Sepal.Length" "Sepal.Width" "Petal.Length" ...
 $ xlevels: Named list()
 - attr(*, "class")= chr "lda"

我试过注册这个学习器(learner),但我想我的方法不对。这是我的代码:

# Attempted S3 registration of the three learner functions. The
# `<awesome_new_learner_class>` placeholders were left unsubstituted, so these
# lines are not valid R as written.
# NOTE(review): registerS3method() takes the generic's name first (e.g.
# "trainLearner"), then the class name (e.g. "classif.lda"), then the method
# function; here the full function name was passed as the generic.
registerS3method("makeRLearner.classif.lda", "<awesome_new_learner_class>", makeRLearner.classif.lda.<awesome_new_learner_class>)
registerS3method("trainLearner.classif.lda", "<awesome_new_learner_class>", trainLearner.classif.lda.<awesome_new_learner_class>)
registerS3method("predictLearner.classif.lda", "<awesome_new_learner_class>", predictLearner.classif.lda.<awesome_new_learner_class>)

可能我不应该只是从网站上复制代码。但我不知道该怎么做。我真的是 mlr 包的新手。

这里有一个完整的、可以运行的例子,使用的是 mlr 自带的 iris 任务。除了仔细查看 mlr 文档之外,您可能会发现一份关于 R 编程的一般性介绍很有用,特别是关于参数名称的部分——您看到的错误的原因是您没有把模型传给 predict()(.model 不是该参数的名称,它只是您在函数定义中对该变量的称呼)。

library(mlr)
library(MASS)
# Constructor for the "classif.lda1" learner: describes a MASS-backed linear
# discriminant analysis classifier through makeRLearnerClassif().
#
# Hyperparameters declared in the parameter set:
#   method         - one of "moment", "mle", "mve", "t" (default "moment").
#   nu             - numeric, lower bound 2; carries the requirement
#                    method == "t".
#   tol            - numeric, lower bound 0 (default 1e-4).
#   predict.method - one of "plug-in", "predictive", "debiased" (default
#                    "plug-in"); declared with when = "predict".
#   CV             - logical (default FALSE), declared as not tunable.
#
# Takes no arguments and returns the result of makeRLearnerClassif().
makeRLearner.classif.lda1 <- function() {
  lda_params <- makeParamSet(
    makeDiscreteLearnerParam(
      id = "method",
      default = "moment",
      values = c("moment", "mle", "mve", "t")
    ),
    makeNumericLearnerParam(
      id = "nu",
      lower = 2,
      requires = quote(method == "t")
    ),
    makeNumericLearnerParam(id = "tol", default = 1e-4, lower = 0),
    makeDiscreteLearnerParam(
      id = "predict.method",
      values = c("plug-in", "predictive", "debiased"),
      default = "plug-in",
      when = "predict"
    ),
    makeLogicalLearnerParam(id = "CV", default = FALSE, tunable = FALSE)
  )

  lda_properties <- c("twoclass", "multiclass", "numerics", "factors", "prob")

  makeRLearnerClassif(
    cl = "classif.lda1",
    package = "MASS",
    par.set = lda_params,
    properties = lda_properties,
    name = "Linear Discriminant Analysis",
    short.name = "lda",
    note = "Learner param 'predict.method' maps to 'method' in predict.lda."
  )
}
# Training function for the "classif.lda1" learner.
#
# Arguments:
#   .learner - learner object; not referenced in the body.
#   .task    - task from which the model formula and the data are obtained.
#   .subset  - forwarded to getTaskData() together with the task.
#   .weights - accepted (default NULL) but not used in the body.
#   ...      - forwarded unchanged to MASS::lda().
#
# Returns the object produced by MASS::lda() directly.
trainLearner.classif.lda1 = function(.learner, .task, .subset, .weights = NULL, ...) {
  f = getTaskFormula(.task)
  # NOTE(review): lda() appears to keep this call expression in the fitted
  # object, so renaming `f` or restructuring the call would alter the result.
  MASS::lda(f, data = getTaskData(.task, .subset), ...)
}
# Prediction function for the "classif.lda1" learner.
#
# Arguments:
#   .learner       - object whose `predict.type` element selects the output.
#   .model         - object whose `learner.model` element holds the fitted
#                    model handed to predict().
#   .newdata       - passed to predict() as `newdata`.
#   predict.method - passed to predict() as `method` (default "plug-in").
#   ...            - forwarded unchanged to predict().
#
# Returns the `class` element of the prediction when
# `.learner$predict.type == "response"`, otherwise the `posterior` element.
predictLearner.classif.lda1 <- function(.learner, .model, .newdata, predict.method = "plug-in", ...) {
  fitted_model <- .model$learner.model
  prediction <- predict(
    fitted_model,
    newdata = .newdata,
    method = predict.method,
    ...
  )

  wants_labels <- .learner$predict.type == "response"
  if (wants_labels) {
    prediction$class
  } else {
    prediction$posterior
  }
}


# Register the three functions defined above as S3 methods for class
# "classif.lda1" on the makeRLearner / trainLearner / predictLearner generics.
registerS3method(
  genname = "makeRLearner",
  class = "classif.lda1",
  method = makeRLearner.classif.lda1
)
registerS3method(
  genname = "trainLearner",
  class = "classif.lda1",
  method = trainLearner.classif.lda1
)
registerS3method(
  genname = "predictLearner",
  class = "classif.lda1",
  method = predictLearner.classif.lda1
)

# Build, fit and predict through makeLearner() / train() / predict() rather
# than calling the learner functions directly.
lrn <- makeLearner("classif.lda1")
mod <- train(learner = lrn, task = iris.task)
pred <- predict(mod, task = iris.task)