在 mlr 中使用 MICE 进行插补
Imputation using MICE in mlr
我正在尝试使用 makeImputeMethod 在 mlr 中编写我自己的插补方法,以使用 R 中的 mice 包通过链式方程执行多重插补。我的 imputeMice() 方法运行完成,但在完成后出现以下错误:
Error in `[.data.frame`(data, ind) : undefined columns selected
我不确定为什么,也不知道它来自哪里。这是我写的代码:
library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#>
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
# Load the pbc data set and recode every status value of 2 to 1.
data(pbc)
task_id <- "PBC"
pbc[pbc$status == 2, "status"] <- 1

# Survival task with the target columns "time" and "status".
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
# Build a custom mlr imputation method that runs mice in the impute step.
# This is the failing variant: the transcript that follows ends with
# "undefined columns selected".
imputeMice <- function() {
  makeImputeMethod(
    # Learn step: pass the incoming data through unchanged as `values`.
    learn = function(data, target, col) {
      list(values = data)
    },
    # Impute step: run mice on the data it receives and return the whole
    # completed data set (not only the column named by `col`).
    impute = function(data, target, col, values) {
      frame <- as.data.frame(data)
      # Factor columns are excluded as predictors in quickpred().
      factor_cols <- names(frame)[sapply(frame, is.factor)]
      pred_matrix <- mice::quickpred(
        frame,
        minpuc = 0,
        mincor = 0,
        exclude = factor_cols
      )
      imputed <- mice::mice(
        frame,
        pred = pred_matrix,
        seed = 23109,
        printFlag = FALSE
      )
      completed <- mice::complete(imputed)
      print("Imputation completed")
      completed
    }
  )
}
# Wrap the surv.coxph learner in a filter wrapper
# (method "univariate.model.score", fw.perc 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Register imputeMice() for the numeric, integer and factor classes.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample on the task, scoring with cindex, keeping the fitted models and
# extracting results with getFilteredFeatures.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures: cindex
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> Error in `[.data.frame`(data, ind): undefined columns selected
由 reprex package (v0.3.0)
于 2020-06-16 创建
很明显,函数 imputeMice() 在 data.frame pbc 的每一列上都被调用了一次。但是使用 mice 时,我们只需要调用这个函数一次,它就会对每一列进行插补。这在 mlr 中可能吗?
是我弄错了——我应该在 learn 函数中调用 mice,而不是在 impute 函数中。我觉得这些函数的名称容易让人混淆。我的新代码如下,它可以正常运行。但它会对每一列都调用一次 mice,而我其实只需要调用一次。这可能吗?
library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#>
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
# Load the pbc data set and recode every status value of 2 to 1.
data(pbc)
task_id <- "PBC"
pbc[pbc$status == 2, "status"] <- 1

# Survival task with the target columns "time" and "status".
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
# Build a custom mlr imputation method that runs mice in the learn step.
imputeMice <- function() {
  makeImputeMethod(
    # Learn step: run mice on the data it receives and keep the completed
    # values of the column named by `col`.
    learn = function(data, target, col) {
      frame <- as.data.frame(data)
      # Factor columns are excluded as predictors in quickpred().
      factor_cols <- names(frame)[sapply(frame, is.factor)]
      pred_matrix <- mice::quickpred(
        frame,
        minpuc = 0,
        mincor = 0,
        exclude = factor_cols
      )
      imputed <- mice::mice(
        frame,
        pred = pred_matrix,
        seed = 23109,
        printFlag = FALSE
      )
      completed <- mice::complete(imputed)
      list(values = completed[[col]])
    },
    # Impute step: overwrite the column with the stored values and return it.
    # NOTE(review): the whole column is replaced by the values computed in
    # learn(), whatever data is passed in here -- confirm this is intended
    # when the method is applied to data other than the data learn() saw.
    impute = function(data, target, col, values) {
      data[[col]] <- values
      data[[col]]
    }
  )
}
# Wrap the surv.coxph learner in a filter wrapper
# (method "univariate.model.score", fw.perc 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Register imputeMice() for the numeric, integer and factor classes.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample on the task, scoring with cindex, keeping the fitted models and
# extracting results with getFilteredFeatures.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures: cindex
#> [Resample] iter 1: 0.7069869
#> [Resample] iter 2: 0.7138798
#>
#> Aggregated Result: cindex.test.mean=0.7104333
#>
由 reprex package (v0.3.0)
于 2020-06-19 创建
我正在尝试使用 makeImputeMethod 在 mlr 中编写我自己的插补方法,以使用 R 中的 mice 包通过链式方程执行多重插补。我的 imputeMice() 方法运行完成,但在完成后出现以下错误:
Error in `[.data.frame`(data, ind) : undefined columns selected
我不确定为什么,也不知道它来自哪里。这是我写的代码:
library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#>
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
# Load the pbc data set and recode every status value of 2 to 1.
data(pbc)
task_id <- "PBC"
pbc[pbc$status == 2, "status"] <- 1

# Survival task with the target columns "time" and "status".
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
# Build a custom mlr imputation method that runs mice in the impute step.
# This is the failing variant: the transcript that follows ends with
# "undefined columns selected".
imputeMice <- function() {
  makeImputeMethod(
    # Learn step: pass the incoming data through unchanged as `values`.
    learn = function(data, target, col) {
      list(values = data)
    },
    # Impute step: run mice on the data it receives and return the whole
    # completed data set (not only the column named by `col`).
    impute = function(data, target, col, values) {
      frame <- as.data.frame(data)
      # Factor columns are excluded as predictors in quickpred().
      factor_cols <- names(frame)[sapply(frame, is.factor)]
      pred_matrix <- mice::quickpred(
        frame,
        minpuc = 0,
        mincor = 0,
        exclude = factor_cols
      )
      imputed <- mice::mice(
        frame,
        pred = pred_matrix,
        seed = 23109,
        printFlag = FALSE
      )
      completed <- mice::complete(imputed)
      print("Imputation completed")
      completed
    }
  )
}
# Wrap the surv.coxph learner in a filter wrapper
# (method "univariate.model.score", fw.perc 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Register imputeMice() for the numeric, integer and factor classes.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample on the task, scoring with cindex, keeping the fitted models and
# extracting results with getFilteredFeatures.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures: cindex
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> [1] "Imputation completed"
#> Error in `[.data.frame`(data, ind): undefined columns selected
由 reprex package (v0.3.0)
于 2020-06-16 创建
很明显,函数 imputeMice() 在 data.frame pbc 的每一列上都被调用了一次。但是使用 mice 时,我们只需要调用这个函数一次,它就会对每一列进行插补。这在 mlr 中可能吗?
是我弄错了——我应该在 learn 函数中调用 mice,而不是在 impute 函数中。我觉得这些函数的名称容易让人混淆。我的新代码如下,它可以正常运行。但它会对每一列都调用一次 mice,而我其实只需要调用一次。这可能吗?
library(survival)
#> Warning: package 'survival' was built under R version 3.6.3
library(mlr)
#> Warning: package 'mlr' was built under R version 3.6.3
#> Loading required package: ParamHelpers
#> Warning: package 'ParamHelpers' was built under R version 3.6.3
#> 'mlr' is in maintenance mode since July 2019. Future development
#> efforts will go into its successor 'mlr3' (<https://mlr3.mlr-org.com>).
library(lattice)
#> Warning: package 'lattice' was built under R version 3.6.3
library(mice)
#> Warning: package 'mice' was built under R version 3.6.3
#>
#> Attaching package: 'mice'
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
# Load the pbc data set and recode every status value of 2 to 1.
data(pbc)
task_id <- "PBC"
pbc[pbc$status == 2, "status"] <- 1

# Survival task with the target columns "time" and "status".
pbc.task <- makeSurvTask(
  id = task_id,
  data = pbc,
  target = c("time", "status")
)

# Stratified cross-validation with 2 iterations.
outer <- makeResampleDesc("CV", iters = 2, stratify = TRUE)
# Build a custom mlr imputation method that runs mice in the learn step.
imputeMice <- function() {
  makeImputeMethod(
    # Learn step: run mice on the data it receives and keep the completed
    # values of the column named by `col`.
    learn = function(data, target, col) {
      frame <- as.data.frame(data)
      # Factor columns are excluded as predictors in quickpred().
      factor_cols <- names(frame)[sapply(frame, is.factor)]
      pred_matrix <- mice::quickpred(
        frame,
        minpuc = 0,
        mincor = 0,
        exclude = factor_cols
      )
      imputed <- mice::mice(
        frame,
        pred = pred_matrix,
        seed = 23109,
        printFlag = FALSE
      )
      completed <- mice::complete(imputed)
      list(values = completed[[col]])
    },
    # Impute step: overwrite the column with the stored values and return it.
    # NOTE(review): the whole column is replaced by the values computed in
    # learn(), whatever data is passed in here -- confirm this is intended
    # when the method is applied to data other than the data learn() saw.
    impute = function(data, target, col, values) {
      data[[col]] <- values
      data[[col]]
    }
  )
}
# Wrap the surv.coxph learner in a filter wrapper
# (method "univariate.model.score", fw.perc 0.1, caching enabled).
lrn <- makeFilterWrapper(
  makeLearner(cl = "surv.coxph", id = "cox.filt", predict.type = "response"),
  fw.method = "univariate.model.score",
  fw.perc = 0.1,
  cache = TRUE
)

# Register imputeMice() for the numeric, integer and factor classes.
lrn <- makeImputeWrapper(
  lrn,
  classes = list(
    numeric = imputeMice(),
    integer = imputeMice(),
    factor = imputeMice()
  )
)

# Resample on the task, scoring with cindex, keeping the fitted models and
# extracting results with getFilteredFeatures.
res <- resample(
  learner = lrn,
  task = pbc.task,
  resampling = outer,
  models = TRUE,
  measures = list(cindex),
  show.info = TRUE,
  extract = getFilteredFeatures
)
#> Resampling: cross-validation
#> Measures: cindex
#> [Resample] iter 1: 0.7069869
#> [Resample] iter 2: 0.7138798
#>
#> Aggregated Result: cindex.test.mean=0.7104333
#>
由 reprex package (v0.3.0)
于 2020-06-19 创建