在 R 中使用 mlr 运行用于生存分析的 Xgboost
Xgboost for survival using mlr in R
我想在 R 中使用 mlr 对右删失生存数据运行 xgboost。xgboost 代码列出了一个 objective 函数 survival:cox,其说明如下:
survival:cox: Cox regression for right censored survival time data (negative values are considered right censored).
我正在使用的 Mlr 2 仅支持回归和分类学习器的 xgboost。如果我尝试使用 xgboost 的内置回归学习器,它会使用 mse 作为评估指标。所以我尝试将指标更改为 cindex 并得到错误
Measures: cindex cindex
Error in FUN(X[[i]], ...) :
Measure cindex does not support task type regr!
然后我尝试为 xgboost 编写一个新的生存学习器,它只是回归学习器的一个副本,只是把 "Regr" 改成了 "Surv"。当然,它期望目标有 2 列(时间和状态),并且不接受负时间;而 xgboost 只需要一列(时间),并假设任何时间值为负的行都是删失的。
下面是我试过的。有什么方法可以在 mlr2 或 mlr3 中实现这一点?
- 为 xgboost 使用内置回归学习器:
# Attempt 1: built-in regression learner driven by the Cox objective.
data(veteran)

# Keep the predictors plus `time`. Censoring is folded into the sign of
# `time`: rows with status == 1 keep their time, all others are negated.
veteran_xgb <- veteran[c("trt", "karno", "diagtime", "age", "prior", "time")]
veteran_xgb$time <- with(veteran, ifelse(status == 1, time, -time))

xgb.task <- makeRegrTask(
  id = "XGBOOST_VET",
  data = veteran_xgb,
  target = "time"
)

xgb_learner <- makeLearner(
  cl = "regr.xgboost",
  id = "xgboost",
  predict.type = "response",
  par.vals = list(
    objective = "survival:cox",
    eval_metric = "cox-nloglik",
    nrounds = 200
  )
)

learners <- list(xgb_learner)
outer <- makeResampleDesc(method = "CV", iters = 5) # Benchmarking
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  show.info = TRUE
)
- 为 xgboost 使用自定义 surv 学习器:
# Attempt 2: custom survival learner fed with sign-encoded times.
data(veteran)

# Keep the predictors plus both target columns. `time` is negated for every
# row whose status is not 1 before the survival task is built.
veteran_xgb <- veteran[
  c("trt", "karno", "diagtime", "age", "prior", "time", "status")
]
veteran_xgb$time <- with(veteran, ifelse(status == 1, time, -time))

xgb.task <- makeSurvTask(
  id = "XGBOOST_VET",
  data = veteran_xgb,
  target = c("time", "status")
)

xgb_learner <- makeLearner(
  cl = "surv.xgboost",
  id = "xgboost",
  predict.type = "response",
  par.vals = list(
    objective = "survival:cox",
    eval_metric = "cox-nloglik",
    nrounds = 200
  )
)

learners <- list(xgb_learner)
outer <- makeResampleDesc(method = "CV", iters = 5) # Benchmarking
surv.measures <- list(cindex)
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  measures = surv.measures,
  show.info = TRUE
)
文件 RLearner_surv_xgboost.R 可以从此处的 OneDrive 下载 https://1drv.ms/u/s!AjTjdzp0sDJRrhZtZF5-HZF2BrBB?e=FNLS94
我找到了解决方案并在此处更新了我的自定义学习器:https://1drv.ms/u/s!AjTjdzp0sDJRrhewy0yx3Wot3FiI?e=sxRrTN
诀窍是修改 trainLearner.surv.xgboost 函数。作为生存学习器,它期望传入的数据带有包含时间和状态 2 列的目标。但在该学习器内部,我们可以计算出 xgboost 所期望的目标(对删失数据使用负时间),然后将这个新的单列(single-column)目标传递给 xgboost:
# Train an xgboost model for an mlr survival task.
#
# The task's two-column target (time, status) is collapsed into the single
# signed label used by xgboost's "survival:cox" objective: rows with
# status == 1 keep their time, every other row gets the negated time.
#
# Args:
#   .learner: learner object supplied by mlr; not used in this body.
#   .task:    task whose data is fetched with getTaskData().
#   .subset:  row subset forwarded to getTaskData().
#   .weights: optional observation weights; attached to the DMatrix if given.
#   ...:      hyperparameters collected into the argument list that is
#             passed to xgboost::xgb.train().
#
# Returns:
#   The value returned by xgboost::xgb.train() for the assembled arguments.
trainLearner.surv.xgboost <- function(.learner, .task, .subset, .weights = NULL, ...) {
  parlist <- list(...)

  # `[[` matches names exactly; `$` on a list would also accept partial names.
  if (is.null(parlist[["objective"]])) {
    parlist[["objective"]] <- "survival:cox"
    # Fill in the matching metric only when the caller did not choose one, so
    # an explicitly supplied eval_metric is respected.
    if (is.null(parlist[["eval_metric"]])) {
      parlist[["eval_metric"]] <- "cox-nloglik"
    }
  }

  task.data <- getTaskData(.task, .subset, target.extra = TRUE)
  target <- task.data$target
  # Encode censoring in the sign of the label.
  survtime <- ifelse(target$status == 1, target$time, -target$time)

  parlist[["data"]] <- xgboost::xgb.DMatrix(
    data = data.matrix(task.data$data),
    label = survtime
  )
  if (!is.null(.weights)) {
    xgboost::setinfo(parlist[["data"]], "weight", .weights)
  }
  # Default to monitoring the training set when no watchlist was passed in.
  if (is.null(parlist[["watchlist"]])) {
    parlist[["watchlist"]] <- list(train = parlist[["data"]])
  }

  do.call(xgboost::xgb.train, parlist)
}
然后使用这个新学习器:
library(xgboost)
library(survival)
library(mlr)
source("RLearner_surv_xgboost.R")
# Benchmark the custom survival learner with 5-fold CV and the `cindex` measure.
data(veteran)

# Drop the `celltype` column; all remaining columns go into the task.
veteran.xgb <- veteran[, setdiff(names(veteran), "celltype")]

xgb.task <- makeSurvTask(
  id = "XGBOOST_VET",
  data = veteran.xgb,
  target = c("time", "status")
)

xgb.learner <- makeLearner(
  cl = "surv.xgboost",
  id = "xgboost",
  predict.type = "response"
)

learners <- list(xgb.learner)
outer <- makeResampleDesc(method = "CV", iters = 5)
surv.measures <- list(cindex)
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  measures = surv.measures,
  show.info = TRUE
)
我想在 R 中使用 mlr 对右删失生存数据运行 xgboost。xgboost 代码列出了一个 objective 函数 survival:cox,其说明如下:
survival:cox: Cox regression for right censored survival time data (negative values are considered right censored).
我正在使用的 Mlr 2 仅支持回归和分类学习器的 xgboost。如果我尝试使用 xgboost 的内置回归学习器,它会使用 mse 作为评估指标。所以我尝试将指标更改为 cindex 并得到错误
Measures: cindex cindex
Error in FUN(X[[i]], ...) : Measure cindex does not support task type regr!
然后我尝试为 xgboost 编写一个新的生存学习器,它只是回归学习器的一个副本,只是把 "Regr" 改成了 "Surv"。当然,它期望目标有 2 列(时间和状态),并且不接受负时间;而 xgboost 只需要一列(时间),并假设任何时间值为负的行都是删失的。
下面是我试过的。有什么方法可以在 mlr2 或 mlr3 中实现这一点?
- 为 xgboost 使用内置回归学习器:
# Attempt 1: built-in regression learner driven by the Cox objective.
data(veteran)

# Keep the predictors plus `time`. Censoring is folded into the sign of
# `time`: rows with status == 1 keep their time, all others are negated.
veteran_xgb <- veteran[c("trt", "karno", "diagtime", "age", "prior", "time")]
veteran_xgb$time <- with(veteran, ifelse(status == 1, time, -time))

xgb.task <- makeRegrTask(
  id = "XGBOOST_VET",
  data = veteran_xgb,
  target = "time"
)

xgb_learner <- makeLearner(
  cl = "regr.xgboost",
  id = "xgboost",
  predict.type = "response",
  par.vals = list(
    objective = "survival:cox",
    eval_metric = "cox-nloglik",
    nrounds = 200
  )
)

learners <- list(xgb_learner)
outer <- makeResampleDesc(method = "CV", iters = 5) # Benchmarking
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  show.info = TRUE
)
- 为 xgboost 使用自定义 surv 学习器:
# Attempt 2: custom survival learner fed with sign-encoded times.
data(veteran)

# Keep the predictors plus both target columns. `time` is negated for every
# row whose status is not 1 before the survival task is built.
veteran_xgb <- veteran[
  c("trt", "karno", "diagtime", "age", "prior", "time", "status")
]
veteran_xgb$time <- with(veteran, ifelse(status == 1, time, -time))

xgb.task <- makeSurvTask(
  id = "XGBOOST_VET",
  data = veteran_xgb,
  target = c("time", "status")
)

xgb_learner <- makeLearner(
  cl = "surv.xgboost",
  id = "xgboost",
  predict.type = "response",
  par.vals = list(
    objective = "survival:cox",
    eval_metric = "cox-nloglik",
    nrounds = 200
  )
)

learners <- list(xgb_learner)
outer <- makeResampleDesc(method = "CV", iters = 5) # Benchmarking
surv.measures <- list(cindex)
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  measures = surv.measures,
  show.info = TRUE
)
文件 RLearner_surv_xgboost.R 可以从此处的 OneDrive 下载 https://1drv.ms/u/s!AjTjdzp0sDJRrhZtZF5-HZF2BrBB?e=FNLS94
我找到了解决方案并在此处更新了我的自定义学习器:https://1drv.ms/u/s!AjTjdzp0sDJRrhewy0yx3Wot3FiI?e=sxRrTN
诀窍是修改 trainLearner.surv.xgboost 函数。作为生存学习器,它期望传入的数据带有包含时间和状态 2 列的目标。但在该学习器内部,我们可以计算出 xgboost 所期望的目标(对删失数据使用负时间),然后将这个新的单列(single-column)目标传递给 xgboost:
# Train an xgboost model for an mlr survival task.
#
# The task's two-column target (time, status) is collapsed into the single
# signed label used by xgboost's "survival:cox" objective: rows with
# status == 1 keep their time, every other row gets the negated time.
#
# Args:
#   .learner: learner object supplied by mlr; not used in this body.
#   .task:    task whose data is fetched with getTaskData().
#   .subset:  row subset forwarded to getTaskData().
#   .weights: optional observation weights; attached to the DMatrix if given.
#   ...:      hyperparameters collected into the argument list that is
#             passed to xgboost::xgb.train().
#
# Returns:
#   The value returned by xgboost::xgb.train() for the assembled arguments.
trainLearner.surv.xgboost <- function(.learner, .task, .subset, .weights = NULL, ...) {
  parlist <- list(...)

  # `[[` matches names exactly; `$` on a list would also accept partial names.
  if (is.null(parlist[["objective"]])) {
    parlist[["objective"]] <- "survival:cox"
    # Fill in the matching metric only when the caller did not choose one, so
    # an explicitly supplied eval_metric is respected.
    if (is.null(parlist[["eval_metric"]])) {
      parlist[["eval_metric"]] <- "cox-nloglik"
    }
  }

  task.data <- getTaskData(.task, .subset, target.extra = TRUE)
  target <- task.data$target
  # Encode censoring in the sign of the label.
  survtime <- ifelse(target$status == 1, target$time, -target$time)

  parlist[["data"]] <- xgboost::xgb.DMatrix(
    data = data.matrix(task.data$data),
    label = survtime
  )
  if (!is.null(.weights)) {
    xgboost::setinfo(parlist[["data"]], "weight", .weights)
  }
  # Default to monitoring the training set when no watchlist was passed in.
  if (is.null(parlist[["watchlist"]])) {
    parlist[["watchlist"]] <- list(train = parlist[["data"]])
  }

  do.call(xgboost::xgb.train, parlist)
}
然后使用这个新学习器:
library(xgboost)
library(survival)
library(mlr)
source("RLearner_surv_xgboost.R")
# Benchmark the custom survival learner with 5-fold CV and the `cindex` measure.
data(veteran)

# Drop the `celltype` column; all remaining columns go into the task.
veteran.xgb <- veteran[, setdiff(names(veteran), "celltype")]

xgb.task <- makeSurvTask(
  id = "XGBOOST_VET",
  data = veteran.xgb,
  target = c("time", "status")
)

xgb.learner <- makeLearner(
  cl = "surv.xgboost",
  id = "xgboost",
  predict.type = "response"
)

learners <- list(xgb.learner)
outer <- makeResampleDesc(method = "CV", iters = 5)
surv.measures <- list(cindex)
bmr <- benchmark(
  learners = learners,
  tasks = xgb.task,
  resamplings = outer,
  measures = surv.measures,
  show.info = TRUE
)