mlr3:对 AutoTuner 进行重采样后,不显示调优后的参数?
mlr3 resample autotuner - not showing tuned parameters?
我是 mlr3 的新手,在使用 AutoTuner 方法(利用嵌套重采样)优化超参数时,无法获取每个交叉验证折中调优后的超参数。我的理解是,在 AutoTuner 上应用重采样功能后,我们应该能够看到每次迭代中单独调优得到的超参数。
为了证明这一点——我使用了网站 (https://mlr3gallery.mlr-org.com/house-prices-in-king-county/) 上提供的 mlr3 示例,在 Linux 服务器和 Windows 机器上进行了测试。更具体地说,我正在查看 xgboost 参数调整部分 - 代码如下:
library(mlr3)
library(mlr3learners)
library(mlr3tuning)
library(paradox)
# Load the King County house-price data and build a regression task on it
# (the first column is dropped, "price" is the target)
data("kc_housing", package = "mlr3data")
tsk <- TaskRegr$new("sales", kc_housing[-1], target = "price")

# Reproducible 70/30 split of the row indices into a train and a test task
set.seed(4411)
train.idx <- sample(seq_len(tsk$nrow), 0.7 * tsk$nrow)
test.idx <- setdiff(seq_len(tsk$nrow), train.idx)
task_train <- tsk$clone()$filter(train.idx)
task_test <- tsk$clone()$filter(test.idx)

set.seed(444L)
lrn_xgb <- lrn("regr.xgboost")

# Search space for the xgboost hyperparameters
ps <- paradox::ParamSet$new(
  params = list(
    ParamDbl$new(id = "eta", lower = 0.2, upper = 0.4),
    ParamDbl$new(id = "min_child_weight", lower = 1, upper = 20),
    ParamDbl$new(id = "subsample", lower = 0.7, upper = 0.8),
    ParamDbl$new(id = "colsample_bytree", lower = 0.9, upper = 1),
    ParamDbl$new(id = "colsample_bylevel", lower = 0.5, upper = 0.7),
    ParamInt$new(id = "nrounds", lower = 1L, upper = 25)
  )
)

# Outer resampling: 3-fold cross-validation
cv3 <- rsmp("cv", folds = 3)

# Stop tuning after 5 evaluations
terminator <- term("evals", n_evals = 5)

# AutoTuner: random search over `ps`, scored by MSE on an inner holdout split
at <- AutoTuner$new(
  learner = lrn_xgb,
  resampling = rsmp("holdout"),
  measures = msr("regr.mse"),
  tune_ps = ps,
  terminator = terminator,
  tuner = tnr("random_search")
)

# Nested resampling: the AutoTuner is the learner inside the outer CV
res <- resample(task = task_train, at, cv3)

# Parameter values of the learner returned for each outer fold
sapply(res$learners, function(x) x$param_set$values)
下面是 sapply 的输出:
[,1] [,2] [,3]
nrounds 1 1 1
verbose 0 0 0
会话环境(针对 Windows 机器)
> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Matrix products: default
locale:
[1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] paradox_0.1.0 mlr3tuning_0.1.2 mlr3learners_0.1.6 mlr3_0.1.8
loaded via a namespace (and not attached):
[1] lgr_0.3.3 lattice_0.20-38 mlr3misc_0.1.8 digest_0.6.20 crayon_1.3.4
[6] grid_3.6.3 R6_2.4.0 mlr3measures_0.1.2 backports_1.1.4 magrittr_1.5
[11] stringi_1.4.3 uuid_0.1-4 data.table_1.12.6 rstudioapi_0.10 Matrix_1.2-18
[16] checkmate_2.0.0 xgboost_0.90.0.2 tools_3.6.3 compiler_3.6.3
目前,您需要在调用 resample() 时显式设置 store_models = TRUE,才能将调优结果保存在 AutoTuner 中。
我们将来可能会简化此行为,因为尽管已在 AutoTuner 中设置了 store_tuning_instance = TRUE,但 $tuning_result 插槽仍然为空,这很容易让人困惑。
library(mlr3)
library(paradox)
library(mlr3tuning)
# Only show warnings and above from the mlr3 logger
lgr::get_logger("mlr3")$set_threshold("warn")

# Building blocks for tuning rpart's `cp` on the iris task
task <- tsk("iris")
learner <- lrn("classif.rpart")
resampling <- rsmp("holdout")
measures <- msr("classif.ce")
param_set <- ParamSet$new(
  params = list(
    ParamDbl$new("cp", lower = 0.001, upper = 0.1)
  )
)
terminator <- term("evals", n_evals = 5)
tuner <- tnr("grid_search")

# Train the AutoTuner directly on the task
at <- AutoTuner$new(
  learner, resampling, measures, param_set, terminator, tuner
)
at$train(task)

# After a direct train() call the tuning result is available
at$tuning_result
#> $tune_x
#> $tune_x$cp
#> [1] 0.067
#>
#>
#> $params
#> $params$xval
#> [1] 0
#>
#> $params$cp
#> [1] 0.067
#>
#>
#> $perf
#> classif.ce
#> 0.08
# Outer 3-fold CV; store_models = TRUE keeps the fitted AutoTuner of each
# fold so that its tuning result can be read afterwards
res <- resample(
  task = task,
  at,
  rsmp("cv", folds = 3),
  store_models = TRUE
)

# One tuning result per outer fold
lapply(res$learners, function(x) {
  x$tuning_result
})
#> [[1]]
#> [[1]]$tune_x
#> [[1]]$tune_x$cp
#> [1] 0.012
#>
#>
#> [[1]]$params
#> [[1]]$params$xval
#> [1] 0
#>
#> [[1]]$params$cp
#> [1] 0.012
#>
#>
#> [[1]]$perf
#> classif.ce
#> 0.09090909
#>
#>
#> [[2]]
#> [[2]]$tune_x
#> [[2]]$tune_x$cp
#> [1] 0.078
#>
#>
#> [[2]]$params
#> [[2]]$params$xval
#> [1] 0
#>
#> [[2]]$params$cp
#> [1] 0.078
#>
#>
#> [[2]]$perf
#> classif.ce
#> 0.09090909
#>
#>
#> [[3]]
#> [[3]]$tune_x
#> [[3]]$tune_x$cp
#> [1] 0.045
#>
#>
#> [[3]]$params
#> [[3]]$params$xval
#> [1] 0
#>
#> [[3]]$params$cp
#> [1] 0.045
#>
#>
#> [[3]]$perf
#> classif.ce
#> 0.06060606
由 reprex 包 (v0.3.0) 于 2020 年 3 月 20 日创建
我是 mlr3 的新手,在使用 AutoTuner 方法(利用嵌套重采样)优化超参数时,无法获取每个交叉验证折中调优后的超参数。我的理解是,在 AutoTuner 上应用重采样功能后,我们应该能够看到每次迭代中单独调优得到的超参数。
为了证明这一点——我使用了网站 (https://mlr3gallery.mlr-org.com/house-prices-in-king-county/) 上提供的 mlr3 示例,在 Linux 服务器和 Windows 机器上进行了测试。更具体地说,我正在查看 xgboost 参数调整部分 - 代码如下:
library(mlr3)
library(mlr3learners)
library(mlr3tuning)
library(paradox)
# Load the King County house-price data and build a regression task on it
# (the first column is dropped, "price" is the target)
data("kc_housing", package = "mlr3data")
tsk <- TaskRegr$new("sales", kc_housing[-1], target = "price")

# Reproducible 70/30 split of the row indices into a train and a test task
set.seed(4411)
train.idx <- sample(seq_len(tsk$nrow), 0.7 * tsk$nrow)
test.idx <- setdiff(seq_len(tsk$nrow), train.idx)
task_train <- tsk$clone()$filter(train.idx)
task_test <- tsk$clone()$filter(test.idx)

set.seed(444L)
lrn_xgb <- lrn("regr.xgboost")

# Search space for the xgboost hyperparameters
ps <- paradox::ParamSet$new(
  params = list(
    ParamDbl$new(id = "eta", lower = 0.2, upper = 0.4),
    ParamDbl$new(id = "min_child_weight", lower = 1, upper = 20),
    ParamDbl$new(id = "subsample", lower = 0.7, upper = 0.8),
    ParamDbl$new(id = "colsample_bytree", lower = 0.9, upper = 1),
    ParamDbl$new(id = "colsample_bylevel", lower = 0.5, upper = 0.7),
    ParamInt$new(id = "nrounds", lower = 1L, upper = 25)
  )
)

# Outer resampling: 3-fold cross-validation
cv3 <- rsmp("cv", folds = 3)

# Stop tuning after 5 evaluations
terminator <- term("evals", n_evals = 5)

# AutoTuner: random search over `ps`, scored by MSE on an inner holdout split
at <- AutoTuner$new(
  learner = lrn_xgb,
  resampling = rsmp("holdout"),
  measures = msr("regr.mse"),
  tune_ps = ps,
  terminator = terminator,
  tuner = tnr("random_search")
)

# Nested resampling: the AutoTuner is the learner inside the outer CV
res <- resample(task = task_train, at, cv3)

# Parameter values of the learner returned for each outer fold
sapply(res$learners, function(x) x$param_set$values)
下面是 sapply 的输出:
[,1] [,2] [,3]
nrounds 1 1 1
verbose 0 0 0
会话环境(针对 Windows 机器)
> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)
Matrix products: default
locale:
[1] LC_COLLATE=English_Australia.1252 LC_CTYPE=English_Australia.1252
[3] LC_MONETARY=English_Australia.1252 LC_NUMERIC=C
[5] LC_TIME=English_Australia.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] paradox_0.1.0 mlr3tuning_0.1.2 mlr3learners_0.1.6 mlr3_0.1.8
loaded via a namespace (and not attached):
[1] lgr_0.3.3 lattice_0.20-38 mlr3misc_0.1.8 digest_0.6.20 crayon_1.3.4
[6] grid_3.6.3 R6_2.4.0 mlr3measures_0.1.2 backports_1.1.4 magrittr_1.5
[11] stringi_1.4.3 uuid_0.1-4 data.table_1.12.6 rstudioapi_0.10 Matrix_1.2-18
[16] checkmate_2.0.0 xgboost_0.90.0.2 tools_3.6.3 compiler_3.6.3
目前,您需要在调用 resample() 时显式设置 store_models = TRUE,才能将调优结果保存在 AutoTuner 中。
我们将来可能会简化此行为,因为尽管已在 AutoTuner 中设置了 store_tuning_instance = TRUE,但 $tuning_result 插槽仍然为空,这很容易让人困惑。
library(mlr3)
library(paradox)
library(mlr3tuning)
# Only show warnings and above from the mlr3 logger
lgr::get_logger("mlr3")$set_threshold("warn")

# Building blocks for tuning rpart's `cp` on the iris task
task <- tsk("iris")
learner <- lrn("classif.rpart")
resampling <- rsmp("holdout")
measures <- msr("classif.ce")
param_set <- ParamSet$new(
  params = list(
    ParamDbl$new("cp", lower = 0.001, upper = 0.1)
  )
)
terminator <- term("evals", n_evals = 5)
tuner <- tnr("grid_search")

# Train the AutoTuner directly on the task
at <- AutoTuner$new(
  learner, resampling, measures, param_set, terminator, tuner
)
at$train(task)

# After a direct train() call the tuning result is available
at$tuning_result
#> $tune_x
#> $tune_x$cp
#> [1] 0.067
#>
#>
#> $params
#> $params$xval
#> [1] 0
#>
#> $params$cp
#> [1] 0.067
#>
#>
#> $perf
#> classif.ce
#> 0.08
# Outer 3-fold CV; store_models = TRUE keeps the fitted AutoTuner of each
# fold so that its tuning result can be read afterwards
res <- resample(
  task = task,
  at,
  rsmp("cv", folds = 3),
  store_models = TRUE
)

# One tuning result per outer fold
lapply(res$learners, function(x) {
  x$tuning_result
})
#> [[1]]
#> [[1]]$tune_x
#> [[1]]$tune_x$cp
#> [1] 0.012
#>
#>
#> [[1]]$params
#> [[1]]$params$xval
#> [1] 0
#>
#> [[1]]$params$cp
#> [1] 0.012
#>
#>
#> [[1]]$perf
#> classif.ce
#> 0.09090909
#>
#>
#> [[2]]
#> [[2]]$tune_x
#> [[2]]$tune_x$cp
#> [1] 0.078
#>
#>
#> [[2]]$params
#> [[2]]$params$xval
#> [1] 0
#>
#> [[2]]$params$cp
#> [1] 0.078
#>
#>
#> [[2]]$perf
#> classif.ce
#> 0.09090909
#>
#>
#> [[3]]
#> [[3]]$tune_x
#> [[3]]$tune_x$cp
#> [1] 0.045
#>
#>
#> [[3]]$params
#> [[3]]$params$xval
#> [1] 0
#>
#> [[3]]$params$cp
#> [1] 0.045
#>
#>
#> [[3]]$perf
#> classif.ce
#> 0.06060606
由 reprex 包 (v0.3.0) 于 2020 年 3 月 20 日创建