使用 tidymodels 通过嵌套重采样对岭回归进行调参

Tuning ridge regression with tidymodels using nested resampling

我想使用 tidymodels 对岭回归进行调参。我看过这篇嵌套重采样(nested resampling)教程,但不确定如何把调参对象从一个超参数扩展到两个超参数。请看下面的例子:

示例数据:

library("mlbench")
#' Simulate a Friedman-1 benchmark data set as a data frame
#'
#' @param n Number of observations, passed to `mlbench.friedman1()`.
#' @return A data frame holding the columns of the simulated `$x` followed by
#'   the simulated `$y`, with that last column named `y`.
sim_data <- function(n) {
  sim <- mlbench.friedman1(n, sd = 1)
  out <- as.data.frame(cbind(sim$x, sim$y))
  # The response was bound on last, so it is the final column.
  names(out)[ncol(out)] <- "y"
  out
}
# Fix the RNG seed so the simulated training set is reproducible.
set.seed(9815)
train_dat <- sim_data(n = 50)

设置内外折:

library(tidymodels)
# 10-fold cross-validation (one repeat) for both the outer and the inner
# resampling layers.
results_nested_resampling <- rsample::nested_cv(
  train_dat,
  outside = vfold_cv(v = 10, repeats = 1),
  inside = vfold_cv(v = 10, repeats = 1)
)

拟合模型和计算 RMSE 的函数:

#' Fit a glmnet linear regression on one resample split and score it
#'
#' Fits `y ~ .` on the analysis portion of `object` and returns the RMSE of
#' the predictions on the assessment portion. Despite the `svm_` prefix in
#' its name, the model fitted here is `parsnip::linear_reg()` with the
#' "glmnet" engine.
#'
#' @param object A single resample split accepted by `analysis()` and
#'   `assessment()`; its data must contain a column named `y`.
#' @param penalty Value passed to `parsnip::linear_reg(penalty = )`.
#' @param mixture Value passed to `parsnip::linear_reg(mixture = )`.
#' @return The `.estimate` value of `rmse()` computed on the held-out rows.
svm_rmse <- function(object, penalty = 1, mixture = 1) {
  # Concrete hyperparameter values are supplied directly; no tune()
  # placeholder is involved.
  mod <-
    parsnip::linear_reg(penalty = penalty, mixture = mixture) %>%
    parsnip::set_engine("glmnet") %>%
    fit(y ~ ., data = analysis(object))

  # Predict on the held-out rows without the outcome, then put the observed
  # outcome back next to the predictions for scoring.
  holdout_pred <-
    predict(mod, assessment(object) %>% dplyr::select(-y)) %>%
    bind_cols(assessment(object) %>% dplyr::select(y))

  rmse(holdout_pred, truth = y, estimate = .pred)$.estimate
}

# Argument-order adapter: takes the tuning parameters first and the split
# last, so it can be mapped over (penalty, mixture) values for a fixed split.
rmse_wrapper <- function(penalty, mixture, object) {
  svm_rmse(object, penalty = penalty, mixture = mixture)
}

# Smoke test of rmse_wrapper() on a single split: the first split of the
# fifth set of inner resamples.
rmse_wrapper(
  penalty = 0.1,
  mixture = 0.1,
  object = results_nested_resampling$inner_resamples[[5]]$splits[[1]]
)

但是调整两个超参数的函数不起作用:

#' Evaluate every (penalty, mixture) combination on one resample split
#'
#' @param object A single resample split, forwarded unchanged to
#'   `rmse_wrapper()` for every grid row.
#' @return The tuning grid (columns `penalty`, `mixture`) with an added `RMSE`
#'   column holding the value returned by `rmse_wrapper()` for each row.
tune_over_cost <- function(object) {
  glmn_grid <- base::expand.grid(
    penalty = 10^seq(-3, -1, length.out = 20),
    mixture = (0:5) / 5
  )

  # map2_dbl() walks the two grid columns in parallel; map_dbl() accepts only
  # one vector and would treat `mixture` as the function to apply.
  glmn_grid %>%
    mutate(
      RMSE = map2_dbl(penalty, mixture, rmse_wrapper, object = object)
    )
}

# tune_over_cost() forwards `object` to analysis() / assessment(), which work
# on a single split, so pick one split instead of the whole inner resample set.
tune_over_cost(
  object = results_nested_resampling$inner_resamples[[5]]$splits[[1]]
)

提前致谢。

尝试使用 map2_dbl 而不是 map_dbl:map_dbl 只能遍历一个向量,而 map2_dbl 可以同时遍历 penalty 和 mixture 两个向量。

也就是把这行代码:mutate(RMSE = map_dbl(glmn_grid$penalty, glmn_grid$mixture, rmse_wrapper, object = object))

改为:mutate(RMSE = map2_dbl(penalty, mixture, rmse_wrapper, object = object))