tidymodels - fit_resamples 使用 step_ns() 时出错
tidymodels - error in fit_resamples using step_ns()
我正在尝试编写一个函数,用带有 step_ns() 的配方(recipe)通过 fit_resamples() 对重采样数据进行拟合。出于某种原因,我收到以下错误消息:
Fold01: recipe: Error: Not all variables in the recipe are present in the supplied training set
依此类推。然后
警告信息:
All models failed in [fit_resamples()]. See the
.notes
column.
这是我的代码:
# Compare resampled linear-model fits with and without a natural-spline
# (step_ns) expansion of all predictors.
#
# Arguments:
#   data    - table holding the outcome and predictor columns.
#   outcome - unquoted outcome column name; the column is renamed `prediction`.
#   metric  - unquoted metric, spliced into metric_set().
#   ...     - forwarded to step_ns().
# Returns the two fit_resamples() results bound by rows, labelled via `pred`.
#
# NOTE(review): this is the version that fails with
# "Not all variables in the recipe are present in the supplied training set";
# see the comments near vfold_cv() and the second fit_resamples() call.
compare_basis_exp_to_base_mod <- function (data, outcome, metric, ...) {
# Capture the unevaluated arguments so they can be unquoted with `!!` below.
outcome <- rlang::enquo(outcome)
metric <- rlang::enquo(metric)
# The next four lines derive the predictor names, but `predictor` is not used
# anywhere further down in this function.
pred_list <- colnames(data)
# NOTE(review): `outcome` was reassigned above, so substitute() presumably
# yields the quosure and substring(, 2) drops its leading `~` - confirm.
outcome_str <- substring(deparse(substitute(outcome)), 2)
outcome_str_id <- which(colnames(data) %in% outcome_str)
predictor <- pred_list[-outcome_str_id]
# Give the outcome a fixed name so the recipe formulas can be hard-coded.
data <- data %>%
rename(prediction = !!outcome)
# Empty placeholder; `res` is overwritten before it is returned.
res <- tibble(splits = list(), id = character(), .metrics = list(),
.notes = list(), .predictions = list(), pred = character())
# Both recipes are trained (prep()) on the full data up front.
rec_without_splines <- recipe(prediction ~ ., data = data) %>%
prep()
rec_with_splines <- recipe(prediction ~ ., data = data) %>%
step_ns(all_predictors(), ...) %>%
prep()
# The folds are built from the juice()d recipe output rather than from `data`.
# For the spline recipe that output has already been through step_ns().
folds_without_splines <- vfold_cv(juice(rec_without_splines), strata = prediction)
folds_with_splines <- vfold_cv(juice(rec_with_splines), strata = prediction)
mod <- linear_reg() %>%
set_engine("lm")
mod_without_splines <- fit_resamples(mod,
rec_without_splines,
folds_without_splines,
metrics = metric_set(!!metric),
control = control_resamples(save_pred = TRUE)) %>%
mutate(pred = "no_splines")
# This is the call that fails: rec_with_splines is applied to folds that were
# themselves produced by rec_with_splines.
mod_with_splines <- fit_resamples(mod,
rec_with_splines,
folds_with_splines,
metrics = metric_set(!!metric),
control = control_resamples(save_pred = TRUE)) %>%
mutate(pred = "with_splines")
# Stack both result sets; the `pred` column tells them apart.
res <- mod_without_splines %>%
bind_rows(mod_with_splines)
return (res)
}
基本上,参数 data 是一个包含两列的数据表,outcome 是结果列的名称。抛开这个函数本身是否有用不谈(我是 tidymodels 新手,只是在这里练手),我只想了解是什么导致了这个错误以及如何修复它。错误出现在对 mod_with_splines 求值时。
有人遇到过类似的问题,但我不知道它是否与我的问题有关。在把配方传给 fit_resamples 之前,我没办法不先对它执行 prep()(至少我是这么认为的)。
如有任何帮助,我们将不胜感激。谢谢。
您的问题源于:试图把配方应用到已经被同一个配方处理过的数据集上。
如果我们假设预测变量是 X1 和 X2,那么 rec_with_splines 期望输入的正是这两个变量。但由于 folds_with_splines 是用 rec_with_splines 的 juice() 结果构建的,它实际包含的列是 X1_ns_1、X1_ns_2、X2_ns_1 和 X2_ns_2,而不是 X1 和 X2。
我建议使用 workflows 把预处理和建模步骤组合在一起,并将原始数据传递给 vfold_cv()。
library(tidymodels)
#' Compare a linear model with and without natural-spline basis expansion
#'
#' Renames the outcome column to `prediction`, builds two recipes for
#' `prediction ~ .` (one plain, one with `step_ns()` on all predictors),
#' wraps each in a workflow with a `linear_reg()` model on the "lm" engine,
#' and evaluates both on the same `vfold_cv()` folds of the raw data.
#'
#' @param data A data frame holding the outcome and the predictor columns.
#' @param outcome Unquoted name of the outcome column in `data`.
#' @param metric Unquoted metric function handed to `metric_set()`.
#' @param ... Further arguments forwarded to `step_ns()`.
#' @return Both `fit_resamples()` results bound by rows, with a `pred`
#'   column set to "no_splines" or "with_splines".
compare_basis_exp_to_base_mod <- function(data, outcome, metric, ...) {
  outcome <- rlang::enquo(outcome)
  metric <- rlang::enquo(metric)

  # A fixed outcome name lets the recipe formula be written literally.
  data <- data %>%
    rename(prediction = !!outcome)

  # Neither recipe is prep()ed here: add_recipe() expects an untrained recipe
  # (newer workflows releases reject a trained one), and the workflow trains
  # the recipe itself on the analysis part of every resample.
  rec_without_splines <- recipe(prediction ~ ., data = data)
  rec_with_splines <- recipe(prediction ~ ., data = data) %>%
    step_ns(all_predictors(), ...)

  mod <- linear_reg() %>%
    set_engine("lm")

  # Resample the raw data once so both workflows see identical folds.
  data_folds <- vfold_cv(data, strata = prediction)
  metrics <- metric_set(!!metric)
  ctrl <- control_resamples(save_pred = TRUE)

  # Fit one recipe + model workflow across the folds and tag the rows.
  resample_recipe <- function(rec, label) {
    workflow() %>%
      add_recipe(rec) %>%
      add_model(mod) %>%
      fit_resamples(
        resamples = data_folds,
        metrics = metrics,
        control = ctrl
      ) %>%
      mutate(pred = label)
  }

  mod_without_splines <- resample_recipe(rec_without_splines, "no_splines")
  mod_with_splines <- resample_recipe(rec_with_splines, "with_splines")

  mod_without_splines %>%
    bind_rows(mod_with_splines)
}
# Example run: mtcars as the data, mpg as the outcome, rmse as the metric.
res <- compare_basis_exp_to_base_mod(mtcars, mpg, rmse)
我正在尝试编写一个函数,用带有 step_ns() 的配方(recipe)通过 fit_resamples() 对重采样数据进行拟合。出于某种原因,我收到以下错误消息:
Fold01: recipe: Error: Not all variables in the recipe are present in the supplied training set
依此类推。然后
警告信息:
All models failed in [fit_resamples()]. See the
.notes
column.
这是我的代码:
# Compare resampled linear-model fits with and without a natural-spline
# (step_ns) expansion of all predictors.
#
# Arguments:
#   data    - table holding the outcome and predictor columns.
#   outcome - unquoted outcome column name; the column is renamed `prediction`.
#   metric  - unquoted metric, spliced into metric_set().
#   ...     - forwarded to step_ns().
# Returns the two fit_resamples() results bound by rows, labelled via `pred`.
#
# NOTE(review): this is the version that fails with
# "Not all variables in the recipe are present in the supplied training set";
# see the comments near vfold_cv() and the second fit_resamples() call.
compare_basis_exp_to_base_mod <- function (data, outcome, metric, ...) {
# Capture the unevaluated arguments so they can be unquoted with `!!` below.
outcome <- rlang::enquo(outcome)
metric <- rlang::enquo(metric)
# The next four lines derive the predictor names, but `predictor` is not used
# anywhere further down in this function.
pred_list <- colnames(data)
# NOTE(review): `outcome` was reassigned above, so substitute() presumably
# yields the quosure and substring(, 2) drops its leading `~` - confirm.
outcome_str <- substring(deparse(substitute(outcome)), 2)
outcome_str_id <- which(colnames(data) %in% outcome_str)
predictor <- pred_list[-outcome_str_id]
# Give the outcome a fixed name so the recipe formulas can be hard-coded.
data <- data %>%
rename(prediction = !!outcome)
# Empty placeholder; `res` is overwritten before it is returned.
res <- tibble(splits = list(), id = character(), .metrics = list(),
.notes = list(), .predictions = list(), pred = character())
# Both recipes are trained (prep()) on the full data up front.
rec_without_splines <- recipe(prediction ~ ., data = data) %>%
prep()
rec_with_splines <- recipe(prediction ~ ., data = data) %>%
step_ns(all_predictors(), ...) %>%
prep()
# The folds are built from the juice()d recipe output rather than from `data`.
# For the spline recipe that output has already been through step_ns().
folds_without_splines <- vfold_cv(juice(rec_without_splines), strata = prediction)
folds_with_splines <- vfold_cv(juice(rec_with_splines), strata = prediction)
mod <- linear_reg() %>%
set_engine("lm")
mod_without_splines <- fit_resamples(mod,
rec_without_splines,
folds_without_splines,
metrics = metric_set(!!metric),
control = control_resamples(save_pred = TRUE)) %>%
mutate(pred = "no_splines")
# This is the call that fails: rec_with_splines is applied to folds that were
# themselves produced by rec_with_splines.
mod_with_splines <- fit_resamples(mod,
rec_with_splines,
folds_with_splines,
metrics = metric_set(!!metric),
control = control_resamples(save_pred = TRUE)) %>%
mutate(pred = "with_splines")
# Stack both result sets; the `pred` column tells them apart.
res <- mod_without_splines %>%
bind_rows(mod_with_splines)
return (res)
}
基本上,参数 data 是一个包含两列的数据表,outcome 是结果列的名称。抛开这个函数本身是否有用不谈(我是 tidymodels 新手,只是在这里练手),我只想了解是什么导致了这个错误以及如何修复它。错误出现在对 mod_with_splines 求值时。
有人遇到过类似的问题,但我不知道它是否与我的问题有关。在把配方传给 fit_resamples 之前,我没办法不先对它执行 prep()(至少我是这么认为的)。
如有任何帮助,我们将不胜感激。谢谢。
您的问题源于:试图把配方应用到已经被同一个配方处理过的数据集上。
如果我们假设预测变量是 X1 和 X2,那么 rec_with_splines 期望输入的正是这两个变量。但由于 folds_with_splines 是用 rec_with_splines 的 juice() 结果构建的,它实际包含的列是 X1_ns_1、X1_ns_2、X2_ns_1 和 X2_ns_2,而不是 X1 和 X2。
我建议使用 workflows 把预处理和建模步骤组合在一起,并将原始数据传递给 vfold_cv()。
library(tidymodels)
#' Compare a linear model with and without natural-spline basis expansion
#'
#' Renames the outcome column to `prediction`, builds two recipes for
#' `prediction ~ .` (one plain, one with `step_ns()` on all predictors),
#' wraps each in a workflow with a `linear_reg()` model on the "lm" engine,
#' and evaluates both on the same `vfold_cv()` folds of the raw data.
#'
#' @param data A data frame holding the outcome and the predictor columns.
#' @param outcome Unquoted name of the outcome column in `data`.
#' @param metric Unquoted metric function handed to `metric_set()`.
#' @param ... Further arguments forwarded to `step_ns()`.
#' @return Both `fit_resamples()` results bound by rows, with a `pred`
#'   column set to "no_splines" or "with_splines".
compare_basis_exp_to_base_mod <- function(data, outcome, metric, ...) {
  outcome <- rlang::enquo(outcome)
  metric <- rlang::enquo(metric)

  # A fixed outcome name lets the recipe formula be written literally.
  data <- data %>%
    rename(prediction = !!outcome)

  # Neither recipe is prep()ed here: add_recipe() expects an untrained recipe
  # (newer workflows releases reject a trained one), and the workflow trains
  # the recipe itself on the analysis part of every resample.
  rec_without_splines <- recipe(prediction ~ ., data = data)
  rec_with_splines <- recipe(prediction ~ ., data = data) %>%
    step_ns(all_predictors(), ...)

  mod <- linear_reg() %>%
    set_engine("lm")

  # Resample the raw data once so both workflows see identical folds.
  data_folds <- vfold_cv(data, strata = prediction)
  metrics <- metric_set(!!metric)
  ctrl <- control_resamples(save_pred = TRUE)

  # Fit one recipe + model workflow across the folds and tag the rows.
  resample_recipe <- function(rec, label) {
    workflow() %>%
      add_recipe(rec) %>%
      add_model(mod) %>%
      fit_resamples(
        resamples = data_folds,
        metrics = metrics,
        control = ctrl
      ) %>%
      mutate(pred = label)
  }

  mod_without_splines <- resample_recipe(rec_without_splines, "no_splines")
  mod_with_splines <- resample_recipe(rec_with_splines, "with_splines")

  mod_without_splines %>%
    bind_rows(mod_with_splines)
}
# Example run: mtcars as the data, mpg as the outcome, rmse as the metric.
res <- compare_basis_exp_to_base_mod(mtcars, mpg, rmse)