带有自定义步骤(custom step)的 recipes 配方在 bake 时运行正常,但在用 caret 训练模型时出错
Using recipes with custom step works fine while baking but not while training model with caret
使用 recipes 包,我开发了一个自定义步骤(custom step),以便将其用于某些 caret 模型。对这个新步骤执行 prep 和 bake 时,一切正常。但是当我尝试把未经 prep 的配方(recipe)传给 caret::train 时,我收到以下错误:"Error: No variables or terms were selected"。欢迎任何建议。
以下可重现的示例和会话信息:
# Loading libraries
packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Attach the package named by `pack`; when the first `require()` attempt
# reports failure, call `install.packages()` and try again.
# Returns the result of the final `require()` call.
InstIfNec <- function(pack) {
  pkg_args <- as.list(pack)
  already_available <- do.call(require, pkg_args)
  if (!already_available) {
    do.call(install.packages, pkg_args)
  }
  # Second attempt: its value is what the caller receives.
  do.call(require, pkg_args)
}
lapply(packs, InstIfNec)
# Getting data: load `biomass` and drop its `dataset` and `sample` columns.
data(biomass)
biomass <- biomass %>%
  select(-dataset, -sample)
# Defining custom pretreatment algorithm
#
# Runs `dwt(..., filter = "haar")` on every row of `DF1` and keeps the first
# element of the result's `V` slot (presumably the level-1 scaling
# coefficients -- confirm against the wavelets documentation).
# `DF1` is coerced with `as.matrix()`; the result is a data.frame with one
# row per input row.
HaarTransform <- function(DF1) {
  haar_first_level <- function(signal) {
    decomposition <- dwt(signal, filter = "haar")
    decomposition@V[[1]]
  }
  # apply() stacks the per-row results as columns, hence the transpose.
  # The name `Smt` is kept on purpose: data.frame() derives the column name
  # of a single-column, unnamed matrix from the argument expression.
  Smt <- t(base::apply(as.matrix(DF1), 1, haar_first_level))
  data.frame(Smt)
}
# Creating the custom step functions

# Low-level constructor: forwards its fields to `step()` with
# `subclass = "Haar"` (presumably producing the class that the
# `prep.step_Haar` / `bake.step_Haar` methods dispatch on).
# NOTE(review): no `id` field is passed to `step()`; newer recipes versions
# reportedly require one -- confirm against the installed version.
step_Haar_new <- function(terms = NULL,
                          role = NA,
                          trained = FALSE,
                          skip = FALSE,
                          columns = NULL) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
}
# User-facing step: captures the selectors given in `...` with
# `ellipse_check()`, builds an untrained Haar step from them and appends it
# to `recipe` via `add_step()`.
step_Haar <- function(recipe, ..., role = NA, trained = FALSE, skip = FALSE,
                      columns = NULL) {
  selectors <- ellipse_check(...)
  haar_step <- step_Haar_new(
    terms = selectors,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
  add_step(recipe, haar_step)
}
# prep method: resolves the step's selectors against `info` with
# `terms_select()` and returns a copy of the step marked as trained, with the
# resolved names stored in `columns`. `training` is not used here.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(
    terms = x$terms,
    role = x$role,
    trained = TRUE,
    skip = x$skip,
    columns = resolved_columns
  )
}
# bake method: applies `HaarTransform()` to the columns recorded in
# `object$columns` and returns the result as a tibble.
# NOTE(review): the returned tibble contains only the transformed columns;
# every other column of `new_data` (e.g. an outcome) is absent from it.
bake.step_Haar <- function(object, new_data, ...) {
  haar_input <- dplyr::select(new_data, object$columns)
  as_tibble(HaarTransform(haar_input))
}
# Testing the recipe function: prep and bake on the same data
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())
bake(prep(Haar_recipe, biomass), biomass)
# all is fine

# Fitting the caret model with the unprepared recipe
fit <- caret::train(
  Haar_recipe,
  data = biomass,
  method = "svmLinear"
)
# Error: No variables or terms were selected.
R 会话:
R version 3.4.4 (2018-03-15)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=French_France.1252 LC_CTYPE=French_France.1252 LC_MONETARY=French_France.1252
[4] LC_NUMERIC=C LC_TIME=French_France.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] recipes_0.1.4 wavelets_0.3-0.1 e1071_1.7-0.1 caret_6.0-81 lattice_0.20-38
[6] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1 purrr_0.3.1 readr_1.3.1
[11] tidyr_0.8.3 tibble_2.0.1 ggplot2_3.1.0 tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] Rcpp_1.0.0 lubridate_1.7.4 class_7.3-15 utf8_1.1.4 assertthat_0.2.0
[6] ipred_0.9-8 foreach_1.4.4 R6_2.4.0 cellranger_1.1.0 plyr_1.8.4
[11] backports_1.1.3 stats4_3.4.4 httr_1.4.0 pillar_1.3.1 rlang_0.3.1
[16] lazyeval_0.2.1 readxl_1.3.0 rstudioapi_0.9.0 data.table_1.12.0 kernlab_0.9-27
[21] rpart_4.1-13 Matrix_1.2-15 splines_3.4.4 gower_0.2.0 munsell_0.5.0
[26] broom_0.5.1 compiler_3.4.4 modelr_0.1.4 pkgconfig_2.0.2 nnet_7.3-12
[31] tidyselect_0.2.5 prodlim_2018.04.18 codetools_0.2-16 fansi_0.4.0 crayon_1.3.4
[36] withr_2.1.2 MASS_7.3-51.1 ModelMetrics_1.2.2 grid_3.4.4 nlme_3.1-137
[41] jsonlite_1.6 gtable_0.2.0 magrittr_1.5 scales_1.0.0 cli_1.0.1
[46] stringi_1.3.1 reshape2_1.4.3 timeDate_3043.102 xml2_1.2.0 generics_0.0.2
[51] lava_1.6.5 iterators_1.0.10 tools_3.4.4 glue_1.3.0 hms_0.4.2
[56] survival_2.43-3 yaml_2.2.0 colorspace_1.4-0 rvest_0.3.2 haven_2.1.0
有几个问题:
- 步骤(step)新增了一个名为 `id` 的必需参数(参见此处)
- 您的 bake 步骤只保留了预测变量(结果列被丢弃了)
下面是一些有效的代码:
packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Attach a package, installing it first when it cannot be attached.
#
# pack: name of a single package, as a character string.
# Returns the value of the final `require()` call: TRUE when the package is
# attached, FALSE otherwise.
InstIfNec <- function(pack) {
  # Reject vectors: passing several names used to spill the extra elements
  # into require()'s other arguments (lib.loc, quietly, ...).
  stopifnot(is.character(pack), length(pack) == 1L)
  # character.only = TRUE makes require() read `pack` as a string value
  # instead of relying on how the argument expression happens to deparse.
  if (!require(pack, character.only = TRUE)) {
    install.packages(pack)
  }
  require(pack, character.only = TRUE)
}
lapply(packs, InstIfNec)
# Getting data: load `biomass` and drop its `dataset` and `sample` columns.
data(biomass)
biomass <- biomass %>%
  select(-dataset, -sample)
# Defining custom pretreatment algorithm
#
# Runs `dwt(..., filter = "haar")` on every row of `DF1` and keeps the first
# element of the result's `V` slot (presumably the level-1 scaling
# coefficients -- confirm against the wavelets documentation).
# `DF1` is coerced with `as.matrix()`; the result is a data.frame with one
# row per input row.
HaarTransform <- function(DF1) {
  haar_first_level <- function(signal) {
    decomposition <- dwt(signal, filter = "haar")
    decomposition@V[[1]]
  }
  # apply() stacks the per-row results as columns, hence the transpose.
  # The name `Smt` is kept on purpose: data.frame() derives the column name
  # of a single-column, unnamed matrix from the argument expression.
  Smt <- t(base::apply(as.matrix(DF1), 1, haar_first_level))
  data.frame(Smt)
}
# Creating the custom step functions

# Low-level constructor: forwards every field, including the step `id`, to
# `step()` with `subclass = "Haar"`. All arguments are required.
step_Haar_new <- function(terms, role, trained, skip, columns, id) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns,
    id = id
  )
}
# User-facing step: adds a Haar-transform step to a recipe.
#
# recipe:  the recipe to extend.
# ...:     selectors for the columns to transform, captured with
#          `ellipse_check()`.
# role, trained, skip, columns: stored on the step unchanged.
# id:      identifier of the step; defaults to a random id with the
#          "Haar" prefix (the prefix was previously misspelled "Harr").
# Returns the result of `add_step()`, i.e. `recipe` with the new step added.
step_Haar <- function(recipe, ..., role = "predictor", trained = FALSE,
                      skip = FALSE, columns = NULL, id = rand_id("Haar")) {
  terms <- ellipse_check(...)
  add_step(
    recipe,
    step_Haar_new(
      terms = terms,
      role = role,
      trained = trained,
      skip = skip,
      columns = columns,
      id = id
    )
  )
}
# prep method: resolves the step's selectors against `info` with
# `terms_select()` and returns a copy of the step marked as trained, with the
# resolved names stored in `columns` and the original `id` carried over.
# `training` is not used here.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(terms = x$terms, role = x$role, trained = TRUE,
                skip = x$skip, columns = resolved_columns, id = x$id)
}
# bake method: replaces the columns listed in `object$columns` with their
# `HaarTransform()` output. All other columns of `new_data` are kept and
# come first in the result.
bake.step_Haar <- function(object, new_data, ...) {
  wavelet_features <- new_data %>%
    dplyr::select(object$columns) %>%
    HaarTransform()
  remaining <- new_data
  # Drop the raw inputs so only their transformed versions remain.
  remaining[, object$columns] <- NULL
  bind_cols(remaining, wavelet_features)
}
# Testing the recipe function
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())

# Fitting the caret model with the unprepared recipe
fit <- caret::train(
  Haar_recipe,
  data = biomass,
  method = "svmLinear"
)
使用 recipes 包,我开发了一个自定义步骤(custom step),以便将其用于某些 caret 模型。对这个新步骤执行 prep 和 bake 时,一切正常。但是当我尝试把未经 prep 的配方(recipe)传给 caret::train 时,我收到以下错误:"Error: No variables or terms were selected"。欢迎任何建议。以下是可重现的示例和会话信息:
# Loading libraries
packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Attach the package named by `pack`; when the first `require()` attempt
# reports failure, call `install.packages()` and try again.
# Returns the result of the final `require()` call.
InstIfNec <- function(pack) {
  pkg_args <- as.list(pack)
  already_available <- do.call(require, pkg_args)
  if (!already_available) {
    do.call(install.packages, pkg_args)
  }
  # Second attempt: its value is what the caller receives.
  do.call(require, pkg_args)
}
lapply(packs, InstIfNec)
# Getting data: load `biomass` and drop its `dataset` and `sample` columns.
data(biomass)
biomass <- biomass %>%
  select(-dataset, -sample)
# Defining custom pretreatment algorithm
#
# Runs `dwt(..., filter = "haar")` on every row of `DF1` and keeps the first
# element of the result's `V` slot (presumably the level-1 scaling
# coefficients -- confirm against the wavelets documentation).
# `DF1` is coerced with `as.matrix()`; the result is a data.frame with one
# row per input row.
HaarTransform <- function(DF1) {
  haar_first_level <- function(signal) {
    decomposition <- dwt(signal, filter = "haar")
    decomposition@V[[1]]
  }
  # apply() stacks the per-row results as columns, hence the transpose.
  # The name `Smt` is kept on purpose: data.frame() derives the column name
  # of a single-column, unnamed matrix from the argument expression.
  Smt <- t(base::apply(as.matrix(DF1), 1, haar_first_level))
  data.frame(Smt)
}
# Creating the custom step functions

# Low-level constructor: forwards its fields to `step()` with
# `subclass = "Haar"` (presumably producing the class that the
# `prep.step_Haar` / `bake.step_Haar` methods dispatch on).
# NOTE(review): no `id` field is passed to `step()`; newer recipes versions
# reportedly require one -- confirm against the installed version.
step_Haar_new <- function(terms = NULL,
                          role = NA,
                          trained = FALSE,
                          skip = FALSE,
                          columns = NULL) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
}
# User-facing step: captures the selectors given in `...` with
# `ellipse_check()`, builds an untrained Haar step from them and appends it
# to `recipe` via `add_step()`.
step_Haar <- function(recipe, ..., role = NA, trained = FALSE, skip = FALSE,
                      columns = NULL) {
  selectors <- ellipse_check(...)
  haar_step <- step_Haar_new(
    terms = selectors,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
  add_step(recipe, haar_step)
}
# prep method: resolves the step's selectors against `info` with
# `terms_select()` and returns a copy of the step marked as trained, with the
# resolved names stored in `columns`. `training` is not used here.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(
    terms = x$terms,
    role = x$role,
    trained = TRUE,
    skip = x$skip,
    columns = resolved_columns
  )
}
# bake method: applies `HaarTransform()` to the columns recorded in
# `object$columns` and returns the result as a tibble.
# NOTE(review): the returned tibble contains only the transformed columns;
# every other column of `new_data` (e.g. an outcome) is absent from it.
bake.step_Haar <- function(object, new_data, ...) {
  haar_input <- dplyr::select(new_data, object$columns)
  as_tibble(HaarTransform(haar_input))
}
# Testing the recipe function: prep and bake on the same data
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())
bake(prep(Haar_recipe, biomass), biomass)
# all is fine

# Fitting the caret model with the unprepared recipe
fit <- caret::train(
  Haar_recipe,
  data = biomass,
  method = "svmLinear"
)
# Error: No variables or terms were selected.
R 会话:
R version 3.4.4 (2018-03-15)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=French_France.1252 LC_CTYPE=French_France.1252 LC_MONETARY=French_France.1252
[4] LC_NUMERIC=C LC_TIME=French_France.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] recipes_0.1.4 wavelets_0.3-0.1 e1071_1.7-0.1 caret_6.0-81 lattice_0.20-38
[6] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1 purrr_0.3.1 readr_1.3.1
[11] tidyr_0.8.3 tibble_2.0.1 ggplot2_3.1.0 tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] Rcpp_1.0.0 lubridate_1.7.4 class_7.3-15 utf8_1.1.4 assertthat_0.2.0
[6] ipred_0.9-8 foreach_1.4.4 R6_2.4.0 cellranger_1.1.0 plyr_1.8.4
[11] backports_1.1.3 stats4_3.4.4 httr_1.4.0 pillar_1.3.1 rlang_0.3.1
[16] lazyeval_0.2.1 readxl_1.3.0 rstudioapi_0.9.0 data.table_1.12.0 kernlab_0.9-27
[21] rpart_4.1-13 Matrix_1.2-15 splines_3.4.4 gower_0.2.0 munsell_0.5.0
[26] broom_0.5.1 compiler_3.4.4 modelr_0.1.4 pkgconfig_2.0.2 nnet_7.3-12
[31] tidyselect_0.2.5 prodlim_2018.04.18 codetools_0.2-16 fansi_0.4.0 crayon_1.3.4
[36] withr_2.1.2 MASS_7.3-51.1 ModelMetrics_1.2.2 grid_3.4.4 nlme_3.1-137
[41] jsonlite_1.6 gtable_0.2.0 magrittr_1.5 scales_1.0.0 cli_1.0.1
[46] stringi_1.3.1 reshape2_1.4.3 timeDate_3043.102 xml2_1.2.0 generics_0.0.2
[51] lava_1.6.5 iterators_1.0.10 tools_3.4.4 glue_1.3.0 hms_0.4.2
[56] survival_2.43-3 yaml_2.2.0 colorspace_1.4-0 rvest_0.3.2 haven_2.1.0
有几个问题:
- 步骤(step)新增了一个名为 `id` 的必需参数(参见此处)
- 您的 bake 步骤只保留了预测变量(结果列被丢弃了)
下面是一些有效的代码:
packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Attach a package, installing it first when it cannot be attached.
#
# pack: name of a single package, as a character string.
# Returns the value of the final `require()` call: TRUE when the package is
# attached, FALSE otherwise.
InstIfNec <- function(pack) {
  # Reject vectors: passing several names used to spill the extra elements
  # into require()'s other arguments (lib.loc, quietly, ...).
  stopifnot(is.character(pack), length(pack) == 1L)
  # character.only = TRUE makes require() read `pack` as a string value
  # instead of relying on how the argument expression happens to deparse.
  if (!require(pack, character.only = TRUE)) {
    install.packages(pack)
  }
  require(pack, character.only = TRUE)
}
lapply(packs, InstIfNec)
# Getting data: load `biomass` and drop its `dataset` and `sample` columns.
data(biomass)
biomass <- biomass %>%
  select(-dataset, -sample)
# Defining custom pretreatment algorithm
#
# Runs `dwt(..., filter = "haar")` on every row of `DF1` and keeps the first
# element of the result's `V` slot (presumably the level-1 scaling
# coefficients -- confirm against the wavelets documentation).
# `DF1` is coerced with `as.matrix()`; the result is a data.frame with one
# row per input row.
HaarTransform <- function(DF1) {
  haar_first_level <- function(signal) {
    decomposition <- dwt(signal, filter = "haar")
    decomposition@V[[1]]
  }
  # apply() stacks the per-row results as columns, hence the transpose.
  # The name `Smt` is kept on purpose: data.frame() derives the column name
  # of a single-column, unnamed matrix from the argument expression.
  Smt <- t(base::apply(as.matrix(DF1), 1, haar_first_level))
  data.frame(Smt)
}
# Creating the custom step functions

# Low-level constructor: forwards every field, including the step `id`, to
# `step()` with `subclass = "Haar"`. All arguments are required.
step_Haar_new <- function(terms, role, trained, skip, columns, id) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns,
    id = id
  )
}
# User-facing step: adds a Haar-transform step to a recipe.
#
# recipe:  the recipe to extend.
# ...:     selectors for the columns to transform, captured with
#          `ellipse_check()`.
# role, trained, skip, columns: stored on the step unchanged.
# id:      identifier of the step; defaults to a random id with the
#          "Haar" prefix (the prefix was previously misspelled "Harr").
# Returns the result of `add_step()`, i.e. `recipe` with the new step added.
step_Haar <- function(recipe, ..., role = "predictor", trained = FALSE,
                      skip = FALSE, columns = NULL, id = rand_id("Haar")) {
  terms <- ellipse_check(...)
  add_step(
    recipe,
    step_Haar_new(
      terms = terms,
      role = role,
      trained = trained,
      skip = skip,
      columns = columns,
      id = id
    )
  )
}
# prep method: resolves the step's selectors against `info` with
# `terms_select()` and returns a copy of the step marked as trained, with the
# resolved names stored in `columns` and the original `id` carried over.
# `training` is not used here.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(terms = x$terms, role = x$role, trained = TRUE,
                skip = x$skip, columns = resolved_columns, id = x$id)
}
# bake method: replaces the columns listed in `object$columns` with their
# `HaarTransform()` output. All other columns of `new_data` are kept and
# come first in the result.
bake.step_Haar <- function(object, new_data, ...) {
  wavelet_features <- new_data %>%
    dplyr::select(object$columns) %>%
    HaarTransform()
  remaining <- new_data
  # Drop the raw inputs so only their transformed versions remain.
  remaining[, object$columns] <- NULL
  bind_cols(remaining, wavelet_features)
}
# Testing the recipe function
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())

# Fitting the caret model with the unprepared recipe
fit <- caret::train(
  Haar_recipe,
  data = biomass,
  method = "svmLinear"
)