带有自定义步骤的 recipes 配方在 bake 时工作正常,但在使用 caret 训练模型时出错

Using recipes with custom step works fine while baking but not while training model with caret

我使用 recipes 包开发了一个自定义步骤(step),以便在一些 caret 模型中使用。对这个新步骤执行 prep 和 bake 时,一切正常。但是当我尝试把未经 prep 的配方(recipe)传给 caret::train 时,收到以下错误:"Error: No variables or terms were selected"。欢迎任何建议。以下是可重现的示例和会话信息:

# Loading libraries
packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Attach the package named by `pack`, installing it first when the initial
# attempt to attach it fails. Returns the result of the final require() call.
InstIfNec <- function(pack) {
  pack_args <- as.list(pack)
  attached <- do.call(require, pack_args)
  if (!attached) {
    do.call(install.packages, pack_args)
  }
  do.call(require, pack_args)
}
lapply(packs, InstIfNec)

# Getting data
data(biomass)
# Drop the `dataset` and `sample` columns
biomass <- biomass %>%
  select(-dataset, -sample)

# Defining custom pretreatment algorithm
# Row-wise Haar transform of a data frame.
# Each row of `DF1` (coerced to a matrix) is passed to dwt() with
# filter = "haar"; the first element of the result's `V` slot is kept.
# The per-row results are stacked back into a data frame, one row per input
# row.
HaarTransform <- function(DF1) {
  haar_first_v <- function(series) {
    dwt(series, filter = "haar")@V[[1]]
  }
  per_row <- base::apply(as.matrix(DF1), 1, haar_first_v)
  data.frame(t(per_row))
}

# Creating the custom step functions
# Low-level constructor for the "Haar" step: forwards every field unchanged
# to step(), which is called with subclass = "Haar".
step_Haar_new <- function(terms = NULL, role = NA, trained = FALSE,
                          skip = FALSE, columns = NULL) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
}

# User-facing step function: passes the selectors given in `...` through
# ellipse_check(), builds a Haar step from them and appends it to `recipe`
# with add_step().
step_Haar <- function(recipe, ..., role = NA, trained = FALSE, skip = FALSE,
                      columns = NULL) {
  selectors <- ellipse_check(...)
  haar_step <- step_Haar_new(
    terms = selectors,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns
  )
  add_step(recipe, haar_step)
}

# prep() method for the Haar step: resolves the step's selectors against
# `info` with terms_select() and returns a copy of the step marked as trained,
# with the resolved column names stored in `columns`.
# `training` is not used by this method.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(
    terms = x$terms,
    role = x$role,
    trained = TRUE,
    skip = x$skip,
    columns = resolved_columns
  )
}

# bake() method for the Haar step: selects the columns recorded in
# `object$columns`, runs them through HaarTransform() and returns the result
# as a tibble. Only the transformed columns are returned; every other column
# of `new_data` is absent from the result.
bake.step_Haar <- function(object, new_data, ...) {
  selected <- dplyr::select(new_data, object$columns)
  transformed <- HaarTransform(selected)
  as_tibble(transformed)
}

# Testing the recipe function
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())

# Prepping and baking the recipe directly on biomass
bake(prep(Haar_recipe, biomass), biomass)
# all is fine

# Fitting the caret model
fit <- caret::train(Haar_recipe, data = biomass, method = "svmLinear")
# Error: No variables or terms were selected.

R 会话:

R version 3.4.4 (2018-03-15)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1

Matrix products: default

locale:
[1] LC_COLLATE=French_France.1252 LC_CTYPE=French_France.1252 LC_MONETARY=French_France.1252
[4] LC_NUMERIC=C LC_TIME=French_France.1252

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] recipes_0.1.4 wavelets_0.3-0.1 e1071_1.7-0.1 caret_6.0-81 lattice_0.20-38
[6] forcats_0.4.0 stringr_1.4.0 dplyr_0.8.0.1 purrr_0.3.1 readr_1.3.1
[11] tidyr_0.8.3 tibble_2.0.1 ggplot2_3.1.0 tidyverse_1.2.1

loaded via a namespace (and not attached):
[1] Rcpp_1.0.0 lubridate_1.7.4 class_7.3-15 utf8_1.1.4 assertthat_0.2.0
[6] ipred_0.9-8 foreach_1.4.4 R6_2.4.0 cellranger_1.1.0 plyr_1.8.4
[11] backports_1.1.3 stats4_3.4.4 httr_1.4.0 pillar_1.3.1 rlang_0.3.1
[16] lazyeval_0.2.1 readxl_1.3.0 rstudioapi_0.9.0 data.table_1.12.0 kernlab_0.9-27
[21] rpart_4.1-13 Matrix_1.2-15 splines_3.4.4 gower_0.2.0 munsell_0.5.0
[26] broom_0.5.1 compiler_3.4.4 modelr_0.1.4 pkgconfig_2.0.2 nnet_7.3-12
[31] tidyselect_0.2.5 prodlim_2018.04.18 codetools_0.2-16 fansi_0.4.0 crayon_1.3.4
[36] withr_2.1.2 MASS_7.3-51.1 ModelMetrics_1.2.2 grid_3.4.4 nlme_3.1-137
[41] jsonlite_1.6 gtable_0.2.0 magrittr_1.5 scales_1.0.0 cli_1.0.1
[46] stringi_1.3.1 reshape2_1.4.3 timeDate_3043.102 xml2_1.2.0 generics_0.0.2
[51] lava_1.6.5 iterators_1.0.10 tools_3.4.4 glue_1.3.0 hms_0.4.2
[56] survival_2.43-3 yaml_2.2.0 colorspace_1.4-0 rvest_0.3.2 haven_2.1.0

有几个问题:

  • 步骤(step)现在有一个新的必需参数 id(参见此处)
  • 您的 bake 方法只保留了预测变量(结果列被丢弃了)

下面是一些有效的代码:

packs <- c("tidyverse", "caret", "e1071", "wavelets", "recipes")
# Make sure the package named by `pack` is installed and attached.
#
# pack: a single package name as a character string.
# Returns TRUE once the package is attached.
#
# The package is installed only when it is not already available.
# library() is used for attaching instead of require() so that a failed
# installation or load stops the script here with a clear error, rather than
# returning FALSE and letting later code fail on missing functions.
InstIfNec <- function(pack) {
  if (!requireNamespace(pack, quietly = TRUE)) {
    install.packages(pack)
  }
  library(pack, character.only = TRUE)
  TRUE
}
lapply(packs, InstIfNec)

# Getting data
data(biomass)
# Drop the `dataset` and `sample` columns
biomass <- biomass %>%
  select(-dataset, -sample)

# Defining custom pretreatment algorithm
# Row-wise Haar transform of a data frame.
# `DF1` is coerced to a matrix and each row is passed to dwt() with
# filter = "haar"; the first element of the result's `V` slot is kept.
# The per-row results are transposed so that the returned data frame has one
# row per input row.
HaarTransform <- function(DF1) {
  first_v_of_row <- function(row_values) {
    dwt(row_values, filter = "haar")@V[[1]]
  }
  input_matrix <- as.matrix(DF1)
  coefs_by_row <- t(base::apply(input_matrix, 1, first_v_of_row))
  data.frame(coefs_by_row)
}

# Creating the custom step functions
# Low-level constructor for the "Haar" step: forwards every field, including
# `id`, unchanged to step(), which is called with subclass = "Haar".
step_Haar_new <- function(terms, role, trained, skip, columns, id) {
  step(
    subclass = "Haar",
    terms = terms,
    role = role,
    trained = trained,
    skip = skip,
    columns = columns,
    id = id
  )
}

# User-facing step function: adds a Haar step to `recipe`.
#
# recipe:  the recipe to which the step is appended.
# ...:     selectors for the columns to transform, checked by ellipse_check().
# role, trained, skip, columns: stored on the step object as given.
# id:      identifier of the step; by default a random id generated by
#          rand_id() with the prefix "Haar", matching the step's name.
# Returns the value of add_step(), i.e. `recipe` with the new step added.
step_Haar <- function(recipe, ..., role = "predictor", trained = FALSE,
                      skip = FALSE, columns = NULL, id = rand_id("Haar")) {
  terms <- ellipse_check(...)
  add_step(
    recipe,
    step_Haar_new(
      terms = terms,
      role = role,
      trained = trained,
      skip = skip,
      columns = columns,
      id = id
    )
  )
}

# prep() method for the Haar step: resolves the step's selectors against
# `info` with terms_select() and returns a copy of the step marked as trained.
# The resolved column names are stored in `columns` and the original `id` is
# carried over. `training` is not used by this method.
prep.step_Haar <- function(x, training, info = NULL, ...) {
  resolved_columns <- terms_select(terms = x$terms, info = info)
  step_Haar_new(terms = x$terms, role = x$role, trained = TRUE,
                skip = x$skip, columns = resolved_columns, id = x$id)
}

# bake() method for the Haar step: transforms the columns recorded in
# `object$columns` with HaarTransform(), removes those original columns from
# the data and binds the transformed columns onto the remaining ones, so
# columns that were not selected are kept in the result.
bake.step_Haar <- function(object, new_data, ...) {
  haar_features <- HaarTransform(dplyr::select(new_data, object$columns))
  untouched <- new_data
  untouched[, object$columns] <- NULL
  bind_cols(untouched, haar_features)
}

# Testing the recipe function
# Recipe modelling carbon from all other columns, with the Haar step applied
# to every predictor
Haar_recipe <- step_Haar(recipe(carbon ~ ., biomass), all_predictors())

# Fitting the caret model
fit <- caret::train(Haar_recipe, data = biomass, method = "svmLinear")