How to fix this error: recipes fail to load in caret::train?
How to fix this error: recipes fail to load in caret::train?
我在将 recipe(配方)传入 caret::train 时遇到了这个问题。
NA 插补似乎有问题,但我不知道如何解决。如果去掉交叉验证(cross-validation),一切正常。
提前致谢!
# Packages used by this reprex: recipes for preprocessing (it also provides
# the %>% pipe), caret for train()/trainControl(), rsample for the split.
library(recipes)
library(caret)
library(rsample)

data(airquality)

# 70% of the rows go to the training set, the rest to the test set.
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Feature engineering - final recipe
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes())

# Validation: 5-fold cross-validation, repeated 5 times
cv5 <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  allowParallel = TRUE
)

# Fit an lm model.
# NOTE: this call is the one that fails with the quantile.default() error
# quoted right after this snippet; it is kept as-is to reproduce the problem.
set.seed(12)
lm_fit <- train(
  air_recipe,
  data = air_train,
  method = "lm",
  trControl = cv5,
  metric = "RMSE"
)
错误信息:
Error in quantile.default(y, probs = seq(0, 1, length = cuts)) : missing values and NaN's not allowed if 'na.rm' is FALSE
R.version
               _
platform       x86_64-apple-darwin15.6.0
arch           x86_64
os             darwin15.6.0
system         x86_64, darwin15.6.0
status
major          3
minor          6.1
year           2019
month          07
day            05
svn rev        76782
language       R
version.string R version 3.6.1 (2019-07-05)
nickname       Action of the Toes
看起来重采样是在应用 recipe 之前完成的,此时结果变量中仍含有 NA。因此,您可以先对 recipe 执行 `prep` 和 `juice`,然后使用公式方法:
library(recipes)
library(caret)
library(rsample)

data(airquality)

# Train/test split: 70% of the rows are used for training
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Feature engineering - final recipe (ends by dropping rows that still
# contain missing values in the outcome or the predictors)
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_naomit(all_outcomes(), all_predictors())

# Prep recipe: estimate every step on the training data and retain the result
air_prep <- prep(air_recipe, retain = TRUE)

# Juice the prepared recipe: the processed training set replaces the raw one
air_train <- juice(air_prep)

# Validation: 5-fold cross-validation, repeated 5 times
cv5 <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  allowParallel = TRUE
)

# Fit an lm model through the formula interface on the processed data
set.seed(12)
lm_fit <- train(
  Ozone ~ .,
  data = air_train,
  method = "lm",
  trControl = cv5,
  metric = "RMSE"
)

lm_fit
#> Linear Regression
#>
#> 108 samples
#> 5 predictor
#>
#> No pre-processing
#> Resampling: Cross-Validated (5 fold, repeated 5 times)
#> Summary of sample sizes: 86, 88, 86, 86, 86, 86, ...
#> Resampling results:
#>
#> RMSE Rsquared MAE
#> 0.5091496 0.6568485 0.3793589
#>
#> Tuning parameter 'intercept' was held constant at a value of TRUE
或者,您可以使用 {parsnip} 和 {tune},让整个流程都保持 tidymodels 的惯用写法:
library(recipes)
library(rsample)
library(parsnip)
library(tune)
library(yardstick)

data(airquality)

# Train/test split: 70% of the rows are used for training
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Preprocessing recipe, left unprepped so it can be passed to fit_resamples()
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_naomit(all_outcomes(), all_predictors())

# Resampling scheme: 5-fold cross-validation, repeated 5 times
air_cv <- vfold_cv(air_train, v = 5, repeats = 5)

# Model specification: linear regression using the "lm" engine
lm_mod <- linear_reg() %>%
  set_engine("lm")

# Fit across the resamples and report the RMSE summary
lm_fits <- fit_resamples(air_recipe, lm_mod, air_cv)
show_best(lm_fits, metric = "rmse", maximize = FALSE)
#> # A tibble: 1 x 5
#> .metric .estimator mean n std_err
#> <chr> <chr> <dbl> <int> <dbl>
#> 1 rmse standard 0.526 25 0.0256
由 reprex 包 (v0.3.0) 创建于 2020-04-05
我在将 recipe(配方)传入 caret::train 时遇到了这个问题。
NA 插补似乎有问题,但我不知道如何解决。如果去掉交叉验证(cross-validation),一切正常。
提前致谢!
# Packages used by this reprex: recipes for preprocessing (it also provides
# the %>% pipe), caret for train()/trainControl(), rsample for the split.
library(recipes)
library(caret)
library(rsample)

data(airquality)

# 70% of the rows go to the training set, the rest to the test set.
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Feature engineering - final recipe
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes())

# Validation: 5-fold cross-validation, repeated 5 times
cv5 <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  allowParallel = TRUE
)

# Fit an lm model.
# NOTE: this call is the one that fails with the quantile.default() error
# quoted right after this snippet; it is kept as-is to reproduce the problem.
set.seed(12)
lm_fit <- train(
  air_recipe,
  data = air_train,
  method = "lm",
  trControl = cv5,
  metric = "RMSE"
)
错误信息:
Error in quantile.default(y, probs = seq(0, 1, length = cuts)) : missing values and NaN's not allowed if 'na.rm' is FALSE
R.version
               _
platform       x86_64-apple-darwin15.6.0
arch           x86_64
os             darwin15.6.0
system         x86_64, darwin15.6.0
status
major          3
minor          6.1
year           2019
month          07
day            05
svn rev        76782
language       R
version.string R version 3.6.1 (2019-07-05)
nickname       Action of the Toes
看起来重采样是在应用 recipe 之前完成的,此时结果变量中仍含有 NA。因此,您可以先对 recipe 执行 `prep` 和 `juice`,然后使用公式方法:
library(recipes)
library(caret)
library(rsample)

data(airquality)

# Train/test split: 70% of the rows are used for training
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Feature engineering - final recipe (ends by dropping rows that still
# contain missing values in the outcome or the predictors)
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_naomit(all_outcomes(), all_predictors())

# Prep recipe: estimate every step on the training data and retain the result
air_prep <- prep(air_recipe, retain = TRUE)

# Juice the prepared recipe: the processed training set replaces the raw one
air_train <- juice(air_prep)

# Validation: 5-fold cross-validation, repeated 5 times
cv5 <- trainControl(
  method = "repeatedcv",
  number = 5,
  repeats = 5,
  allowParallel = TRUE
)

# Fit an lm model through the formula interface on the processed data
set.seed(12)
lm_fit <- train(
  Ozone ~ .,
  data = air_train,
  method = "lm",
  trControl = cv5,
  metric = "RMSE"
)

lm_fit
#> Linear Regression
#>
#> 108 samples
#> 5 predictor
#>
#> No pre-processing
#> Resampling: Cross-Validated (5 fold, repeated 5 times)
#> Summary of sample sizes: 86, 88, 86, 86, 86, 86, ...
#> Resampling results:
#>
#> RMSE Rsquared MAE
#> 0.5091496 0.6568485 0.3793589
#>
#> Tuning parameter 'intercept' was held constant at a value of TRUE
或者,您可以使用 {parsnip} 和 {tune},让整个流程都保持 tidymodels 的惯用写法:
library(recipes)
library(rsample)
library(parsnip)
library(tune)
library(yardstick)

data(airquality)

# Train/test split: 70% of the rows are used for training
set.seed(33) # for reproducibility
air_split <- initial_split(airquality, prop = 0.7)
air_train <- training(air_split)
air_test <- testing(air_split)

# Preprocessing recipe, left unprepped so it can be passed to fit_resamples()
air_recipe <- recipe(Ozone ~ ., data = air_train) %>%
  step_zv(all_predictors()) %>%
  step_nzv(all_predictors()) %>%
  step_knnimpute(all_numeric(), neighbors = 6) %>%
  step_log(Ozone, Wind) %>%
  step_other(Day, threshold = 0.01, other = "other") %>%
  step_dummy(all_nominal(), -all_outcomes()) %>%
  step_naomit(all_outcomes(), all_predictors())

# Resampling scheme: 5-fold cross-validation, repeated 5 times
air_cv <- vfold_cv(air_train, v = 5, repeats = 5)

# Model specification: linear regression using the "lm" engine
lm_mod <- linear_reg() %>%
  set_engine("lm")

# Fit across the resamples and report the RMSE summary
lm_fits <- fit_resamples(air_recipe, lm_mod, air_cv)
show_best(lm_fits, metric = "rmse", maximize = FALSE)
#> # A tibble: 1 x 5
#> .metric .estimator mean n std_err
#> <chr> <chr> <dbl> <int> <dbl>
#> 1 rmse standard 0.526 25 0.0256
由 reprex 包 (v0.3.0) 创建于 2020-04-05