In r with tidymodels: Warning message: "All models failed in [fit_resamples()]. See the `.notes` column." internal: Error: In metric: `roc_auc`
In r with tidymodels: Warning message: "All models failed in [fit_resamples()]. See the `.notes` column." internal: Error: In metric: `roc_auc`
我是 R 的新手,正在尝试学习 tidymodels。
我只在对 iris 数据集使用 glm 时遇到这个错误;如果我更换数据集和 recipe,glm 就能正常运行,但随后 kknn 又开始出现这个错误。
Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`
我检查了 .notes
,它看起来是这样的:
.notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1 .notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1
Warning message: All models failed in [fit_resamples()]. See the `.notes` column
正如上面的帖子所建议的那样,我尝试从 GitHub 升级 parsnip 和 tune 软件包,但在安装 tune 包时出现错误:Warning in install.packages : package ‘tune’ is not available for this version of R
我不确定哪里出了问题,如果有人能提供帮助,我将不胜感激!!!
版本信息:
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.2 v purrr 0.3.4
v tibble 3.0.4 v dplyr 1.0.2
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.4.0 v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
-- Attaching packages -------------------------------------- tidymodels 0.1.1 --
v broom 0.7.2 v recipes 0.1.14
v dials 0.0.9 v rsample 0.0.8
v infer 0.5.3 v tune 0.1.1
v modeldata 0.0.2 v workflows 0.2.1
v parsnip 0.1.3.9000 v yardstick 0.0.7
-- Conflicts ----------------------------------------- tidymodels_conflicts() --
x scales::discard() masks purrr::discard()
x dplyr::filter() masks stats::filter()
x recipes::fixed() masks stringr::fixed()
x dplyr::lag() masks stats::lag()
x yardstick::spec() masks readr::spec()
x recipes::step() masks stats::step()
Windows 7
platform x86_64-w64-mingw32
arch x86_64
os mingw32
system x86_64, mingw32
status
major 4
minor 0.3
year 2020
month 10
day 10
svn rev 79318
language R
version.string R version 4.0.3 (2020-10-10)
代码:
library(tidyverse)
library(tidymodels)
library(themis)
iris

# ---- Train/test split, stratified on the outcome ----
set.seed(999)
iris_split <- initial_split(iris, strata = Species)
iris_train <- training(iris_split)
iris_test <- testing(iris_split)

# ---- Cross-validation folds built from the training set ----
set.seed(345)
iris_fold <- vfold_cv(iris_train)
print(iris_fold)

# ---- Preprocessing recipe ----
iris_rec <- recipe(Species ~ ., data = iris_train) %>%
  # Downsample so every Species level has the same number of rows
  # (not actually needed for the iris dataset).
  step_downsample(Species) %>%
  # Centre and scale every column except the outcome.
  step_center(-Species) %>%
  step_scale(-Species) %>%
  # NOTE(review): step_BoxCox() is documented for strictly positive data, but
  # it runs after centring here -- confirm the intended step order.
  step_BoxCox(-Species) %>%
  # Estimate the recipe steps on the training data.
  prep()

# ---- Workflow holding the recipe ----
iris_wf <- add_recipe(workflow(), iris_rec)

# ---- Logistic regression specification (glm engine) ----
# NOTE(review): logistic_reg() models a two-class outcome, while Species has
# three levels in iris -- this is the failing setup being asked about.
glm_spec <- set_engine(logistic_reg(), "glm")

# Register a parallel backend before resampling.
doParallel::registerDoParallel()

# ---- Fit the model on every fold and collect metrics/predictions ----
glm_rs <- fit_resamples(
  add_model(iris_wf, glm_spec),
  resamples = iris_fold,
  metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
  control = control_resamples(save_pred = TRUE)
)
错误
Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`
internal: Error: In metric: `roc_auc`
internal: Error: In metric: `roc_auc`"
# Resampling results
# 10-fold cross-validation
# A tibble: 10 x 5
splits id .metrics .notes .predictions
<list> <chr> <list> <list> <list>
1 <split [102/12]> Fold01 <NULL> <tibble [1 x 1]> <NULL>
2 <split [102/12]> Fold02 <NULL> <tibble [1 x 1]> <NULL>
3 <split [102/12]> Fold03 <NULL> <tibble [1 x 1]> <NULL>
4 <split [102/12]> Fold04 <NULL> <tibble [1 x 1]> <NULL>
5 <split [103/11]> Fold05 <NULL> <tibble [1 x 1]> <NULL>
6 <split [103/11]> Fold06 <NULL> <tibble [1 x 1]> <NULL>
7 <split [103/11]> Fold07 <NULL> <tibble [1 x 1]> <NULL>
8 <split [103/11]> Fold08 <NULL> <tibble [1 x 1]> <NULL>
9 <split [103/11]> Fold09 <NULL> <tibble [1 x 1]> <NULL>
10 <split [103/11]> Fold10 <NULL> <tibble [1 x 1]> <NULL>
(更新)
即使不使用并行(Parallel)计算,RF 也会出现错误。
我认为你遇到的这个问题并不是由 tune 目前在 Windows 上的并行处理 bug 引起的,而是因为你试图用二元分类模型去拟合一个多分类(multiclass)问题。
如果您更改此示例,使其只是二元分类(例如,setosa 与其他),那么它应该可以工作:
library(tidymodels)
library(themis)
#> Registered S3 methods overwritten by 'themis':
#> method from
#> bake.step_downsample recipes
#> bake.step_upsample recipes
#> prep.step_downsample recipes
#> prep.step_upsample recipes
#> tidy.step_downsample recipes
#> tidy.step_upsample recipes
#>
#> Attaching package: 'themis'
#> The following objects are masked from 'package:recipes':
#>
#> step_downsample, step_upsample, tunable.step_downsample,
#> tunable.step_upsample
# ---- Recode the outcome to two classes, then split (stratified) ----
set.seed(999)
iris_split <- initial_split(
  mutate(
    iris,
    # Collapse Species to "setosa" versus "other".
    Species = case_when(
      Species == "setosa" ~ "setosa",
      TRUE ~ "other"
    )
  ),
  strata = Species
)
iris_train <- training(iris_split)
iris_test <- testing(iris_split)

# ---- Cross-validation folds built from the training set ----
set.seed(345)
iris_fold <- vfold_cv(iris_train)
iris_fold
#> # 10-fold cross-validation
#> # A tibble: 10 x 2
#> splits id
#> <list> <chr>
#> 1 <split [101/12]> Fold01
#> 2 <split [101/12]> Fold02
#> 3 <split [101/12]> Fold03
#> 4 <split [102/11]> Fold04
#> 5 <split [102/11]> Fold05
#> 6 <split [102/11]> Fold06
#> 7 <split [102/11]> Fold07
#> 8 <split [102/11]> Fold08
#> 9 <split [102/11]> Fold09
#> 10 <split [102/11]> Fold10
# ---- Preprocessing recipe (left un-prepped; the workflow estimates it) ----
iris_rec <- recipe(Species ~ ., data = iris_train) %>%
  # Downsample so both outcome classes have the same number of rows
  # (not actually needed for the iris dataset).
  step_downsample(Species) %>%
  # Centre and scale every column except the outcome.
  step_center(-Species) %>%
  step_scale(-Species) %>%
  # NOTE(review): step_BoxCox() is documented for strictly positive data, but
  # it runs after centring here -- confirm the intended step order.
  step_BoxCox(-Species)

# ---- Workflow holding the recipe ----
iris_wf <- add_recipe(workflow(), iris_rec)

# ---- Logistic regression specification (glm engine) ----
glm_spec <- set_engine(logistic_reg(), "glm")

# Register a parallel backend before resampling.
doParallel::registerDoParallel()

# ---- Fit the model on every fold; the result is printed, not stored ----
fit_resamples(
  add_model(iris_wf, glm_spec),
  resamples = iris_fold,
  metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
  control = control_resamples(save_pred = TRUE)
)
#> Warning: This tuning result has notes. Example notes on model fitting include:
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> # Resampling results
#> # 10-fold cross-validation
#> # A tibble: 10 x 5
#> splits id .metrics .notes .predictions
#> <list> <chr> <list> <list> <list>
#> 1 <split [101/12]> Fold01 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 2 <split [101/12]> Fold02 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 3 <split [101/12]> Fold03 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 4 <split [102/11]> Fold04 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 5 <split [102/11]> Fold05 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 6 <split [102/11]> Fold06 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 7 <split [102/11]> Fold07 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 8 <split [102/11]> Fold08 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 9 <split [102/11]> Fold09 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 10 <split [102/11]> Fold10 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
由 reprex package (v0.3.0.9001)
于 2020-10-22 创建
出现“算法不收敛”的提示,是由重采样后示例数据集的规模造成的。
我在 Linux 机器上也遇到了同样的问题,但通过删除 NA 或对其进行插补就解决了。所以,看起来是 NA 的存在导致了模型拟合失败! :)
我是 R 的新手,正在尝试学习 tidymodels。
我只在对 iris 数据集使用 glm 时遇到这个错误;如果我更换数据集和 recipe,glm 就能正常运行,但随后 kknn 又开始出现这个错误。
Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`
我检查了 .notes
,它看起来是这样的:
.notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1 .notes
<chr>
internal: Error: In metric: `roc_auc`
A tibble: 1 × 1
Warning message: All models failed in [fit_resamples()]. See the `.notes` column
正如上面的帖子所建议的那样,我尝试从 GitHub 升级 parsnip 和 tune 软件包,但在安装 tune 包时出现错误:Warning in install.packages : package ‘tune’ is not available for this version of R
我不确定哪里出了问题,如果有人能提供帮助,我将不胜感激!!!
版本信息:
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.2 v purrr 0.3.4
v tibble 3.0.4 v dplyr 1.0.2
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.4.0 v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
-- Attaching packages -------------------------------------- tidymodels 0.1.1 --
v broom 0.7.2 v recipes 0.1.14
v dials 0.0.9 v rsample 0.0.8
v infer 0.5.3 v tune 0.1.1
v modeldata 0.0.2 v workflows 0.2.1
v parsnip 0.1.3.9000 v yardstick 0.0.7
-- Conflicts ----------------------------------------- tidymodels_conflicts() --
x scales::discard() masks purrr::discard()
x dplyr::filter() masks stats::filter()
x recipes::fixed() masks stringr::fixed()
x dplyr::lag() masks stats::lag()
x yardstick::spec() masks readr::spec()
x recipes::step() masks stats::step()
Windows 7
platform x86_64-w64-mingw32
arch x86_64
os mingw32
system x86_64, mingw32
status
major 4
minor 0.3
year 2020
month 10
day 10
svn rev 79318
language R
version.string R version 4.0.3 (2020-10-10)
代码:
library(tidyverse)
library(tidymodels)
library(themis)
iris

# ---- Train/test split, stratified on the outcome ----
set.seed(999)
iris_split <- initial_split(iris, strata = Species)
iris_train <- training(iris_split)
iris_test <- testing(iris_split)

# ---- Cross-validation folds built from the training set ----
set.seed(345)
iris_fold <- vfold_cv(iris_train)
print(iris_fold)

# ---- Preprocessing recipe ----
iris_rec <- recipe(Species ~ ., data = iris_train) %>%
  # Downsample so every Species level has the same number of rows
  # (not actually needed for the iris dataset).
  step_downsample(Species) %>%
  # Centre and scale every column except the outcome.
  step_center(-Species) %>%
  step_scale(-Species) %>%
  # NOTE(review): step_BoxCox() is documented for strictly positive data, but
  # it runs after centring here -- confirm the intended step order.
  step_BoxCox(-Species) %>%
  # Estimate the recipe steps on the training data.
  prep()

# ---- Workflow holding the recipe ----
iris_wf <- add_recipe(workflow(), iris_rec)

# ---- Logistic regression specification (glm engine) ----
# NOTE(review): logistic_reg() models a two-class outcome, while Species has
# three levels in iris -- this is the failing setup being asked about.
glm_spec <- set_engine(logistic_reg(), "glm")

# Register a parallel backend before resampling.
doParallel::registerDoParallel()

# ---- Fit the model on every fold and collect metrics/predictions ----
glm_rs <- fit_resamples(
  add_model(iris_wf, glm_spec),
  resamples = iris_fold,
  metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
  control = control_resamples(save_pred = TRUE)
)
错误
Warning message:
"All models failed in [fit_resamples()]. See the `.notes` column."
Warning message:
"This tuning result has notes. Example notes on model fitting include:
internal: Error: In metric: `roc_auc`
internal: Error: In metric: `roc_auc`
internal: Error: In metric: `roc_auc`"
# Resampling results
# 10-fold cross-validation
# A tibble: 10 x 5
splits id .metrics .notes .predictions
<list> <chr> <list> <list> <list>
1 <split [102/12]> Fold01 <NULL> <tibble [1 x 1]> <NULL>
2 <split [102/12]> Fold02 <NULL> <tibble [1 x 1]> <NULL>
3 <split [102/12]> Fold03 <NULL> <tibble [1 x 1]> <NULL>
4 <split [102/12]> Fold04 <NULL> <tibble [1 x 1]> <NULL>
5 <split [103/11]> Fold05 <NULL> <tibble [1 x 1]> <NULL>
6 <split [103/11]> Fold06 <NULL> <tibble [1 x 1]> <NULL>
7 <split [103/11]> Fold07 <NULL> <tibble [1 x 1]> <NULL>
8 <split [103/11]> Fold08 <NULL> <tibble [1 x 1]> <NULL>
9 <split [103/11]> Fold09 <NULL> <tibble [1 x 1]> <NULL>
10 <split [103/11]> Fold10 <NULL> <tibble [1 x 1]> <NULL>
(更新)
即使不使用并行(Parallel)计算,RF 也会出现错误。
我认为你遇到的这个问题并不是由 tune 目前在 Windows 上的并行处理 bug 引起的,而是因为你试图用二元分类模型去拟合一个多分类(multiclass)问题。
如果您更改此示例,使其只是二元分类(例如,setosa 与其他),那么它应该可以工作:
library(tidymodels)
library(themis)
#> Registered S3 methods overwritten by 'themis':
#> method from
#> bake.step_downsample recipes
#> bake.step_upsample recipes
#> prep.step_downsample recipes
#> prep.step_upsample recipes
#> tidy.step_downsample recipes
#> tidy.step_upsample recipes
#>
#> Attaching package: 'themis'
#> The following objects are masked from 'package:recipes':
#>
#> step_downsample, step_upsample, tunable.step_downsample,
#> tunable.step_upsample
# ---- Recode the outcome to two classes, then split (stratified) ----
set.seed(999)
iris_split <- initial_split(
  mutate(
    iris,
    # Collapse Species to "setosa" versus "other".
    Species = case_when(
      Species == "setosa" ~ "setosa",
      TRUE ~ "other"
    )
  ),
  strata = Species
)
iris_train <- training(iris_split)
iris_test <- testing(iris_split)

# ---- Cross-validation folds built from the training set ----
set.seed(345)
iris_fold <- vfold_cv(iris_train)
iris_fold
#> # 10-fold cross-validation
#> # A tibble: 10 x 2
#> splits id
#> <list> <chr>
#> 1 <split [101/12]> Fold01
#> 2 <split [101/12]> Fold02
#> 3 <split [101/12]> Fold03
#> 4 <split [102/11]> Fold04
#> 5 <split [102/11]> Fold05
#> 6 <split [102/11]> Fold06
#> 7 <split [102/11]> Fold07
#> 8 <split [102/11]> Fold08
#> 9 <split [102/11]> Fold09
#> 10 <split [102/11]> Fold10
# ---- Preprocessing recipe (left un-prepped; the workflow estimates it) ----
iris_rec <- recipe(Species ~ ., data = iris_train) %>%
  # Downsample so both outcome classes have the same number of rows
  # (not actually needed for the iris dataset).
  step_downsample(Species) %>%
  # Centre and scale every column except the outcome.
  step_center(-Species) %>%
  step_scale(-Species) %>%
  # NOTE(review): step_BoxCox() is documented for strictly positive data, but
  # it runs after centring here -- confirm the intended step order.
  step_BoxCox(-Species)

# ---- Workflow holding the recipe ----
iris_wf <- add_recipe(workflow(), iris_rec)

# ---- Logistic regression specification (glm engine) ----
glm_spec <- set_engine(logistic_reg(), "glm")

# Register a parallel backend before resampling.
doParallel::registerDoParallel()

# ---- Fit the model on every fold; the result is printed, not stored ----
fit_resamples(
  add_model(iris_wf, glm_spec),
  resamples = iris_fold,
  metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
  control = control_resamples(save_pred = TRUE)
)
#> Warning: This tuning result has notes. Example notes on model fitting include:
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> preprocessor 1/1, model 1/1: glm.fit: algorithm did not converge, glm.fit: fitted probabilities numerically 0 or 1 occurred
#> # Resampling results
#> # 10-fold cross-validation
#> # A tibble: 10 x 5
#> splits id .metrics .notes .predictions
#> <list> <chr> <list> <list> <list>
#> 1 <split [101/12]> Fold01 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 2 <split [101/12]> Fold02 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 3 <split [101/12]> Fold03 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [12 × 6]>
#> 4 <split [102/11]> Fold04 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 5 <split [102/11]> Fold05 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 6 <split [102/11]> Fold06 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 7 <split [102/11]> Fold07 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 8 <split [102/11]> Fold08 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 9 <split [102/11]> Fold09 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
#> 10 <split [102/11]> Fold10 <tibble [4 × 4]> <tibble [1 × 1]> <tibble [11 × 6]>
由 reprex package (v0.3.0.9001)
于 2020-10-22 创建
出现“算法不收敛”的提示,是由重采样后示例数据集的规模造成的。
我在 Linux 机器上也遇到了同样的问题,但通过删除 NA 或对其进行插补就解决了。所以,看起来是 NA 的存在导致了模型拟合失败! :)