如何在 tidymodels 中指定 PLS 模型
How do I specify a PLS model in tidy models
我对学习 tidymodels 很感兴趣,并尝试将它应用到 Applied Predictive Modeling 一书中的一些练习中。这是练习 6.2。我想为渗透率(permeability)数据集指定偏最小二乘 (PLS) 模型。
我有以下代码,在执行到 tune_grid 之前都可以正常运行。我的分析是参照 Julia Silge 的文章《使用 tidymodels 对 The Office 数据进行套索回归》编写的(原帖此处附有链接)。
您可以在下面看到我的脚本和 tune_grid 错误消息。
library(tidymodels)
library(tidyverse)
library(skimr)
library(plsmod)
library(caret)
library(AppliedPredictiveModeling)
# Exercise 6.2 (Applied Predictive Modeling): PLS on the permeability data.
# `fingerprints` (predictors) and `permeability` (outcome) are used right
# after this call, so they are expected to be provided by data(permeability).
data(permeability)
dim(fingerprints)

# Drop near-zero-variance predictors. Guard the empty case: if no column is
# flagged, `fingerprints[, -integer(0)]` would select zero columns instead
# of keeping all of them.
nzv_cols <- nearZeroVar(fingerprints)
if (length(nzv_cols) > 0) {
  fingerprints <- fingerprints[, -nzv_cols, drop = FALSE]
}
dim(fingerprints)

# Combine predictors and outcome into one tibble for rsample/recipes.
df <- cbind(fingerprints, permeability)
df <- as_tibble(df)

# Seed the split so the train/test partition is reproducible.
set.seed(123)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)

# Preprocessing: center and scale every numeric predictor (not the outcome).
perm_rec <- recipe(permeability ~ ., data = perm_train) %>%
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes())

# Prepped copy is for inspection only; it must not go into the workflow.
perm_prep <- perm_rec %>%
  prep()
perm_prep

# PLS with a fixed number of components, fitted once as a sanity check.
pls_spec <- pls(num_comp = 4) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# The workflow gets the UNTRAINED recipe: workflows trains the recipe itself
# (per resample when tuning), and a trained recipe is rejected by
# add_recipe() in current workflows releases.
wf <- workflow() %>%
  add_recipe(perm_rec)

pls_fit <- wf %>%
  add_model(pls_spec) %>%
  fit(data = perm_train)

# NOTE(review): pull_workflow_fit() is deprecated in newer workflows
# releases in favor of extract_fit_parsnip(); switch once the installed
# version provides it.
pls_fit %>%
  pull_workflow_fit() %>%
  tidy()

# 10-fold cross-validation on the training set.
set.seed(123)
perm_folds <- vfold_cv(perm_train, v = 10)

# Same model, but with the number of components marked for tuning.
pls_tune_spec <- pls(num_comp = tune()) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# Candidate values: 1 to 20 components.
comp_grid <- expand.grid(num_comp = seq(from = 1, to = 20, by = 1))

# NOTE(review): registers a parallel backend. If every model fails in
# tune_grid(), rerun sequentially to rule out the backend as the cause.
doParallel::registerDoParallel()

set.seed(4763)
pls_grid <- tune_grid(
  wf %>% add_model(pls_tune_spec),
  resamples = perm_folds,
  grid = comp_grid
)
此时我收到以下错误:
所有模型都在 tune_grid() 中失败。请参阅 .notes
列。
两个问题:
- 为什么我的 tune_grid 调参会失败,我该如何解决?
- 如何查看 .notes 列?
我猜您可能使用的是 Windows 计算机,因为目前 CRAN 版本的 tune 在 Windows 上进行并行处理时存在一个 bug。请尝试以下任一做法:
- 不使用并行处理,按顺序训练,或者
- 通过
devtools::install_github("tidymodels/tune")
安装已修复此 bug 的 tune 开发版本
您应该会看到如下结果:
library(tidymodels)
library(plsmod)
library(AppliedPredictiveModeling)
# Load the permeability example data; `fingerprints` (predictors) and
# `permeability` (outcome) are used immediately below.
data(permeability)

# One tibble holding predictors and outcome side by side.
df <- as_tibble(cbind(fingerprints, permeability))

# Reproducible train/test split.
set.seed(123)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)

# Reproducible 10-fold cross-validation on the training set.
set.seed(234)
perm_folds <- vfold_cv(perm_train, v = 10)

# Untrained recipe: filter near-zero-variance predictors inside the recipe
# (so it is re-estimated per resample), then center and scale predictors.
perm_rec <- recipe(permeability ~ ., data = perm_train) %>%
  step_nzv(all_predictors()) %>%
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes())

# PLS regression with the number of components left to be tuned.
pls_spec <- pls(num_comp = tune()) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# Candidate numbers of components: 1, 6, 11, 16.
comp_grid <- tibble(num_comp = seq(1, 20, by = 5))

doParallel::registerDoParallel()

# Assemble the workflow and tune it over the grid; the result is printed.
workflow() %>%
  add_recipe(perm_rec) %>%
  add_model(pls_spec) %>%
  tune_grid(
    resamples = perm_folds,
    grid = comp_grid
  )
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#>
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#>
#> data_frame
#> The following object is masked from 'package:dplyr':
#>
#> data_frame
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: lattice
#>
#> Loaded mixOmics 6.12.2
#> Thank you for using mixOmics!
#> Tutorials: http://mixomics.org
#> Bookdown vignette: https://mixomicsteam.github.io/Bookdown
#> Questions, issues: Follow the prompts at http://mixomics.org/contact-us
#> Cite us: citation('mixOmics')
#>
#> Attaching package: 'mixOmics'
#> The following object is masked from 'package:plsmod':
#>
#> pls
#> The following object is masked from 'package:tune':
#>
#> tune
#> The following object is masked from 'package:purrr':
#>
#> map
#> # Tuning results
#> # 10-fold cross-validation
#> # A tibble: 10 x 4
#> splits id .metrics .notes
#> <list> <chr> <list> <list>
#> 1 <split [111/13]> Fold01 <tibble [8 × 5]> <tibble [0 × 1]>
#> 2 <split [111/13]> Fold02 <tibble [8 × 5]> <tibble [0 × 1]>
#> 3 <split [111/13]> Fold03 <tibble [8 × 5]> <tibble [0 × 1]>
#> 4 <split [111/13]> Fold04 <tibble [8 × 5]> <tibble [0 × 1]>
#> 5 <split [112/12]> Fold05 <tibble [8 × 5]> <tibble [0 × 1]>
#> 6 <split [112/12]> Fold06 <tibble [8 × 5]> <tibble [0 × 1]>
#> 7 <split [112/12]> Fold07 <tibble [8 × 5]> <tibble [0 × 1]>
#> 8 <split [112/12]> Fold08 <tibble [8 × 5]> <tibble [0 × 1]>
#> 9 <split [112/12]> Fold09 <tibble [8 × 5]> <tibble [0 × 1]>
#> 10 <split [112/12]> Fold10 <tibble [8 × 5]> <tibble [0 × 1]>
由 reprex package (v0.3.0.9001)
于 2020-11-12 创建
如果您有一个像 pls_grid
这样带有 .notes 列的对象,您可以通过 pls_grid$.notes
访问该列,或者通过 pls_grid$.notes[[1]]
查看其中的第一个元素。
我对学习 tidymodels 很感兴趣,并尝试将它应用到 Applied Predictive Modeling 一书中的一些练习中。这是练习 6.2。我想为渗透率(permeability)数据集指定偏最小二乘 (PLS) 模型。
我有以下代码,在执行到 tune_grid 之前都可以正常运行。我的分析是参照 Julia Silge 的文章《使用 tidymodels 对 The Office 数据进行套索回归》编写的(原帖此处附有链接)。
您可以在下面看到我的脚本和 tune_grid 错误消息。
library(tidymodels)
library(tidyverse)
library(skimr)
library(plsmod)
library(caret)
library(AppliedPredictiveModeling)
# Exercise 6.2 (Applied Predictive Modeling): PLS on the permeability data.
# `fingerprints` (predictors) and `permeability` (outcome) are used right
# after this call, so they are expected to be provided by data(permeability).
data(permeability)
dim(fingerprints)

# Drop near-zero-variance predictors. Guard the empty case: if no column is
# flagged, `fingerprints[, -integer(0)]` would select zero columns instead
# of keeping all of them.
nzv_cols <- nearZeroVar(fingerprints)
if (length(nzv_cols) > 0) {
  fingerprints <- fingerprints[, -nzv_cols, drop = FALSE]
}
dim(fingerprints)

# Combine predictors and outcome into one tibble for rsample/recipes.
df <- cbind(fingerprints, permeability)
df <- as_tibble(df)

# Seed the split so the train/test partition is reproducible.
set.seed(123)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)

# Preprocessing: center and scale every numeric predictor (not the outcome).
perm_rec <- recipe(permeability ~ ., data = perm_train) %>%
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes())

# Prepped copy is for inspection only; it must not go into the workflow.
perm_prep <- perm_rec %>%
  prep()
perm_prep

# PLS with a fixed number of components, fitted once as a sanity check.
pls_spec <- pls(num_comp = 4) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# The workflow gets the UNTRAINED recipe: workflows trains the recipe itself
# (per resample when tuning), and a trained recipe is rejected by
# add_recipe() in current workflows releases.
wf <- workflow() %>%
  add_recipe(perm_rec)

pls_fit <- wf %>%
  add_model(pls_spec) %>%
  fit(data = perm_train)

# NOTE(review): pull_workflow_fit() is deprecated in newer workflows
# releases in favor of extract_fit_parsnip(); switch once the installed
# version provides it.
pls_fit %>%
  pull_workflow_fit() %>%
  tidy()

# 10-fold cross-validation on the training set.
set.seed(123)
perm_folds <- vfold_cv(perm_train, v = 10)

# Same model, but with the number of components marked for tuning.
pls_tune_spec <- pls(num_comp = tune()) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# Candidate values: 1 to 20 components.
comp_grid <- expand.grid(num_comp = seq(from = 1, to = 20, by = 1))

# NOTE(review): registers a parallel backend. If every model fails in
# tune_grid(), rerun sequentially to rule out the backend as the cause.
doParallel::registerDoParallel()

set.seed(4763)
pls_grid <- tune_grid(
  wf %>% add_model(pls_tune_spec),
  resamples = perm_folds,
  grid = comp_grid
)
此时我收到以下错误:
所有模型都在 tune_grid() 中失败。请参阅 .notes
列。
两个问题:
- 为什么我的 tune_grid 调参会失败,我该如何解决?
- 如何查看 .notes 列?
我猜您可能使用的是 Windows 计算机,因为目前 CRAN 版本的 tune 在 Windows 上进行并行处理时存在一个 bug。请尝试以下任一做法:
- 不使用并行处理,按顺序训练,或者
- 通过
devtools::install_github("tidymodels/tune")
安装已修复此 bug 的 tune 开发版本
您应该会看到如下结果:
library(tidymodels)
library(plsmod)
library(AppliedPredictiveModeling)
# Load the permeability example data; `fingerprints` (predictors) and
# `permeability` (outcome) are used immediately below.
data(permeability)

# One tibble holding predictors and outcome side by side.
df <- as_tibble(cbind(fingerprints, permeability))

# Reproducible train/test split.
set.seed(123)
perm_split <- initial_split(df)
perm_train <- training(perm_split)
perm_test <- testing(perm_split)

# Reproducible 10-fold cross-validation on the training set.
set.seed(234)
perm_folds <- vfold_cv(perm_train, v = 10)

# Untrained recipe: filter near-zero-variance predictors inside the recipe
# (so it is re-estimated per resample), then center and scale predictors.
perm_rec <- recipe(permeability ~ ., data = perm_train) %>%
  step_nzv(all_predictors()) %>%
  step_center(all_numeric(), -all_outcomes()) %>%
  step_scale(all_numeric(), -all_outcomes())

# PLS regression with the number of components left to be tuned.
pls_spec <- pls(num_comp = tune()) %>%
  set_mode("regression") %>%
  set_engine("mixOmics")

# Candidate numbers of components: 1, 6, 11, 16.
comp_grid <- tibble(num_comp = seq(1, 20, by = 5))

doParallel::registerDoParallel()

# Assemble the workflow and tune it over the grid; the result is printed.
workflow() %>%
  add_recipe(perm_rec) %>%
  add_model(pls_spec) %>%
  tune_grid(
    resamples = perm_folds,
    grid = comp_grid
  )
#>
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#>
#> %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#> flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#> splice
#>
#> Attaching package: 'vctrs'
#> The following object is masked from 'package:tibble':
#>
#> data_frame
#> The following object is masked from 'package:dplyr':
#>
#> data_frame
#> Loading required package: MASS
#>
#> Attaching package: 'MASS'
#> The following object is masked from 'package:dplyr':
#>
#> select
#> Loading required package: lattice
#>
#> Loaded mixOmics 6.12.2
#> Thank you for using mixOmics!
#> Tutorials: http://mixomics.org
#> Bookdown vignette: https://mixomicsteam.github.io/Bookdown
#> Questions, issues: Follow the prompts at http://mixomics.org/contact-us
#> Cite us: citation('mixOmics')
#>
#> Attaching package: 'mixOmics'
#> The following object is masked from 'package:plsmod':
#>
#> pls
#> The following object is masked from 'package:tune':
#>
#> tune
#> The following object is masked from 'package:purrr':
#>
#> map
#> # Tuning results
#> # 10-fold cross-validation
#> # A tibble: 10 x 4
#> splits id .metrics .notes
#> <list> <chr> <list> <list>
#> 1 <split [111/13]> Fold01 <tibble [8 × 5]> <tibble [0 × 1]>
#> 2 <split [111/13]> Fold02 <tibble [8 × 5]> <tibble [0 × 1]>
#> 3 <split [111/13]> Fold03 <tibble [8 × 5]> <tibble [0 × 1]>
#> 4 <split [111/13]> Fold04 <tibble [8 × 5]> <tibble [0 × 1]>
#> 5 <split [112/12]> Fold05 <tibble [8 × 5]> <tibble [0 × 1]>
#> 6 <split [112/12]> Fold06 <tibble [8 × 5]> <tibble [0 × 1]>
#> 7 <split [112/12]> Fold07 <tibble [8 × 5]> <tibble [0 × 1]>
#> 8 <split [112/12]> Fold08 <tibble [8 × 5]> <tibble [0 × 1]>
#> 9 <split [112/12]> Fold09 <tibble [8 × 5]> <tibble [0 × 1]>
#> 10 <split [112/12]> Fold10 <tibble [8 × 5]> <tibble [0 × 1]>
由 reprex package (v0.3.0.9001)
于 2020-11-12 创建
如果您有一个像 pls_grid
这样带有 .notes 列的对象,您可以通过 pls_grid$.notes
访问该列,或者通过 pls_grid$.notes[[1]]
查看其中的第一个元素。