在 R 中,在配方中包含 step_pca 时工作流程会出错
In R, error in workflow fit when including step_pca in recipe
在 tidymodels 中,我想基于配方(recipe)和模型规范(model specification)创建一个工作流(workflow)。不包含 step_pca() 时一切正常;但当我在配方中加入 step_pca() 后,拟合工作流时就会报错。请看下面的 reprex。
(如果我不使用 workflow(),代码可以正常运行;但这样我就失去了 update_role() 等与变量角色相关的功能。)
# Reprex: fit a workflow whose recipe optionally includes step_pca().
# NOTE(review): tibble() and %>% are used without a namespace prefix, so this
# assumes the packages providing them are already attached -- confirm.
# Toy training data: predictors x1 and x2, a row identifier id, and outcome y.
x1 <- c(1, 6, 4, 2, 3, 4, 5, 7, 8, 2)
x2 <- c(1, 3, 4, 2, 3, 4, 5, 7, 8, 2)
id <- c(1:10)
y <- c(1, 4, 2, 5, 6, 2, 3, 6, 2, 4)
df1_train <- tibble(x1, x2, id, y)
# PCA switch read by the braced block in the recipe below:
#   NA         -> no step_pca() is added
#   value >= 1 -> passed to step_pca() as num_comp
#   value < 1  -> passed to step_pca() as threshold
# NA works with workflow
step_PCA_PREPROCESSING = NA
# Does not work with workflow
# (this second assignment overwrites the NA above, so 0.9 is the value used)
step_PCA_PREPROCESSING = 0.9
# My recipe
df1_train_recipe <- df1_train %>%
recipes::recipe(y ~ .) %>%
# Give id the role "id variable" instead of its formula-derived role.
recipes::update_role(id, new_role = "id variable") %>%
# Center and then scale every variable selected by all_predictors().
recipes::step_center(recipes::all_predictors()) %>%
recipes::step_scale(recipes::all_predictors()) %>%
# Optional step_pca
# Inside the braces, `.` is the recipe piped in from the steps above.
{
if (!is.na(step_PCA_PREPROCESSING)) {
if (step_PCA_PREPROCESSING >= 1) {
# Value >= 1: use it as num_comp.
recipes::step_pca(., recipes::all_predictors(), num_comp = step_PCA_PREPROCESSING)
} else if (step_PCA_PREPROCESSING < 1) {
# Value < 1: use it as threshold.
recipes::step_pca(., recipes::all_predictors(), threshold = step_PCA_PREPROCESSING)
} else {
# Fallback: return the recipe unchanged. A single non-missing number is
# always either >= 1 or < 1, so this branch should never be reached.
.
}
} else {
# NA: skip PCA and pass the recipe through unchanged.
.
}
} %>%
# NOTE(review): the recipe is trained here, before being added to the workflow;
# workflows::add_recipe() documents that it expects an untrained recipe -- confirm.
recipes::prep()
# Model specifications
# NOTE(review): presumably the reported failure comes from the glmnet engine
# receiving a single predictor column when the 0.9 threshold keeps only one
# principal component (glmnet requires at least two columns) -- TODO confirm.
model_spec <- parsnip::linear_reg() %>%
parsnip::set_engine("glmnet")
# Create workflow (to know variable roles from recipes)
df1_workflow <- workflows::workflow() %>%
workflows::add_recipe(df1_train_recipe) %>%
workflows::add_model(model_spec)
# Fit model
# (per the question text, this is where the error appears once step_pca() is included)
mod <- parsnip::fit(df1_workflow, data = df1_train)
提前致谢
我认为最好的方法是利用 step_pca() 可以把 num_comp 设置为零的能力,这意味着不进行 PCA 分解。这对于您的用例来说非常方便,因为 threshold 将覆盖 num_comp。step_pca() 文档中对 threshold 参数的说明如下:
Note: using this argument will override and reset any value given to num_comp.
在下面的示例中,threshold 固定为 0.9,仅通过把 num_comp 设为 0 或 1 来关闭或开启 PCA。
# Load the tidymodels collection of packages.
library(tidymodels)

# Toy training data: predictors x1 and x2, a row identifier id, and outcome y.
x1 <- c(1, 6, 4, 2, 3, 4, 5, 7, 8, 2)
x2 <- c(1, 3, 4, 2, 3, 4, 5, 7, 8, 2)
id <- 1:10
y <- c(1, 4, 2, 5, 6, 2, 3, 6, 2, 4)
df1_train <- tibble(x1 = x1, x2 = x2, id = id, y = y)

# num_comp values used as the PCA switch: 0 disables the decomposition,
# 1 keeps a single principal component.
turn_off_pca <- 0
turn_on_pca <- 1

# Preprocessing shared by both recipes: id gets a non-predictor role, then
# the predictors are centered and scaled.
base_rec <- recipe(y ~ ., data = df1_train) %>%
  update_role(id, new_role = "id variable") %>%
  step_center(all_predictors()) %>%
  step_scale(all_predictors())

# Same PCA step in both recipes; only num_comp differs.
rec1 <- step_pca(
  base_rec,
  all_predictors(),
  threshold = 0.9,
  num_comp = turn_off_pca
)
rec2 <- step_pca(
  base_rec,
  all_predictors(),
  threshold = 0.9,
  num_comp = turn_on_pca
)

# Ordinary least squares via the "lm" engine.
lm_spec <- set_engine(linear_reg(), "lm")

# Fit the workflow that uses the recipe with PCA switched off; the fitted
# workflow is printed because the call sits at top level.
rec1_workflow <- workflow() %>%
  add_model(lm_spec) %>%
  add_recipe(rec1)
fit(rec1_workflow, df1_train)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> ● step_center()
#> ● step_scale()
#> ● step_pca()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#>
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#>
#> Coefficients:
#> (Intercept) x1 x2
#> 3.5000 0.4607 -0.3459
# Fit the workflow that uses the recipe with PCA switched on (rec2); the
# fitted workflow is printed because the call sits at top level.
rec2_workflow <- workflow() %>%
  add_model(lm_spec) %>%
  add_recipe(rec2)
fit(rec2_workflow, df1_train)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> ● step_center()
#> ● step_scale()
#> ● step_pca()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#>
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#>
#> Coefficients:
#> (Intercept) PC1
#> 3.50000 0.08116
由 reprex package (v0.3.0.9001) 于 2020-12-06 创建
在 tidymodels 中,我想基于配方(recipe)和模型规范(model specification)创建一个工作流(workflow)。不包含 step_pca() 时一切正常;但当我在配方中加入 step_pca() 后,拟合工作流时就会报错。请看下面的 reprex。
(如果我不使用 workflow(),代码可以正常运行;但这样我就失去了 update_role() 等与变量角色相关的功能。)
# Reprex: fit a workflow whose recipe optionally includes step_pca().
# NOTE(review): tibble() and %>% are used without a namespace prefix, so this
# assumes the packages providing them are already attached -- confirm.
# Toy training data: predictors x1 and x2, a row identifier id, and outcome y.
x1 <- c(1, 6, 4, 2, 3, 4, 5, 7, 8, 2)
x2 <- c(1, 3, 4, 2, 3, 4, 5, 7, 8, 2)
id <- c(1:10)
y <- c(1, 4, 2, 5, 6, 2, 3, 6, 2, 4)
df1_train <- tibble(x1, x2, id, y)
# PCA switch read by the braced block in the recipe below:
#   NA         -> no step_pca() is added
#   value >= 1 -> passed to step_pca() as num_comp
#   value < 1  -> passed to step_pca() as threshold
# NA works with workflow
step_PCA_PREPROCESSING = NA
# Does not work with workflow
# (this second assignment overwrites the NA above, so 0.9 is the value used)
step_PCA_PREPROCESSING = 0.9
# My recipe
df1_train_recipe <- df1_train %>%
recipes::recipe(y ~ .) %>%
# Give id the role "id variable" instead of its formula-derived role.
recipes::update_role(id, new_role = "id variable") %>%
# Center and then scale every variable selected by all_predictors().
recipes::step_center(recipes::all_predictors()) %>%
recipes::step_scale(recipes::all_predictors()) %>%
# Optional step_pca
# Inside the braces, `.` is the recipe piped in from the steps above.
{
if (!is.na(step_PCA_PREPROCESSING)) {
if (step_PCA_PREPROCESSING >= 1) {
# Value >= 1: use it as num_comp.
recipes::step_pca(., recipes::all_predictors(), num_comp = step_PCA_PREPROCESSING)
} else if (step_PCA_PREPROCESSING < 1) {
# Value < 1: use it as threshold.
recipes::step_pca(., recipes::all_predictors(), threshold = step_PCA_PREPROCESSING)
} else {
# Fallback: return the recipe unchanged. A single non-missing number is
# always either >= 1 or < 1, so this branch should never be reached.
.
}
} else {
# NA: skip PCA and pass the recipe through unchanged.
.
}
} %>%
# NOTE(review): the recipe is trained here, before being added to the workflow;
# workflows::add_recipe() documents that it expects an untrained recipe -- confirm.
recipes::prep()
# Model specifications
# NOTE(review): presumably the reported failure comes from the glmnet engine
# receiving a single predictor column when the 0.9 threshold keeps only one
# principal component (glmnet requires at least two columns) -- TODO confirm.
model_spec <- parsnip::linear_reg() %>%
parsnip::set_engine("glmnet")
# Create workflow (to know variable roles from recipes)
df1_workflow <- workflows::workflow() %>%
workflows::add_recipe(df1_train_recipe) %>%
workflows::add_model(model_spec)
# Fit model
# (per the question text, this is where the error appears once step_pca() is included)
mod <- parsnip::fit(df1_workflow, data = df1_train)
提前致谢
我认为最好的方法是利用 step_pca() 可以把 num_comp 设置为零的能力,这意味着不进行 PCA 分解。这对于您的用例来说非常方便,因为 threshold 将覆盖 num_comp。step_pca() 文档中对 threshold 参数的说明如下:
Note: using this argument will override and reset any value given to num_comp.
在下面的示例中,threshold 固定为 0.9,仅通过把 num_comp 设为 0 或 1 来关闭或开启 PCA。
# Load the tidymodels collection of packages.
library(tidymodels)

# Toy training data: predictors x1 and x2, a row identifier id, and outcome y.
x1 <- c(1, 6, 4, 2, 3, 4, 5, 7, 8, 2)
x2 <- c(1, 3, 4, 2, 3, 4, 5, 7, 8, 2)
id <- 1:10
y <- c(1, 4, 2, 5, 6, 2, 3, 6, 2, 4)
df1_train <- tibble(x1 = x1, x2 = x2, id = id, y = y)

# num_comp values used as the PCA switch: 0 disables the decomposition,
# 1 keeps a single principal component.
turn_off_pca <- 0
turn_on_pca <- 1

# Preprocessing shared by both recipes: id gets a non-predictor role, then
# the predictors are centered and scaled.
base_rec <- recipe(y ~ ., data = df1_train) %>%
  update_role(id, new_role = "id variable") %>%
  step_center(all_predictors()) %>%
  step_scale(all_predictors())

# Same PCA step in both recipes; only num_comp differs.
rec1 <- step_pca(
  base_rec,
  all_predictors(),
  threshold = 0.9,
  num_comp = turn_off_pca
)
rec2 <- step_pca(
  base_rec,
  all_predictors(),
  threshold = 0.9,
  num_comp = turn_on_pca
)

# Ordinary least squares via the "lm" engine.
lm_spec <- set_engine(linear_reg(), "lm")

# Fit the workflow that uses the recipe with PCA switched off; the fitted
# workflow is printed because the call sits at top level.
rec1_workflow <- workflow() %>%
  add_model(lm_spec) %>%
  add_recipe(rec1)
fit(rec1_workflow, df1_train)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> ● step_center()
#> ● step_scale()
#> ● step_pca()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#>
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#>
#> Coefficients:
#> (Intercept) x1 x2
#> 3.5000 0.4607 -0.3459
# Fit the workflow that uses the recipe with PCA switched on (rec2); the
# fitted workflow is printed because the call sits at top level.
rec2_workflow <- workflow() %>%
  add_model(lm_spec) %>%
  add_recipe(rec2)
fit(rec2_workflow, df1_train)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#>
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 3 Recipe Steps
#>
#> ● step_center()
#> ● step_scale()
#> ● step_pca()
#>
#> ── Model ───────────────────────────────────────────────────────────────────────
#>
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#>
#> Coefficients:
#> (Intercept) PC1
#> 3.50000 0.08116
由 reprex package (v0.3.0.9001) 于 2020-12-06 创建