如何在 r 中使用 xgboost 和 decision_tree 绘制树?
How to plot a tree with xgboost and decision_tree in r?
我试图在使用 tidymodels 工作流程之后在 R 中绘制决策树,但我找不到一个能直接用于 workflow 和/或最终模型的好函数。
在这样的代码之后,如何绘制树的图形?
# Specify a boosted-tree classification model (xgboost engine) with
# all major hyperparameters marked for tuning.
xgboost_spec <-
  boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
             loss_reduction = tune(), sample_size = tune()) %>%
  set_mode("classification") %>%
  set_engine("xgboost")

# Bundle the preprocessing recipe and the model spec into one workflow.
xgboost_workflow <-
  workflow() %>%
  add_recipe(data_recipe) %>%
  add_model(xgboost_spec)

# Tune over a space-filling grid of 10 candidates across the resamples.
xgboost_tune <-
  tune_grid(xgboost_workflow, resamples = data_folds, grid = 10)

# Finalize the workflow with the best hyperparameters by ROC AUC.
# Name the `metric` argument explicitly: passing it positionally as the
# second argument is deprecated in recent versions of the tune package.
final_xgboost <- xgboost_workflow %>%
  finalize_workflow(select_best(xgboost_tune, metric = "roc_auc"))

# Re-fit the finalized workflow across the resamples, saving the
# out-of-sample predictions for later inspection.
xgboost_results <- final_xgboost %>%
  fit_resamples(
    resamples = data_folds,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = control_resamples(save_pred = TRUE)
  )
还是在决策树代码之后?
# Specify a single CART decision tree (rpart engine) with tuned
# complexity, depth, and minimum node size.
tree_spec <- decision_tree(
  cost_complexity = tune(),
  tree_depth = tune(),
  min_n = tune()) %>%
  set_engine("rpart") %>%
  set_mode("classification")

# Bundle the preprocessing recipe and the model spec into one workflow.
tree_workflow <-
  workflow() %>%
  add_recipe(data_recipe) %>%
  add_model(tree_spec)

# Regular grid: 4 levels per parameter (4^3 = 64 candidates).
tree_grid <- grid_regular(cost_complexity(),
                          tree_depth(),
                          min_n(), levels = 4)

# Tune the workflow over the grid on the resamples.
tree_tune <- tree_workflow %>%
  tune_grid(
    resamples = data_folds,
    grid = tree_grid,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity)
  )

# Finalize with the best hyperparameters by ROC AUC. Name the `metric`
# argument explicitly: positional use is deprecated in recent tune versions.
final_tree <- tree_workflow %>%
  finalize_workflow(select_best(tree_tune, metric = "roc_auc"))

# Re-fit the finalized workflow across the resamples, saving predictions.
tree_results <- final_tree %>%
  fit_resamples(
    resamples = data_folds,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = control_resamples(save_pred = TRUE)
  )
可能吗?或者我应该使用 last_fit() 之后得到的模型?
谢谢!
我认为绘制 xgboost 模型没有多大意义,因为它是增强树(很多很多树),但您可以绘制单个决策树。
关键是,大多数用于可视化树结果的软件包都要求您先修复 call 对象(repair the call object)。
# Load the tidymodels meta-package (parsnip, rpart interface, etc.).
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
# Example data; drop rows with missing values so rpart sees complete cases.
data(penguins)
penguins <- na.omit(penguins)
# A plain (untuned) CART classification tree via the rpart engine.
cart_spec <-
decision_tree() %>%
set_engine("rpart") %>%
set_mode("classification")
# Fit with the formula interface on the penguins data.
cart_fit <-
cart_spec %>%
fit(sex ~ species + bill_length_mm + body_mass_g, data = penguins)
# repair_call() rewrites the stored call so downstream plotting packages
# (which re-evaluate the call) can find the training data.
cart_fit <- repair_call(cart_fit, data = penguins)
library(rattle)
#> Loading required package: bitops
#> Rattle: A free graphical interface for data science with R.
#> Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
#> Type 'rattle()' to shake, rattle, and roll your data.
# Plot the underlying rpart object ($fit), not the parsnip wrapper.
fancyRpartPlot(cart_fit$fit)
由 reprex package (v2.0.0)
于 2021-08-07 创建
rattle package isn't the only thing out there; ggparty 是另一个不错的选择。
这确实意味着您必须使用 parsnip 模型加预处理器,而不是工作流程。可以在这里看到一个关于如何调优 parsnip 模型加预处理器的教程(tutorial of how to tune a parsnip model plus preprocessor)。
我试图在使用 tidymodels 工作流程之后在 R 中绘制决策树,但我找不到一个能直接用于 workflow 和/或最终模型的好函数。 在这样的代码之后,如何绘制树的图形?
# Specify a boosted-tree classification model (xgboost engine) with
# all major hyperparameters marked for tuning.
xgboost_spec <-
  boost_tree(trees = tune(), min_n = tune(), tree_depth = tune(), learn_rate = tune(),
             loss_reduction = tune(), sample_size = tune()) %>%
  set_mode("classification") %>%
  set_engine("xgboost")

# Bundle the preprocessing recipe and the model spec into one workflow.
xgboost_workflow <-
  workflow() %>%
  add_recipe(data_recipe) %>%
  add_model(xgboost_spec)

# Tune over a space-filling grid of 10 candidates across the resamples.
xgboost_tune <-
  tune_grid(xgboost_workflow, resamples = data_folds, grid = 10)

# Finalize the workflow with the best hyperparameters by ROC AUC.
# Name the `metric` argument explicitly: passing it positionally as the
# second argument is deprecated in recent versions of the tune package.
final_xgboost <- xgboost_workflow %>%
  finalize_workflow(select_best(xgboost_tune, metric = "roc_auc"))

# Re-fit the finalized workflow across the resamples, saving the
# out-of-sample predictions for later inspection.
xgboost_results <- final_xgboost %>%
  fit_resamples(
    resamples = data_folds,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = control_resamples(save_pred = TRUE)
  )
还是在决策树代码之后?
# Specify a single CART decision tree (rpart engine) with tuned
# complexity, depth, and minimum node size.
tree_spec <- decision_tree(
  cost_complexity = tune(),
  tree_depth = tune(),
  min_n = tune()) %>%
  set_engine("rpart") %>%
  set_mode("classification")

# Bundle the preprocessing recipe and the model spec into one workflow.
tree_workflow <-
  workflow() %>%
  add_recipe(data_recipe) %>%
  add_model(tree_spec)

# Regular grid: 4 levels per parameter (4^3 = 64 candidates).
tree_grid <- grid_regular(cost_complexity(),
                          tree_depth(),
                          min_n(), levels = 4)

# Tune the workflow over the grid on the resamples.
tree_tune <- tree_workflow %>%
  tune_grid(
    resamples = data_folds,
    grid = tree_grid,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity)
  )

# Finalize with the best hyperparameters by ROC AUC. Name the `metric`
# argument explicitly: positional use is deprecated in recent tune versions.
final_tree <- tree_workflow %>%
  finalize_workflow(select_best(tree_tune, metric = "roc_auc"))

# Re-fit the finalized workflow across the resamples, saving predictions.
tree_results <- final_tree %>%
  fit_resamples(
    resamples = data_folds,
    metrics = metric_set(roc_auc, accuracy, sensitivity, specificity),
    control = control_resamples(save_pred = TRUE)
  )
可能吗?或者我应该使用 last_fit() 之后得到的模型?
谢谢!
我认为绘制 xgboost 模型没有多大意义,因为它是增强树(很多很多树),但您可以绘制单个决策树。
关键是,大多数用于可视化树结果的软件包都要求您先修复 call 对象(repair the call object)。
# Load the tidymodels meta-package (parsnip, rpart interface, etc.).
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip
# Example data; drop rows with missing values so rpart sees complete cases.
data(penguins)
penguins <- na.omit(penguins)
# A plain (untuned) CART classification tree via the rpart engine.
cart_spec <-
decision_tree() %>%
set_engine("rpart") %>%
set_mode("classification")
# Fit with the formula interface on the penguins data.
cart_fit <-
cart_spec %>%
fit(sex ~ species + bill_length_mm + body_mass_g, data = penguins)
# repair_call() rewrites the stored call so downstream plotting packages
# (which re-evaluate the call) can find the training data.
cart_fit <- repair_call(cart_fit, data = penguins)
library(rattle)
#> Loading required package: bitops
#> Rattle: A free graphical interface for data science with R.
#> Version 5.4.0 Copyright (c) 2006-2020 Togaware Pty Ltd.
#> Type 'rattle()' to shake, rattle, and roll your data.
# Plot the underlying rpart object ($fit), not the parsnip wrapper.
fancyRpartPlot(cart_fit$fit)
由 reprex package (v2.0.0)
于 2021-08-07 创建。rattle 并不是唯一的选择;ggparty 是另一个不错的选择。
这确实意味着您必须使用 parsnip 模型加预处理器,而不是工作流程。可以在这里看到一个关于如何调优 parsnip 模型加预处理器的教程(tutorial of how to tune a parsnip model plus preprocessor)。