绘制回测 Workflow_Set 数据
Plotting Backtested Workflow_Set data
我正在尝试查看此模型相对于之前的实际收盘价的表现。我使用的是 workflow_set 模型,提取预测没有问题。我在下面提供了一个可重现的例子。我希望能够绘制实际情况,以及经过回测的趋势线和预测。
# Reproducible example: tune a small workflow set on daily TSLA closing
# prices, pick the workflow with the lowest RMSE, and fit it on all of the
# observed data.
#
# Packages are attached here so the script runs in a fresh R session.
library(tidymodels) # dplyr, tidyr, recipes, parsnip, rsample, tune, workflowsets
library(timetk)     # future_frame(), step_timeseries_signature()
library(finetune)   # tune_race_anova()
library(rules)      # Cubist engine for cubist_rules()

# ---- Download the price history ----
tickers <- "TSLA"
first.date <- Sys.Date() - 3000
last.date <- Sys.Date()
freq.data <- "daily"

stocks <- BatchGetSymbols::BatchGetSymbols(
  tickers = tickers,
  first.date = first.date,
  last.date = last.date,
  freq.data = freq.data,
  do.cache = FALSE,
  thresh.bad.data = 0
)

# The price rows live in the `df.tickers` element of the returned list; pull
# that element out directly instead of coercing the whole list to a data frame.
stocks <- stocks$df.tickers %>%
  as.data.frame() %>%
  select(Date = ref.date, Close = price.close)

# ---- Time-based resamples used for tuning ----
time_val_split <- stocks %>%
  sliding_period(
    Date,
    period = "day",
    every = 52
  )

# ---- Append 60 future rows (Close is NA there) ----
data_extended <- stocks %>%
  future_frame(
    .length_out = 60,
    .bind_data = TRUE
  ) %>%
  ungroup()

# Observed rows are used for fitting; the NA rows are the forecast horizon.
train_tbl <- data_extended %>% drop_na()
future_tbl <- data_extended %>% filter(is.na(Close))

# ---- Preprocessing ----
# step_normalize() already centers and scales each predictor to unit variance,
# so no separate step_scale() is needed after it.
base_rec <- recipe(Close ~ Date, train_tbl) %>%
  step_timeseries_signature(Date) %>%
  step_rm(matches("(.xts$)|(.iso$)|(.lbl)|(hour)|(minute)|(second)|(am.pm)|(mweek)|(qday)|(week2)|(week3)|(week4)")) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_rm(Date)

# ---- Model specifications ----
cubist_spec <-
  cubist_rules(
    committees = tune(),
    neighbors = tune()
  ) %>%
  set_engine("Cubist")

rf_spec <-
  rand_forest(
    mtry = tune(),
    min_n = tune(),
    trees = 1000
  ) %>%
  set_engine("ranger") %>%
  set_mode("regression")

# ---- Workflow set ----
# The second model is the random forest defined above (the script never
# defines a `cart_spec`, so referencing it would fail here).
base <-
  workflow_set(
    preproc = list(base_date = base_rec),
    models = list(
      cubist_base = cubist_spec,
      rf_base = rf_spec
    )
  )

all_workflows <-
  bind_rows(
    base
  )

# ---- Tune in parallel ----
cores <- parallel::detectCores(logical = FALSE)
clusters <- parallel::makePSOCKcluster(cores)
doParallel::registerDoParallel(clusters)

wflwset_tune_results <-
  all_workflows %>%
  workflow_map(
    fn = "tune_race_anova",
    seed = 1,
    resamples = time_val_split,
    grid = 2,
    verbose = TRUE
  )

# The cluster was created explicitly, so it has to be stopped explicitly;
# stopImplicitCluster() only affects clusters that registerDoParallel()
# created on its own. Re-register the sequential backend afterwards so later
# foreach calls do not try to reach the stopped workers.
parallel::stopCluster(clusters)
foreach::registerDoSEQ()

# ---- Select the best workflow by RMSE ----
best_for_each_mod <- wflwset_tune_results %>%
  rank_results(select_best = TRUE) %>%
  filter(.metric == "rmse") %>%
  select(wflow_id, .config, mean, preprocessor, model)

# pull() returns the id as a plain character string.
b_mod <- best_for_each_mod %>%
  arrange(mean) %>%
  head(1) %>%
  pull(wflow_id)

best_param <- wflwset_tune_results %>%
  extract_workflow_set_result(id = b_mod) %>%
  select_best(metric = "rmse")

# Finalize model with best param
best_finalized <- wflwset_tune_results %>%
  extract_workflow(b_mod) %>%
  finalize_workflow(best_param) %>%
  fit(train_tbl)
此时模型已经训练完毕,但我不知道如何让它针对此前的实际值运行(即进行回测)。我的目标是将回测结果与下面的预测结合起来。
# Forecast the future dates, keeping only the predicted close and its date,
# and tag the rows so they can be told apart from the observed series.
future_pred <- predict(best_finalized, new_data = future_tbl)

prediction_tbl <- bind_cols(future_pred, future_tbl) %>%
  select(Close = .pred, Date) %>%
  mutate(type = "prediction")

# Observed closes, tagged the same way.
actual_tbl <- mutate(train_tbl, type = "actual")

# Stack actuals and forecast and draw one line per series.
rbind(actual_tbl, prediction_tbl) %>%
  ggplot(aes(Date, Close, color = type)) +
  geom_line(size = 2)
根据您的评论,我建议在将 future_tbl 绑定到您的预测之后使用 pivot_longer()。这样您可以把所有步骤放在同一个管道中,而不必先创建两个单独的数据框再将它们绑定在一起。下面是一个以 mpg 为横轴绘制预测值和实际值的示例。希望这对您有所帮助!
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip

# Seeded train/test partition of mtcars.
set.seed(123)
mtcars <- as_tibble(mtcars)
cars_split <- initial_split(mtcars)
cars_train <- training(cars_split)
cars_test <- testing(cars_split)

# Linear model of qsec on all other columns, fitted on the training rows.
lm_fit <- workflow() %>%
  add_model(set_engine(linear_reg(), "lm")) %>%
  add_recipe(recipe(qsec ~ ., data = cars_train)) %>%
  fit(cars_train)

# Test-set predictions bound to the test rows, then stacked so that the
# predicted and observed qsec share one value column.
comparison_tbl <- lm_fit %>%
  predict(cars_test) %>%
  bind_cols(cars_test) %>%
  pivot_longer(
    cols = c(.pred, qsec),
    names_to = "comparison",
    values_to = "value"
  )

# Plot truth and prediction against another variable (mpg).
ggplot(comparison_tbl, aes(x = mpg, y = value, color = comparison)) +
  geom_point(alpha = 0.75)
由 reprex package (v2.0.1) 于 2021-11-18 创建
我正在尝试查看此模型相对于之前的实际收盘价的表现。我使用的是 workflow_set 模型,提取预测没有问题。我在下面提供了一个可重现的例子。我希望能够绘制实际情况,以及经过回测的趋势线和预测。
# Reproducible example: tune a small workflow set on daily TSLA closing
# prices, pick the workflow with the lowest RMSE, and fit it on all of the
# observed data.
#
# Packages are attached here so the script runs in a fresh R session.
library(tidymodels) # dplyr, tidyr, recipes, parsnip, rsample, tune, workflowsets
library(timetk)     # future_frame(), step_timeseries_signature()
library(finetune)   # tune_race_anova()
library(rules)      # Cubist engine for cubist_rules()

# ---- Download the price history ----
tickers <- "TSLA"
first.date <- Sys.Date() - 3000
last.date <- Sys.Date()
freq.data <- "daily"

stocks <- BatchGetSymbols::BatchGetSymbols(
  tickers = tickers,
  first.date = first.date,
  last.date = last.date,
  freq.data = freq.data,
  do.cache = FALSE,
  thresh.bad.data = 0
)

# The price rows live in the `df.tickers` element of the returned list; pull
# that element out directly instead of coercing the whole list to a data frame.
stocks <- stocks$df.tickers %>%
  as.data.frame() %>%
  select(Date = ref.date, Close = price.close)

# ---- Time-based resamples used for tuning ----
time_val_split <- stocks %>%
  sliding_period(
    Date,
    period = "day",
    every = 52
  )

# ---- Append 60 future rows (Close is NA there) ----
data_extended <- stocks %>%
  future_frame(
    .length_out = 60,
    .bind_data = TRUE
  ) %>%
  ungroup()

# Observed rows are used for fitting; the NA rows are the forecast horizon.
train_tbl <- data_extended %>% drop_na()
future_tbl <- data_extended %>% filter(is.na(Close))

# ---- Preprocessing ----
# step_normalize() already centers and scales each predictor to unit variance,
# so no separate step_scale() is needed after it.
base_rec <- recipe(Close ~ Date, train_tbl) %>%
  step_timeseries_signature(Date) %>%
  step_rm(matches("(.xts$)|(.iso$)|(.lbl)|(hour)|(minute)|(second)|(am.pm)|(mweek)|(qday)|(week2)|(week3)|(week4)")) %>%
  step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
  step_normalize(all_numeric_predictors()) %>%
  step_rm(Date)

# ---- Model specifications ----
cubist_spec <-
  cubist_rules(
    committees = tune(),
    neighbors = tune()
  ) %>%
  set_engine("Cubist")

rf_spec <-
  rand_forest(
    mtry = tune(),
    min_n = tune(),
    trees = 1000
  ) %>%
  set_engine("ranger") %>%
  set_mode("regression")

# ---- Workflow set ----
# The second model is the random forest defined above (the script never
# defines a `cart_spec`, so referencing it would fail here).
base <-
  workflow_set(
    preproc = list(base_date = base_rec),
    models = list(
      cubist_base = cubist_spec,
      rf_base = rf_spec
    )
  )

all_workflows <-
  bind_rows(
    base
  )

# ---- Tune in parallel ----
cores <- parallel::detectCores(logical = FALSE)
clusters <- parallel::makePSOCKcluster(cores)
doParallel::registerDoParallel(clusters)

wflwset_tune_results <-
  all_workflows %>%
  workflow_map(
    fn = "tune_race_anova",
    seed = 1,
    resamples = time_val_split,
    grid = 2,
    verbose = TRUE
  )

# The cluster was created explicitly, so it has to be stopped explicitly;
# stopImplicitCluster() only affects clusters that registerDoParallel()
# created on its own. Re-register the sequential backend afterwards so later
# foreach calls do not try to reach the stopped workers.
parallel::stopCluster(clusters)
foreach::registerDoSEQ()

# ---- Select the best workflow by RMSE ----
best_for_each_mod <- wflwset_tune_results %>%
  rank_results(select_best = TRUE) %>%
  filter(.metric == "rmse") %>%
  select(wflow_id, .config, mean, preprocessor, model)

# pull() returns the id as a plain character string.
b_mod <- best_for_each_mod %>%
  arrange(mean) %>%
  head(1) %>%
  pull(wflow_id)

best_param <- wflwset_tune_results %>%
  extract_workflow_set_result(id = b_mod) %>%
  select_best(metric = "rmse")

# Finalize model with best param
best_finalized <- wflwset_tune_results %>%
  extract_workflow(b_mod) %>%
  finalize_workflow(best_param) %>%
  fit(train_tbl)
此时模型已经训练完毕,但我不知道如何让它针对此前的实际值运行(即进行回测)。我的目标是将回测结果与下面的预测结合起来。
# Forecast the future dates, keeping only the predicted close and its date,
# and tag the rows so they can be told apart from the observed series.
future_pred <- predict(best_finalized, new_data = future_tbl)

prediction_tbl <- bind_cols(future_pred, future_tbl) %>%
  select(Close = .pred, Date) %>%
  mutate(type = "prediction")

# Observed closes, tagged the same way.
actual_tbl <- mutate(train_tbl, type = "actual")

# Stack actuals and forecast and draw one line per series.
rbind(actual_tbl, prediction_tbl) %>%
  ggplot(aes(Date, Close, color = type)) +
  geom_line(size = 2)
根据您的评论,我建议在将 future_tbl 绑定到您的预测之后使用 pivot_longer()。这样您可以把所有步骤放在同一个管道中,而不必先创建两个单独的数据框再将它们绑定在一起。下面是一个以 mpg 为横轴绘制预测值和实际值的示例。希望这对您有所帮助!
library(tidymodels)
#> Registered S3 method overwritten by 'tune':
#> method from
#> required_pkgs.model_spec parsnip

# Seeded train/test partition of mtcars.
set.seed(123)
mtcars <- as_tibble(mtcars)
cars_split <- initial_split(mtcars)
cars_train <- training(cars_split)
cars_test <- testing(cars_split)

# Linear model of qsec on all other columns, fitted on the training rows.
lm_fit <- workflow() %>%
  add_model(set_engine(linear_reg(), "lm")) %>%
  add_recipe(recipe(qsec ~ ., data = cars_train)) %>%
  fit(cars_train)

# Test-set predictions bound to the test rows, then stacked so that the
# predicted and observed qsec share one value column.
comparison_tbl <- lm_fit %>%
  predict(cars_test) %>%
  bind_cols(cars_test) %>%
  pivot_longer(
    cols = c(.pred, qsec),
    names_to = "comparison",
    values_to = "value"
  )

# Plot truth and prediction against another variable (mpg).
ggplot(comparison_tbl, aes(x = mpg, y = value, color = comparison)) +
  geom_point(alpha = 0.75)
由 reprex package (v2.0.1) 于 2021-11-18 创建