如何对分组数据使用动态回归模型进行 ARIMA 预测?
How to forecast an arima with Dynamic regression models for grouped data?
我正在尝试对分组数据同时拟合并预测带回归项的 ARIMA 模型(即带 ARIMA 误差的回归)。
我是 tidy data(整洁数据)方面的新手,所以……基本上,我想用多个时间序列(ts)和多变量模型来重现此示例 (https://cran.rstudio.com/web/packages/sweep/vignettes/SW01_Forecasting_Time_Series_Groups.html)。
这是一个可重现的例子:
library(tidyverse); library(tidyquant)
library(timetk); library(sweep)
library(forecast)
library(tsibble)
library(fpp3)
# Show the packaged example data the workflow starts from.
bike_sales

# Monthly aggregation: for every secondary category and month, sum the
# quantity sold and average the price.
monthly_qty_by_cat2 <- bike_sales %>%
  mutate(order.month = as_date(as.yearmon(order.date))) %>%
  group_by(category.secondary, order.month) %>%
  summarise(
    total.qty = sum(quantity),
    price.m = mean(price)
  )

# Collapse each secondary category into a single row that holds its monthly
# rows in a nested list-column.
monthly_qty_by_cat2_nest <- monthly_qty_by_cat2 %>%
  group_by(category.secondary) %>%
  nest()
monthly_qty_by_cat2_nest
# Forecasting workflow
# Step 1: convert every nested data frame into a ts object.
# order.month is excluded from the series (the date is dropped); start = 2011
# and freq = 12 are forwarded to tk_ts for each group.
monthly_qty_by_cat2_ts <- mutate(
  monthly_qty_by_cat2_nest,
  data.ts = map(
    .x = data,
    .f = tk_ts,
    select = -order.month,
    start = 2011,
    freq = 12
  )
)
# Step 2: modelling an ARIMA(y ~ x)
# Helper intended to be mapped over the ts list-column.
#
# y: object used as the left-hand side of the model formula.
# x: object used as the right-hand side of the model formula (no default).
# Returns a one-element list wrapping whatever ARIMA(y ~ x) produces.
# NOTE(review): with fpp3 attached, ARIMA presumably resolves to fable's
# model specification rather than a fitted model - confirm.
modeloARIMA_reg <- function(y, x) {
  list(ARIMA(y ~ x))
}
# Apply the helper to every ts object. Each element of data.ts is passed as
# the first argument (y); no value is supplied for x.
monthly_qty_by_cat2_fit <- mutate(
  monthly_qty_by_cat2_ts,
  fit.arima = map(.x = data.ts, .f = modeloARIMA_reg)
)
monthly_qty_by_cat2_fit
这里我不确定 map() 是否把正确的变量用作 y(因变量),但我还是继续尝试预测,结果出现了错误:
# Step 3: forecast from each stored model object.
# The result column is named fcast.arima so that it matches both the ARIMA
# model being used and the error output quoted below.
monthly_qty_by_cat2_fcast <- monthly_qty_by_cat2_fit %>%
  mutate(fcast.arima = map(fit.arima, forecast))
# The call above fails. Console output, verbatim from a Portuguese-locale
# session (English glosses in square brackets):
# Erro: Problem with `mutate()` input `fcast.arima`.
# x argumento não-numérico para operador binário
#   [non-numeric argument to binary operator]
# i Input `fcast.arima` is `map(fit.arima, forecast)`.
# i The error occured in group 1: category.secondary = "Cross Country Race".
# Run `rlang::last_error()` to see where the error occurred.
# Além disso: Warning message:   [In addition: Warning message:]
# In mean.default(x, na.rm = TRUE) :
# argument is not numeric or logical: returning NA
出现两个问题:
不知道如何输入每组自变量(x)的均值;
以及如何将此新数据声明为预测参数。
PS:结果不必是 tibble 或嵌套结构,我只需要点预测和置信区间(total.qty、lo.95、hi.95)。
好吧,这段代码解决了我的问题。
它对(分组 tsibble 中的)每个时间序列分别做一次预测,并把各序列中回归变量 price.m 的均值作为预测期的未来数据。
欢迎任何评论。
# MY FLOW
# Build a monthly tsibble keyed by secondary category: total quantity and
# mean price per category and month.
monthly_qty_by_cat2 <- sweep::bike_sales %>%
  mutate(order.month = yearmonth(order.date)) %>%
  group_by(category.secondary, order.month) %>%
  summarise(
    total.qty = sum(quantity),
    price.m = mean(price)
  ) %>%
  as_tsibble(key = category.secondary, index = order.month) # coerce to tsibble
# Future value of the regressor: one row per category holding the historical
# mean of price.m.
# as_tibble() comes first because summarise() on a tsibble keeps the time
# index and would return one row per month instead of one per category.
# This replaces the earlier mutate() + do() + as.numeric() detour, which
# produced the same per-category mean through a list-column.
futuro <- monthly_qty_by_cat2 %>%
  as_tibble() %>%
  group_by(category.secondary) %>%
  summarise(price.m = mean(price.m))
futuro

# Future rows: 3 new periods per category, each carrying that category's
# mean price as the value of price.m.
future_x <- new_data(monthly_qty_by_cat2, 3) %>%
  left_join(futuro, by = "category.secondary")
future_x
# Model and forecast: one ARIMA(total.qty ~ price.m) model per category,
# forecast over the rows of future_x, then compute the 95% interval and
# unpack it.
fc <- monthly_qty_by_cat2 %>%
  group_by(category.secondary) %>%
  model(ARIMA(total.qty ~ price.m)) %>%
  forecast(new_data = future_x) %>%
  hilo(level = 95) %>%
  unpack_hilo("95%")
fc

# Tidy the forecast: plain tibble without the total.qty column.
fc_tibble <- select(as_tibble(fc), -total.qty)
fc_tibble
# the end
好吧,这解决了我的问题。
它对每个分组的时间序列分别做一次预测,并把各组序列中回归变量 price.m 的均值作为预测期的未来数据。
欢迎任何评论。
我正在尝试对分组数据同时拟合并预测带回归项的 ARIMA 模型(即带 ARIMA 误差的回归)。
我是 tidy data(整洁数据)方面的新手,所以……基本上,我想用多个时间序列(ts)和多变量模型来重现此示例 (https://cran.rstudio.com/web/packages/sweep/vignettes/SW01_Forecasting_Time_Series_Groups.html)。
这是一个可重现的例子:
library(tidyverse); library(tidyquant)
library(timetk); library(sweep)
library(forecast)
library(tsibble)
library(fpp3)
# Show the packaged example data the workflow starts from.
bike_sales

# Monthly aggregation: for every secondary category and month, sum the
# quantity sold and average the price.
monthly_qty_by_cat2 <- bike_sales %>%
  mutate(order.month = as_date(as.yearmon(order.date))) %>%
  group_by(category.secondary, order.month) %>%
  summarise(
    total.qty = sum(quantity),
    price.m = mean(price)
  )

# Collapse each secondary category into a single row that holds its monthly
# rows in a nested list-column.
monthly_qty_by_cat2_nest <- monthly_qty_by_cat2 %>%
  group_by(category.secondary) %>%
  nest()
monthly_qty_by_cat2_nest
# Forecasting workflow
# Step 1: convert every nested data frame into a ts object.
# order.month is excluded from the series (the date is dropped); start = 2011
# and freq = 12 are forwarded to tk_ts for each group.
monthly_qty_by_cat2_ts <- mutate(
  monthly_qty_by_cat2_nest,
  data.ts = map(
    .x = data,
    .f = tk_ts,
    select = -order.month,
    start = 2011,
    freq = 12
  )
)
# Step 2: modelling an ARIMA(y ~ x)
# Helper intended to be mapped over the ts list-column.
#
# y: object used as the left-hand side of the model formula.
# x: object used as the right-hand side of the model formula (no default).
# Returns a one-element list wrapping whatever ARIMA(y ~ x) produces.
# NOTE(review): with fpp3 attached, ARIMA presumably resolves to fable's
# model specification rather than a fitted model - confirm.
modeloARIMA_reg <- function(y, x) {
  list(ARIMA(y ~ x))
}
# Apply the helper to every ts object. Each element of data.ts is passed as
# the first argument (y); no value is supplied for x.
monthly_qty_by_cat2_fit <- mutate(
  monthly_qty_by_cat2_ts,
  fit.arima = map(.x = data.ts, .f = modeloARIMA_reg)
)
monthly_qty_by_cat2_fit
这里我不确定 map() 是否把正确的变量用作 y(因变量),但我还是继续尝试预测,结果出现了错误:
# Step 3: forecast from each stored model object.
# The result column is named fcast.arima so that it matches both the ARIMA
# model being used and the error output quoted below.
monthly_qty_by_cat2_fcast <- monthly_qty_by_cat2_fit %>%
  mutate(fcast.arima = map(fit.arima, forecast))
# The call above fails. Console output, verbatim from a Portuguese-locale
# session (English glosses in square brackets):
# Erro: Problem with `mutate()` input `fcast.arima`.
# x argumento não-numérico para operador binário
#   [non-numeric argument to binary operator]
# i Input `fcast.arima` is `map(fit.arima, forecast)`.
# i The error occured in group 1: category.secondary = "Cross Country Race".
# Run `rlang::last_error()` to see where the error occurred.
# Além disso: Warning message:   [In addition: Warning message:]
# In mean.default(x, na.rm = TRUE) :
# argument is not numeric or logical: returning NA
出现两个问题:
不知道如何输入每组自变量(x)的均值;
以及如何将此新数据声明为预测参数。
PS:结果不必是 tibble 或嵌套结构,我只需要点预测和置信区间(total.qty、lo.95、hi.95)。
好吧,这段代码解决了我的问题。它对(分组 tsibble 中的)每个时间序列分别做一次预测,并把各序列中回归变量 price.m 的均值作为预测期的未来数据。欢迎任何评论。
# MY FLOW
# Build a monthly tsibble keyed by secondary category: total quantity and
# mean price per category and month.
monthly_qty_by_cat2 <- sweep::bike_sales %>%
  mutate(order.month = yearmonth(order.date)) %>%
  group_by(category.secondary, order.month) %>%
  summarise(
    total.qty = sum(quantity),
    price.m = mean(price)
  ) %>%
  as_tsibble(key = category.secondary, index = order.month) # coerce to tsibble
# Future value of the regressor: one row per category holding the historical
# mean of price.m.
# as_tibble() comes first because summarise() on a tsibble keeps the time
# index and would return one row per month instead of one per category.
# This replaces the earlier mutate() + do() + as.numeric() detour, which
# produced the same per-category mean through a list-column.
futuro <- monthly_qty_by_cat2 %>%
  as_tibble() %>%
  group_by(category.secondary) %>%
  summarise(price.m = mean(price.m))
futuro

# Future rows: 3 new periods per category, each carrying that category's
# mean price as the value of price.m.
future_x <- new_data(monthly_qty_by_cat2, 3) %>%
  left_join(futuro, by = "category.secondary")
future_x
# Model and forecast: one ARIMA(total.qty ~ price.m) model per category,
# forecast over the rows of future_x, then compute the 95% interval and
# unpack it.
fc <- monthly_qty_by_cat2 %>%
  group_by(category.secondary) %>%
  model(ARIMA(total.qty ~ price.m)) %>%
  forecast(new_data = future_x) %>%
  hilo(level = 95) %>%
  unpack_hilo("95%")
fc

# Tidy the forecast: plain tibble without the total.qty column.
fc_tibble <- select(as_tibble(fc), -total.qty)
fc_tibble
# the end
好吧,这解决了我的问题。它对每个分组的时间序列分别做一次预测,并把各组序列中回归变量 price.m 的均值作为预测期的未来数据。欢迎任何评论。