在 R 中对分组数据使用 ARIMAX
ARIMAX with grouped data R
我是 R 的新手,我需要比较 ARIMAX 和 ARIMA 的准确性。这是我的数据示例以及我为 ARIMA 模型所做的工作:
library(dplyr)
library(forecast)
library(lubridate)
# Example panel data: three series (id 1-3), each with ten monthly rows
# (months 1-10 of 2019, day fixed at 1), a response column `value`, and two
# numeric columns `reg1` and `reg2` that are used as regressors further down.
data<-tibble::tribble(
~id, ~day, ~month, ~year, ~value, ~reg1, ~reg2,
1L, 1L, 1L, 2019L, 4.634, 0.626, 0.684,
1L, 1L, 2L, 2019L, 2.969, 0.698, 0.049,
1L, 1L, 3L, 2019L, 1.885, 0.62, 0.155,
1L, 1L, 4L, 2019L, 2.415, 0.553, 0.959,
1L, 1L, 5L, 2019L, 2.215, 0.598, 0.065,
1L, 1L, 6L, 2019L, 1.805, 0.454, 0.07,
1L, 1L, 7L, 2019L, 4.682, 0.045, 0.376,
1L, 1L, 8L, 2019L, 4.248, 0.087, 0.094,
1L, 1L, 9L, 2019L, 0.55, 0.523, 0.86,
1L, 1L, 10L, 2019L, 0.109, 0.176, 0.591,
2L, 1L, 1L, 2019L, 2.918, 0.442, 0.956,
2L, 1L, 2L, 2019L, 3.083, 0.233, 0.388,
2L, 1L, 3L, 2019L, 3.271, 0.652, 0.946,
2L, 1L, 4L, 2019L, 2.175, 0.704, 0.902,
2L, 1L, 5L, 2019L, 4.51, 0.851, 0.533,
2L, 1L, 6L, 2019L, 4.178, 0.655, 0.614,
2L, 1L, 7L, 2019L, 1.956, 0.434, 0.977,
2L, 1L, 8L, 2019L, 3.219, 0.418, 0.4,
2L, 1L, 9L, 2019L, 2.72, 0.335, 0.096,
2L, 1L, 10L, 2019L, 4.519, 0.534, 0.388,
3L, 1L, 1L, 2019L, 2.969, 0.707, 0.752,
3L, 1L, 2L, 2019L, 2.456, 0.085, 0.651,
3L, 1L, 3L, 2019L, 0.418, 0.851, 0.399,
3L, 1L, 4L, 2019L, 2.324, 0.626, 0.317,
3L, 1L, 5L, 2019L, 3.548, 0.175, 0.081,
3L, 1L, 6L, 2019L, 3.74, 0.667, 0.691,
3L, 1L, 7L, 2019L, 4.48, 0.853, 0.259,
3L, 1L, 8L, 2019L, 0.18, 0.016, 0.489,
3L, 1L, 9L, 2019L, 3.028, 0.51, 0.741,
3L, 1L, 10L, 2019L, 4.652, 0.916, 0.953
)
# Add a character `date` column and a year-month index, then convert the
# table to a tsibble with one series per `id`.
# `as_tsibble()` is namespace-qualified because tsibble is not attached by the
# library() calls in this script; the unqualified call cannot be resolved.
data <- data %>%
  mutate(
    date = as.character(make_date(year, month, day)),
    YearMonth = tsibble::yearmonth(ymd(date))
  ) %>%
  tsibble::as_tsibble(key = id, index = YearMonth)
# Fit a non-seasonal ARIMA per id on the training window (up to Aug 2019).
# tsibble, fabletools and fable are not attached by this script, so their
# functions are namespace-qualified; `PDQ()` is a special resolved inside the
# ARIMA formula. The model is given an explicit name so its rows are easy to
# identify when accuracy tables from different models are compared.
fit <- data %>%
  filter(YearMonth <= tsibble::yearmonth("2019 Aug")) %>%
  fabletools::model(
    arima = fable::ARIMA(
      value ~ PDQ(0, 0, 0),
      stepwise = FALSE,
      approximation = FALSE
    )
  )
# Forecast the two held-out months (Sep and Oct 2019) and score the forecasts
# against the full data set.
fit %>%
  fabletools::forecast(h = 2) %>%
  fabletools::accuracy(data)
现在我需要使用 ARIMAX 执行此操作:
# Attempted per-id fit with auto.arima() and external regressors on the
# training window (up to Aug 2019).
# NOTE(review): inside do(), `.` is the current id's filtered subset, but
# `data` in the `xreg` argument still refers to the full, unfiltered table, so
# `xreg` has more rows than `.$value`. This looks like the cause of the
# "variable lengths differ" error quoted below.
covariates <- c("reg1","reg2")
fit_arimax <- data %>%
filter(YearMonth <= yearmonth("2019 Aug")) %>%
group_by(id) %>%
do(autoarima=auto.arima(.$value,xreg = as.matrix(data[,covariates])))
我收到以下错误:
Error in model.frame.default(formula = x ~ xregg, drop.unused.levels = TRUE) :
variable lengths differ (found for 'xregg')
In addition: Warning message: In !is.na(x) & !is.na(rowSums(xreg)) : longer object length is not a multiple of shorter object length
我看到过一个相关的解答,但我没能照着做出来,因为我是 R 的初学者。
所以我想知道 ARIMA() 是否支持加入回归变量(协变量),或者如何用 auto.arima 解决这个问题,然后按 id 比较 ARIMA 和 ARIMAX 的准确性。有谁知道怎么做吗?谢谢!
您已从使用 tsibble 和 fable 包切换到使用 forecast 包。它们使用不同的数据结构,一般不应混用。
您可以使用 fable 包轻松地拟合具有 ARIMA 误差的回归模型,如下所示。
# Fit a regression on reg1 and reg2 with ARIMA errors, per id, on the training
# window (up to Aug 2019). tsibble, fabletools and fable are not attached by
# this script, so their functions are namespace-qualified. The model is named
# so its accuracy rows can be told apart from those of other models.
fit_arimax <- data %>%
  filter(YearMonth <= tsibble::yearmonth("2019 Aug")) %>%
  fabletools::model(
    arimax = fable::ARIMA(value ~ reg1 + reg2 + PDQ(0, 0, 0))
  )
# The regressors must be supplied for the forecast period, so the held-out
# rows (after Aug 2019) are passed as `new_data` instead of a horizon `h`.
fc <- fit_arimax %>%
  fabletools::forecast(
    new_data = filter(data, YearMonth > tsibble::yearmonth("2019 Aug"))
  )
# Score the forecasts against the full data set.
fc %>% fabletools::accuracy(data)
请注意,这实际上不是 ARIMAX 模型——参见 https://robjhyndman.com/hyndsight/arimax/
我是 R 的新手,我需要比较 ARIMAX 和 ARIMA 的准确性。这是我的数据示例以及我为 ARIMA 模型所做的工作:
library(dplyr)
library(forecast)
library(lubridate)
# Example panel data: three series (id 1-3), each with ten monthly rows
# (months 1-10 of 2019, day fixed at 1), a response column `value`, and two
# numeric columns `reg1` and `reg2` that are used as regressors further down.
data<-tibble::tribble(
~id, ~day, ~month, ~year, ~value, ~reg1, ~reg2,
1L, 1L, 1L, 2019L, 4.634, 0.626, 0.684,
1L, 1L, 2L, 2019L, 2.969, 0.698, 0.049,
1L, 1L, 3L, 2019L, 1.885, 0.62, 0.155,
1L, 1L, 4L, 2019L, 2.415, 0.553, 0.959,
1L, 1L, 5L, 2019L, 2.215, 0.598, 0.065,
1L, 1L, 6L, 2019L, 1.805, 0.454, 0.07,
1L, 1L, 7L, 2019L, 4.682, 0.045, 0.376,
1L, 1L, 8L, 2019L, 4.248, 0.087, 0.094,
1L, 1L, 9L, 2019L, 0.55, 0.523, 0.86,
1L, 1L, 10L, 2019L, 0.109, 0.176, 0.591,
2L, 1L, 1L, 2019L, 2.918, 0.442, 0.956,
2L, 1L, 2L, 2019L, 3.083, 0.233, 0.388,
2L, 1L, 3L, 2019L, 3.271, 0.652, 0.946,
2L, 1L, 4L, 2019L, 2.175, 0.704, 0.902,
2L, 1L, 5L, 2019L, 4.51, 0.851, 0.533,
2L, 1L, 6L, 2019L, 4.178, 0.655, 0.614,
2L, 1L, 7L, 2019L, 1.956, 0.434, 0.977,
2L, 1L, 8L, 2019L, 3.219, 0.418, 0.4,
2L, 1L, 9L, 2019L, 2.72, 0.335, 0.096,
2L, 1L, 10L, 2019L, 4.519, 0.534, 0.388,
3L, 1L, 1L, 2019L, 2.969, 0.707, 0.752,
3L, 1L, 2L, 2019L, 2.456, 0.085, 0.651,
3L, 1L, 3L, 2019L, 0.418, 0.851, 0.399,
3L, 1L, 4L, 2019L, 2.324, 0.626, 0.317,
3L, 1L, 5L, 2019L, 3.548, 0.175, 0.081,
3L, 1L, 6L, 2019L, 3.74, 0.667, 0.691,
3L, 1L, 7L, 2019L, 4.48, 0.853, 0.259,
3L, 1L, 8L, 2019L, 0.18, 0.016, 0.489,
3L, 1L, 9L, 2019L, 3.028, 0.51, 0.741,
3L, 1L, 10L, 2019L, 4.652, 0.916, 0.953
)
# Add a character `date` column and a year-month index, then convert the
# table to a tsibble with one series per `id`.
# `as_tsibble()` is namespace-qualified because tsibble is not attached by the
# library() calls in this script; the unqualified call cannot be resolved.
data <- data %>%
  mutate(
    date = as.character(make_date(year, month, day)),
    YearMonth = tsibble::yearmonth(ymd(date))
  ) %>%
  tsibble::as_tsibble(key = id, index = YearMonth)
# Fit a non-seasonal ARIMA per id on the training window (up to Aug 2019).
# tsibble, fabletools and fable are not attached by this script, so their
# functions are namespace-qualified; `PDQ()` is a special resolved inside the
# ARIMA formula. The model is given an explicit name so its rows are easy to
# identify when accuracy tables from different models are compared.
fit <- data %>%
  filter(YearMonth <= tsibble::yearmonth("2019 Aug")) %>%
  fabletools::model(
    arima = fable::ARIMA(
      value ~ PDQ(0, 0, 0),
      stepwise = FALSE,
      approximation = FALSE
    )
  )
# Forecast the two held-out months (Sep and Oct 2019) and score the forecasts
# against the full data set.
fit %>%
  fabletools::forecast(h = 2) %>%
  fabletools::accuracy(data)
现在我需要使用 ARIMAX 执行此操作:
# Attempted per-id fit with auto.arima() and external regressors on the
# training window (up to Aug 2019).
# NOTE(review): inside do(), `.` is the current id's filtered subset, but
# `data` in the `xreg` argument still refers to the full, unfiltered table, so
# `xreg` has more rows than `.$value`. This looks like the cause of the
# "variable lengths differ" error quoted below.
covariates <- c("reg1","reg2")
fit_arimax <- data %>%
filter(YearMonth <= yearmonth("2019 Aug")) %>%
group_by(id) %>%
do(autoarima=auto.arima(.$value,xreg = as.matrix(data[,covariates])))
我收到以下错误:
Error in model.frame.default(formula = x ~ xregg, drop.unused.levels = TRUE) :
variable lengths differ (found for 'xregg')
In addition: Warning message: In !is.na(x) & !is.na(rowSums(xreg)) : longer object length is not a multiple of shorter object length
我看到过一个相关的解答,但我没能照着做出来,因为我是 R 的初学者。
您已从使用 tsibble 和 fable 包切换到使用 forecast 包。它们使用不同的数据结构,一般不应混用。
您可以使用 fable 包轻松地拟合具有 ARIMA 误差的回归模型,如下所示。
# Fit a regression on reg1 and reg2 with ARIMA errors, per id, on the training
# window (up to Aug 2019). tsibble, fabletools and fable are not attached by
# this script, so their functions are namespace-qualified. The model is named
# so its accuracy rows can be told apart from those of other models.
fit_arimax <- data %>%
  filter(YearMonth <= tsibble::yearmonth("2019 Aug")) %>%
  fabletools::model(
    arimax = fable::ARIMA(value ~ reg1 + reg2 + PDQ(0, 0, 0))
  )
# The regressors must be supplied for the forecast period, so the held-out
# rows (after Aug 2019) are passed as `new_data` instead of a horizon `h`.
fc <- fit_arimax %>%
  fabletools::forecast(
    new_data = filter(data, YearMonth > tsibble::yearmonth("2019 Aug"))
  )
# Score the forecasts against the full data set.
fc %>% fabletools::accuracy(data)
请注意,这实际上不是 ARIMAX 模型——参见 https://robjhyndman.com/hyndsight/arimax/