factor season 有新的级别 4 ，当在 R 中按组执行 Arima 时

Question

这里是我的数据集示例

ts=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L, 
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L, 
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L, 
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018", 
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020", 
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018", 
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019", 
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020", 
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017", 
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019", 
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018", 
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"), 
    client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens", 
    "hooves", "Oysters"), class = "factor"), Sales = c(374L, 
    12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L, 
    117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L, 
    67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L, 
    441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data", 
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA, 
-40L))

我想按组使用 Arima 模型执行时间序列

#if using dummy
fun_tslm <- function(x, start = "2017-01-04", freq = 12){
  tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
  #View(tsw)
  mytslm <- tslm(tsw ~ trend + season)
  mytslm
}

fun_forecast <- function(x, h = 14){
  residarima1 <- auto.arima(x[["residuals"]])
  residualsArimaForecast <- forecast(residarima1, h = h)
  residualsF <- as.numeric(residualsArimaForecast$mean)
  regressionForecast <- forecast(x, h = h)
  regressionF <- as.numeric(regressionForecast$mean)
  forecastR <- regressionF + residualsF
  forecastR
}

tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)

当我运行这个脚本我收到错误

Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels) : factor season has new levels 4

但实际上我想在我可以看到的地方获得带有 Arima 指标的输出 1.forecast初始值

2.forecast 14 个月 CI

初始值和预测值的输出应该是两个不同的data.frame。怎么做？

Answer 1

你的脚本和数据中有些部分不是很清楚，所以我可以尝试给你一个部分的答案，看看如何得到你想要的结果：

# I called your dataset in this way, because ts is a function
timeseries

现在的想法是将您的数据框转换为列表，列表的每个组件都是一个组，即时间序列。我想象每个组都是客户+东西，但我们可以用不同的方式来管理它：

# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)

# EDIT here you convert the Data class as class(date)
library(lubridate)
timeseries$Data <- dmy(timeseries$Data)

# now the list
listed <- split(timeseries,timeseries$group)

现在我们必须将列表的每个组成部分定义为时间序列，使用 lapply 和 ts 函数：

 # I do not understand why all your ts start with "2017-01-04", when in the example they are not (probably because it's an example)

 # EDIT: convert the start date
 listed_ts <- lapply(listed,
                     function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12)  ) 

    listed_ts
$`Hornschickens`
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374  12 120 242 227 268 280 419  12 172 336

$Hornshooves
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522  65 268 441 247 348 445 477  62 226 476
17171 306                                            

$KornevOysters
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150  90 117 116 146 120 211 213  67 146
17171 118 152 122 201

下一步是auto.arima每个时间序列，用lapply逻辑：

library(forecast)
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
# partial result
> listed_arima
$`Hornschickens`
Series: x 
ARIMA(0,0,0) with non-zero mean 

Coefficients:
          mean
      223.8182
s.e.   38.7707

sigma^2 estimated as 18188:  log likelihood=-69.03
AIC=142.06   AICc=143.56   BIC=142.86
...

现在每个华宇的预测：

listed_forecast <- lapply(listed_arima,function(x) forecast(x,1) )

如果您需要将其简化为 data.frame、do.call 和 rbind 帮助：

do.call(rbind,listed_forecast)

              method                            model   level     mean     lower     upper     x          series fitted     residuals 
Hornschickens "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 223.8182 Numeric,2 Numeric,2 Integer,11 "x"    Numeric,11 Numeric,11
Hornshooves   "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 336.9231 Numeric,2 Numeric,2 Integer,13 "x"    Numeric,13 Numeric,13
KornevOysters "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 137.125  Numeric,2 Numeric,2 Integer,16 "x"    Numeric,16 Numeric,16

我认为你可以稍微扭转一下以获得更好的结果。如果你想知道为什么对于这个例子，如果你在 auto.arima 函数中放入多个 1 来预测，但结果是一个常数，答案是 here，[=] 也指出23=] 输出列。

factor season 有新的级别 4 ，当在 R 中按组执行 Arima 时

factor season has new levels 4 , when performing Arima by group in R

r

time-series

forecasting

dplyr

arima