在 R 中按组使用 auto.arima() 将数字日期格式转换为正确的日期格式

convert numeric date format into correct date format using auto.arima() by group in R

让我们处理这个数据样本

timeseries<-structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L, 
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L, 
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L, 
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018", 
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020", 
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018", 
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019", 
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020", 
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017", 
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019", 
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018", 
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"), 
    client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens", 
    "hooves", "Oysters"), class = "factor"), Sales = c(374L, 
    12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L, 
    117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L, 
    67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L, 
    441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data", 
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA, 
-40L))

我想使用 auto.arima 按组

执行预测
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)

# now the list
listed <- split(timeseries,timeseries$group)

library("forecast")
library("lubridate")

listed_ts <- lapply(listed,
                    function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12)  ) 

listed_ts

listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,2) )
listed_forecast
do.call(rbind,listed_forecast)

listed_forecast之后我得到下一个输出

  Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
    17170 374  12 120 242 227 268 280 419  12 172 336

    $Hornshooves
          Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
    17170 497 522  65 268 441 247 348 445 477  62 226 476
    17171 306                                            

    $KornevOysters
          Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
    17170 117 108 150  90 117 116 146 120 211 213  67 146
    17171 118 152 122 201

17170 的日期格式不正确

所以 as.numeric(ymd("2017-01-04")) 向我们展示了 17170

如何得到正确日期格式的输出? 我想要这样的正常日期格式

$Hornschickens
          Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
Dec 2017       223.8182 50.98365 396.6527 -40.50942 488.1458
Jan 2018       223.8182 50.98365 396.6527 -40.50942 488.1458

所以我假设预测中的 17170 意味着 2017,17171=2018 但是转换成阅读格式有什么麻烦

我看到下一个逻辑 是否可以将这些数值自动转换为日期

其中

1970-01-01 (Y-m-d) 为 0

1970-01-02 是 1

“2018-12-25”是 17890

但是像我的示例中那样按月输入数据汇总时

dec 17890 如此简单地转换为 dec 2018 导致 17890 在 2018 年 12 月的范围内。

listed_ts 中的 start 参数不应该是日期:

start --- the time of the first observation. Either a single number or a vector of two integers, which specify a natural time unit and a (1-based) number of samples into the time unit. See the examples for the use of the second form.

(来自 ?ts。)因此,使用 start = c(2017, 1) 即可。