在 R 中按 ID 分组运行 auto.arima

Question

我有一个带有 ID、每月日期和该月值的 df。这是一个例子：

# Example data: one monthly series per ID. The series do not share the same
# start and end months.
df <- tibble::tribble(
  ~ID, ~MonthlyDate, ~value,
  1L, "2018-09-01", -29.884,
  1L, "2018-10-01", 16.143,
  1L, "2018-11-01", 3.785,
  1L, "2018-12-01", -264,
  1L, "2019-01-01", -308,
  1L, "2019-02-01", 712,
  1L, "2019-03-01", -260,
  1L, "2019-04-01", -300,
  1L, "2019-05-01", -149,
  1L, "2019-06-01", -291,
  1L, "2019-07-01", -284,
  1L, "2019-08-01", -158,
  1L, "2019-09-01", -272,
  1L, "2019-10-01", -289,
  1L, "2019-11-01", -1.268,
  2L, "2016-12-01", -489,
  2L, "2017-01-01", 7.606,
  2L, "2017-02-01", -458,
  2L, "2017-03-01", 8.603,
  2L, "2017-04-01", 6.298,
  2L, "2017-05-01", 2.598,
  2L, "2017-06-01", 9.14,
  2L, "2017-07-01", 6.004,
  2L, "2017-08-01", 3.684,
  2L, "2017-09-01", -1.815,
  2L, "2017-10-01", 12.487,
  2L, "2017-11-01", -1.288,
  2L, "2017-12-01", 3.213,
  3L, "2018-01-01", 3.447,
  3L, "2018-02-01", 3.986,
  3L, "2018-03-01", 8.08201,
  3L, "2018-04-01", 2.033,
  3L, "2018-05-01", 4.401,
  3L, "2018-06-01", 3.482,
  3L, "2018-07-01", 4.04,
  3L, "2018-08-01", 1.607,
  3L, "2018-09-01", 694,
  3L, "2018-10-01", 4.026,
  3L, "2018-11-01", -176,
  3L, "2018-12-01", -41,
  3L, "2019-01-01", 815,
  3L, "2019-02-01", 1.743,
  3L, "2019-03-01", -3.433,
  3L, "2019-04-01", 4.167,
  3L, "2019-05-01", 3.792,
  3L, "2019-06-01", -293,
  3L, "2019-07-01", -4.666,
  3L, "2019-08-01", 835,
  3L, "2019-09-01", 5.434,
  3L, "2019-10-01", 4.636,
  3L, "2019-11-01", 2.731
)
df
# A tibble: 51 x 3
#      ID MonthlyDate   value
#   <int> <chr>         <dbl>
 #1     1 2018-09-01   -29.9 
 #2     1 2018-10-01    16.1 
 #3     1 2018-11-01     3.78
 #4     1 2018-12-01  -264   
 #5     1 2019-01-01  -308   
 #6     1 2019-02-01   712   
 #7     1 2019-03-01  -260   
 #8     1 2019-04-01  -300   
 #9     1 2019-05-01  -149   
#10     1 2019-06-01  -291

我需要使用 auto.arima() 自动选择 ARIMA 模型，并且要按 ID 分别拟合模型。请注意，各个 ID 的起始日期和结束日期并不完全相同。我还需要查看每个 ID 最终选中的是哪个模型。

我尝试使用拆分：

# Break df into a list of data frames, one element per ID.
df2 <- split(df, df$ID)

然后 lapply:

# NOTE: auto.arima() is written with parentheses and no arguments here, so it
# is called as-is rather than being handed to lapply() as a function.
lapply(df2,auto.arima())

但是我得到一个错误：“as.ts(x) 中的错误：参数 "y" 缺失，且没有默认值”。我不知道怎么按 ID 分组，然后转换成 ts 对象，再应用 auto.arima()。有人知道怎么做吗？谢谢！

Answer 1

这正是 fable 包的设计目的。

library(tidyverse)
library(tsibble)
library(fable)

# Build the example data and turn it into a monthly tsibble keyed by ID.
df <- tibble::tribble(
  ~ID, ~MonthlyDate, ~value,
  1L,  "2018-09-01", -29.884,
  1L,  "2018-10-01", 16.143,
  1L,  "2018-11-01", 3.785,
  1L,  "2018-12-01", -264,
  1L,  "2019-01-01", -308,
  1L,  "2019-02-01", 712,
  1L,  "2019-03-01", -260,
  1L,  "2019-04-01", -300,
  1L,  "2019-05-01", -149,
  1L,  "2019-06-01", -291,
  1L,  "2019-07-01", -284,
  1L,  "2019-08-01", -158,
  1L,  "2019-09-01", -272,
  1L,  "2019-10-01", -289,
  1L,  "2019-11-01", -1.268,
  2L,  "2016-12-01", -489,
  2L,  "2017-01-01", 7.606,
  2L,  "2017-02-01", -458,
  2L,  "2017-03-01", 8.603,
  2L,  "2017-04-01", 6.298,
  2L,  "2017-05-01", 2.598,
  2L,  "2017-06-01", 9.14,
  2L,  "2017-07-01", 6.004,
  2L,  "2017-08-01", 3.684,
  2L,  "2017-09-01", -1.815,
  2L,  "2017-10-01", 12.487,
  2L,  "2017-11-01", -1.288,
  2L,  "2017-12-01", 3.213,
  3L,  "2018-01-01", 3.447,
  3L,  "2018-02-01", 3.986,
  3L,  "2018-03-01", 8.08201,
  3L,  "2018-04-01", 2.033,
  3L,  "2018-05-01", 4.401,
  3L,  "2018-06-01", 3.482,
  3L,  "2018-07-01", 4.04,
  3L,  "2018-08-01", 1.607,
  3L,  "2018-09-01", 694,
  3L,  "2018-10-01", 4.026,
  3L,  "2018-11-01", -176,
  3L,  "2018-12-01", -41,
  3L,  "2019-01-01", 815,
  3L,  "2019-02-01", 1.743,
  3L,  "2019-03-01", -3.433,
  3L,  "2019-04-01", 4.167,
  3L,  "2019-05-01", 3.792,
  3L,  "2019-06-01", -293,
  3L,  "2019-07-01", -4.666,
  3L,  "2019-08-01", 835,
  3L,  "2019-09-01", 5.434,
  3L,  "2019-10-01", 4.636,
  3L,  "2019-11-01", 2.731
) %>%
  # Convert the date strings into a monthly index.
  mutate(MonthlyDate = yearmonth(MonthlyDate)) %>%
  # Each ID becomes its own series within a single tsibble.
  as_tsibble(key = ID, index = MonthlyDate) %>%
  # Make any missing months explicit so every series is regular.
  fill_gaps()
df
#> # A tsibble: 51 x 3 [1M]
#> # Key:       ID [3]
#>       ID MonthlyDate   value
#>    <int>       <mth>   <dbl>
#>  1     1    2018 Sep  -29.9 
#>  2     1    2018 Oct   16.1 
#>  3     1    2018 Nov    3.78
#>  4     1    2018 Dec -264   
#>  5     1    2019 Jan -308   
#>  6     1    2019 Feb  712   
#>  7     1    2019 Mar -260   
#>  8     1    2019 Apr -300   
#>  9     1    2019 May -149   
#> 10     1    2019 Jun -291   
#> # … with 41 more rows

# Fit one automatically selected ARIMA model per ID. PDQ(P = 0, D = 0, Q = 0)
# fixes the seasonal orders at zero, so only non-seasonal models are searched.
# The fitted models are kept in `fit` so the selection can be inspected:
# printing `fit` shows the chosen specification for each ID, and tidy(fit) /
# glance(fit) give the coefficients and fit statistics.
fit <- df %>%
  model(arima = ARIMA(value ~ PDQ(P = 0, D = 0, Q = 0)))

# Forecast the next 12 months for every ID from its own fitted model.
fit %>%
  forecast(h = "12 months")
#> # A fable: 36 x 5 [1M]
#> # Key:     ID, .model [3]
#>       ID .model MonthlyDate value .distribution 
#>    <int> <chr>        <mth> <dbl> <dist>        
#>  1     1 arima     2019 Dec -125. N(-125, 69007)
#>  2     1 arima     2020 Jan -125. N(-125, 69007)
#>  3     1 arima     2020 Feb -125. N(-125, 69007)
#>  4     1 arima     2020 Mar -125. N(-125, 69007)
#>  5     1 arima     2020 Apr -125. N(-125, 69007)
#>  6     1 arima     2020 May -125. N(-125, 69007)
#>  7     1 arima     2020 Jun -125. N(-125, 69007)
#>  8     1 arima     2020 Jul -125. N(-125, 69007)
#>  9     1 arima     2020 Aug -125. N(-125, 69007)
#> 10     1 arima     2020 Sep -125. N(-125, 69007)
#> # … with 26 more rows

于 2020 年 3 月 22 日由 reprex 包 (v0.3.0) 创建

更多信息请参见 fable.tidyverts.org，或 otexts.com/fpp3。

在 R 中按 ID 分组运行 auto.arima

autoarima by id in r

r

time-series

arima