使用带寓言和咕噜声的 ETS 的动态趋势或季节性参数的问题

problem using a dynamic trend or seasonal parameter with ETS with fable and purrr

我有一个 tsibble 如下所示。

test.data <- structure(list(RSLITM = c("004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "004", "004", "004", "004", 
"004", "004", "004", "004", "004", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005", "005", "005", "005", 
"005", "005", "005", "005", "005", "005"), RSFMTH = structure(c(17713, 
17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 17987, 
18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 18262, 
18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 18536, 
18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 18809, 
17713, 17744, 17775, 17805, 17836, 17866, 17897, 17928, 17956, 
17987, 18017, 18048, 18078, 18109, 18140, 18170, 18201, 18231, 
18262, 18293, 18322, 18353, 18383, 18414, 18444, 18475, 18506, 
18536, 18567, 18597, 18628, 18659, 18687, 18718, 18748, 18779, 
18809), class = c("yearmonth", "vctrs_vctr")), RSFQTY = c(285600, 
352200, 273600, 282700, 175800, 138700, 177700, 245900, 165000, 
180100, 298000, 173800, 257300, 282800, 164500, 155100, 232300, 
175500, 226000, 287100, 221400, 270800, 286200, 394400, 336600, 
331000, 224600, 216800, 351600, 374700, 173500, 423700, 357700, 
245200, 454700, 361700, 381200, 79000, 58100, 66300, 52700, 68600, 
33000, 76600, 85600, 84100, 49000, 98000, 113500, 83800, 64000, 
116800, 72000, 65200, 49800, 33300, 79800, 48000, 81600, 125000, 
53500, 97600, 80000, 81900, 80000, 53800, 39000, 73800, 76600, 
33700, 60200, 84000, 66600, 32400), RSSEAS = c("A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A"), RSTREND = c("N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", 
"N", "N", "N", "N", "N"), RSMODE = c("EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP", 
"EXP", "EXP", "EXP", "EXP", "EXP", "EXP", "EXP")), row.names = c(NA, 
-74L), key = structure(list(RSLITM = c("004", "005"), RSSEAS = c("A", 
"A"), RSTREND = c("N", "N"), RSMODE = c("EXP", "EXP"), .rows = structure(list(
    1:37, 38:74), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), index = structure("RSFMTH", ordered = TRUE), index2 = "RSFMTH", interval = structure(list(
    year = 0, quarter = 0, month = 1, week = 0, day = 0, hour = 0, 
    minute = 0, second = 0, millisecond = 0, microsecond = 0, 
    nanosecond = 0, unit = 0), .regular = TRUE, class = c("interval", 
"vctrs_rcrd", "vctrs_vctr")), class = c("tbl_ts", "tbl_df", "tbl", 
"data.frame"))

我想使用 tsibble 中保存的参数应用修改后的 ETS 函数。例如,RSSEAS 和 RSTREND 列中的任何内容都将用于估计 ETS 模型。

以下作品:

test.data %>% model(EXP = ETS(RSFQTY ~ trend("N") + season("A")))

但是,当我尝试使用下面的函数为每个 SKU 提取参数时(因为可能每个 SKU 的参数可能不同),我收到一条错误消息。

ets.function <- function(tsib){
  season.param <- as.character(tsib[1, "RSSEAS"])
  trend.param <- as.character(tsib[1, "RSTREND"])
  tsib %>% model(EXP = ETS(RSFQTY ~ trend(trend.param) + season(season.param))) %>% forecast(h = "3 years")
}

如果我将 ets.function(test.data) R returns 称为寓言,但它是 NULL/NA,因为未使用指定参数估计模型。

调用 map_dfr(test.data, ets.function) 出现以下错误:

Error in tsib[1, "RSSEAS"] : incorrect number of dimensions

这对我来说没有意义,因为如果我在我的控制台中 运行 season.param 或 trend.param 的代码,我会得到相应的“A”或“N” ,这正是趋势和季节特价在 ETS 函数中所取的值。

基本上,我试图找出一种方法,使用每个唯一键组合的预先指定参数,将 ETS 映射到我的 tsibble 上。我对如何实现这一点的其他想法持开放态度(pmap_dfr 参数向量等)。

我们可以用 gluepaste

创建公式
library(fable)
ets.function <- function(tsib){
  season.param <- tsib[["RSSEAS"]][1]
  trend.param <- tsib[["RSTREND"]][1]
  fmla <- as.formula(glue::glue("RSFQTY ~ trend('{trend.param}') +",
            " season('{season.param}')"))
  print(fmla)
  tsib %>% 
     model(EXP = ETS(fmla)) %>% 
  forecast(h = "3 years")
}

-测试

> ets.function(test.data)
RSFQTY ~ trend("N") + season("A")
<environment: 0x7fface3d9778>
# A fable: 72 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [2]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH             RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>             <dist>   <dbl>
 1 004    A      N       EXP    EXP    2021 Aug  N(4e+05, 1.4e+10) 395706.
 2 004    A      N       EXP    EXP    2021 Sep N(279181, 8.6e+09) 279181.
 3 004    A      N       EXP    EXP    2021 Oct N(266837, 8.8e+09) 266837.
 4 004    A      N       EXP    EXP    2021 Nov N(349230, 1.4e+10) 349230.
 5 004    A      N       EXP    EXP    2021 Dec N(327811, 1.4e+10) 327811.
 6 004    A      N       EXP    EXP    2022 Jan N(265657, 1.2e+10) 265657.
 7 004    A      N       EXP    EXP    2022 Feb N(375557, 1.9e+10) 375557.
 8 004    A      N       EXP    EXP    2022 Mar  N(3e+05, 1.6e+10) 300908.
 9 004    A      N       EXP    EXP    2022 Apr N(318455, 1.8e+10) 318455.
10 004    A      N       EXP    EXP    2022 May  N(4e+05, 2.4e+10) 400250.
# … with 62 more rows


或者也可以使用 sprintf

ets.function <- function(tsib){
  season.param <- tsib[["RSSEAS"]][1]
  trend.param <- tsib[["RSTREND"]][1]
  fmla <- as.formula(sprintf("RSFQTY ~ trend('%s') + season('%s')",  
           trend.param, season.param))
  
  print(fmla)
  tsib %>% 
     model(EXP = ETS(fmla)) %>% 
  forecast(h = "3 years")
}
ets.function(test.data)

如果我们需要基于组应用,我们可以使用 group_split 拆分并使用 map

循环应用函数
library(purrr)
test.data %>%
    group_split(RSLITM) %>% 
    map(~ ets.function(.x))

-输出

RSFQTY ~ trend("N") + season("A")
<environment: 0x7ffac94171b8>
RSFQTY ~ trend("N") + season("A")
<environment: 0x7ffacf3f1950>
[[1]]
# A fable: 36 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [1]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH             RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>             <dist>   <dbl>
 1 004    A      N       EXP    EXP    2021 Aug  N(4e+05, 1.4e+10) 395706.
 2 004    A      N       EXP    EXP    2021 Sep N(279181, 8.6e+09) 279181.
 3 004    A      N       EXP    EXP    2021 Oct N(266837, 8.8e+09) 266837.
 4 004    A      N       EXP    EXP    2021 Nov N(349230, 1.4e+10) 349230.
 5 004    A      N       EXP    EXP    2021 Dec N(327811, 1.4e+10) 327811.
 6 004    A      N       EXP    EXP    2022 Jan N(265657, 1.2e+10) 265657.
 7 004    A      N       EXP    EXP    2022 Feb N(375557, 1.9e+10) 375557.
 8 004    A      N       EXP    EXP    2022 Mar  N(3e+05, 1.6e+10) 300908.
 9 004    A      N       EXP    EXP    2022 Apr N(318455, 1.8e+10) 318455.
10 004    A      N       EXP    EXP    2022 May  N(4e+05, 2.4e+10) 400250.
# … with 26 more rows

[[2]]
# A fable: 36 x 8 [1M]
# Key:     RSLITM, RSSEAS, RSTREND, RSMODE, .model [1]
   RSLITM RSSEAS RSTREND RSMODE .model   RSFMTH            RSFQTY   .mean
   <chr>  <chr>  <chr>   <chr>  <chr>     <mth>            <dist>   <dbl>
 1 005    A      N       EXP    EXP    2021 Aug N(67121, 4.1e+08)  67121.
 2 005    A      N       EXP    EXP    2021 Sep N(95706, 8.3e+08)  95706.
 3 005    A      N       EXP    EXP    2021 Oct N(73173, 4.9e+08)  73173.
 4 005    A      N       EXP    EXP    2021 Nov N(57981, 3.1e+08)  57981.
 5 005    A      N       EXP    EXP    2021 Dec N(42901, 1.7e+08)  42901.
 6 005    A      N       EXP    EXP    2022 Jan N(62766, 3.6e+08)  62766.
 7 005    A      N       EXP    EXP    2022 Feb N(79394, 5.7e+08)  79394.
 8 005    A      N       EXP    EXP    2022 Mar N(61960, 3.5e+08)  61960.
 9 005    A      N       EXP    EXP    2022 Apr N(60882, 3.4e+08)  60882.
10 005    A      N       EXP    EXP    2022 May  N(106257, 1e+09) 106257.
# … with 26 more rows