从 hts2 包中获取预测区间：VaughanR0/Streamline-R：分层和分组时间序列

Question

我目前正在用 R 开发我的第一个 TS 项目，并使用 hts2 包来包含预测间隔。但是，我对 R 还很陌生，根本不知道如何从我的 gts 的不同 groups/leves 中提取预测区间。例如：如何获得 Top 系列的预测区间？

感谢您的帮助！

问题来了：这是我的原始数据框的样子：

structure(list(mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076, 
2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105), mAK55 = c(1820, 
1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622, 
4773, 4854), mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549, 
5967, 5670, 6104, 7466, 8193, 8672, 8963), mAK65 = c(5858, 5746, 
6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885, 
11536, 11896), mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838, 
8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815), mAK75 = c(8788, 
9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349, 
12409, 12044, 11765, 11886), mAK80 = c(6896, 7597, 8494, 8809, 
8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537
),  mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316, 
3915, 4068, 4985, 5747, 6196, 6776), mAK90 = c(524, 656, 814, 
975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557), 
wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868, 
2847, 2942, 3304, 3459, 3432, 3379), wAK55 = c(2899, 3086, 
3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636, 
6977, 6782), wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565, 
8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627), wAK65 = c(10012, 
9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110, 
14199, 15512, 15846, 15901), wAK70 = c(17834, 18331, 18951, 
18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579, 
19436, 19140), wAK75 = c(21610, 22108, 23521, 24417, 25463, 
24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354
), wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890, 
20401, 18762, 19966, 23731, 25391, 25097, 23846), wAK85 = c(8926, 
9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213, 
11170, 12460, 13083, 13498), wAK90 = c(1906, 2274, 2676, 
3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398, 
3492)), row.names = c(NA, -14L), class = c("tbl_df", "tbl", 
"data.frame"))

m/w代表men/women，AK50代表一定年龄class（AK 50=50岁及以下...）

我已将其转换为分组时间序列，使用的字符为：

library (hts)

gts1<-ts(data, start=2005,end=2018)

gts<-gts(gts1, characters = list(1, 4))

现在我想使用以下代码对 ARIMA 模型进行一些预测（32 年）：

install.packages("remotes")
remotes::install_github("VaughanR0/Streamline-R")
fcsts = hts2::forecast.gts(gts, h = 32, method = "comb", weights ="wls",keep.intervals=TRUE, fmethod = "arima")

这是我的输出：

summary(fcsts)
Grouped Time Series 
4 Levels 
Number of groups at each level: 1 2 9 18 
Total number of series: 30 
Number of observations in each historical series: 14 
Number of forecasts per series: 32 
Top level series of forecasts: 
Time Series:
Start = 2019 
End = 2050 
Frequency = 1 
[1] 191409.5 191828.0 192924.3 195538.0 199757.3 204983.4 210277.1....

Method: Bottom-up forecasts 
Forecast method: Arima

现在我想获得 Top 系列的预测区间。我试过 "fcsts$upper" 但我只得到底部系列的间隔...

谁能帮帮我？

Answer 1

hts 包不计算预测区间。我们编写了一个新包 fable，它可以满足您的需求，并且更易于用于分层和分组时间序列。这是使用您的数据的示例。

library(tidyverse)
library(tsibble)
#> 
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:dplyr':
#> 
#>     id
library(fable)
#> Loading required package: fabletools

# Create tsibble
data <- tibble(
    year = 2005:2018,
    mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076, 2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105),
    mAK55 = c(1820, 1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622, 4773, 4854),
    mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549, 5967, 5670, 6104, 7466, 8193, 8672, 8963),
    mAK65 = c(5858, 5746, 6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885, 11536, 11896),
    mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838, 8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815),
    mAK75 = c(8788, 9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349, 12409, 12044, 11765, 11886),
    mAK80 = c(6896, 7597, 8494, 8809, 8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537),
    mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316, 3915, 4068, 4985, 5747, 6196, 6776),
    mAK90 = c(524, 656, 814, 975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557),
    wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868, 2847, 2942, 3304, 3459, 3432, 3379),
    wAK55 = c(2899, 3086, 3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636, 6977, 6782),
    wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565, 8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627),
    wAK65 = c(10012, 9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110, 14199, 15512, 15846, 15901),
    wAK70 = c(17834, 18331, 18951, 18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579, 19436, 19140),
    wAK75 = c(21610, 22108, 23521, 24417, 25463, 24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354),
    wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890, 20401, 18762, 19966, 23731, 25391, 25097, 23846),
    wAK85 = c(8926, 9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213, 11170, 12460, 13083, 13498),
    wAK90 = c(1906, 2274, 2676, 3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398, 3492),
  ) %>%
  pivot_longer(-year, names_to = "group", values_to = "value") %>%
  mutate(
    sex = stringr::str_sub(group, 1, 1),
    age = stringr::str_sub(group, 4, 5),
  ) %>%
  select(-group) %>%
  as_tsibble(index=year, key=c(sex,age))

data
#> # A tsibble: 252 x 4 [1Y]
#> # Key:       sex, age [18]
#>     year value sex   age  
#>    <int> <dbl> <chr> <chr>
#>  1  2005  1294 m     50   
#>  2  2006  1417 m     50   
#>  3  2007  1690 m     50   
#>  4  2008  1827 m     50   
#>  5  2009  1973 m     50   
#>  6  2010  2076 m     50   
#>  7  2011  2196 m     50   
#>  8  2012  2127 m     50   
#>  9  2013  2078 m     50   
#> 10  2014  2033 m     50   
#> # … with 242 more rows


fcsts <- data %>% 
  # Specify group structure and create aggregates
  aggregate_key(sex * age, value = sum(value)) %>%
  # Fit models 
  model(arima = ARIMA(value)) %>%
  # Set up reconciliation
  mutate(mint = min_trace(arima)) %>%
  # Produce the forecasts
  forecast(h = 32)
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...

# Create 95% intervals for top level
fcsts %>% 
  hilo(level=95) %>%
  filter(is_aggregated(sex) & is_aggregated(age))
#> # A tsibble: 64 x 6 [1Y]
#> # Key:       sex, age, .model [2]
#>    sex          age          .model  year   value                  `95%`
#>    <chr>        <chr>        <chr>  <dbl>   <dbl>                 <hilo>
#>  1 <aggregated> <aggregated> arima   2019 187774. [173317.2, 202230.3]95
#>  2 <aggregated> <aggregated> arima   2020 186021. [155948.3, 216094.4]95
#>  3 <aggregated> <aggregated> arima   2021 185864. [143996.2, 227731.0]95
#>  4 <aggregated> <aggregated> arima   2022 186589. [137523.8, 235655.0]95
#>  5 <aggregated> <aggregated> arima   2023 187265. [133768.8, 240760.3]95
#>  6 <aggregated> <aggregated> arima   2024 187467. [130517.5, 244415.5]95
#>  7 <aggregated> <aggregated> arima   2025 187303. [126897.4, 247709.1]95
#>  8 <aggregated> <aggregated> arima   2026 187070. [122946.0, 251194.2]95
#>  9 <aggregated> <aggregated> arima   2027 186958. [119048.8, 254866.4]95
#> 10 <aggregated> <aggregated> arima   2028 186979. [115480.3, 258477.4]95
#> # … with 54 more rows

^{由 reprex package (v0.3.0)}

于 2020-04-29 创建

从 hts2 包中获取预测区间：VaughanR0/Streamline-R：分层和分组时间序列

Get Prediction Intervals from hts2 package:VaughanR0/Streamline-R: Hierarchical and Grouped Time Series

time

r

time-series

forecast