从 hts2 包中获取预测区间:VaughanR0/Streamline-R:分层和分组时间序列
Get Prediction Intervals from hts2 package:VaughanR0/Streamline-R: Hierarchical and Grouped Time Series
我目前正在用 R 开发我的第一个 TS 项目,并使用 hts2 包来包含预测间隔。但是,我对 R 还很陌生,根本不知道如何从我的 gts 的不同 groups/leves 中提取预测区间。例如:如何获得 Top 系列的预测区间?
感谢您的帮助!
问题来了:
这是我的原始数据框的样子:
structure(list(mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076,
2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105), mAK55 = c(1820,
1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622,
4773, 4854), mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549,
5967, 5670, 6104, 7466, 8193, 8672, 8963), mAK65 = c(5858, 5746,
6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885,
11536, 11896), mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838,
8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815), mAK75 = c(8788,
9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349,
12409, 12044, 11765, 11886), mAK80 = c(6896, 7597, 8494, 8809,
8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537
), mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316,
3915, 4068, 4985, 5747, 6196, 6776), mAK90 = c(524, 656, 814,
975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557),
wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868,
2847, 2942, 3304, 3459, 3432, 3379), wAK55 = c(2899, 3086,
3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636,
6977, 6782), wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565,
8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627), wAK65 = c(10012,
9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110,
14199, 15512, 15846, 15901), wAK70 = c(17834, 18331, 18951,
18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579,
19436, 19140), wAK75 = c(21610, 22108, 23521, 24417, 25463,
24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354
), wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890,
20401, 18762, 19966, 23731, 25391, 25097, 23846), wAK85 = c(8926,
9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213,
11170, 12460, 13083, 13498), wAK90 = c(1906, 2274, 2676,
3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398,
3492)), row.names = c(NA, -14L), class = c("tbl_df", "tbl",
"data.frame"))
m/w代表men/women,AK50代表一定年龄class(AK 50=50岁及以下...)
我已将其转换为分组时间序列,使用的字符为:
library (hts)
gts1<-ts(data, start=2005,end=2018)
gts<-gts(gts1, characters = list(1, 4))
现在我想使用以下代码对 ARIMA 模型进行一些预测(32 年):
install.packages("remotes")
remotes::install_github("VaughanR0/Streamline-R")
fcsts = hts2::forecast.gts(gts, h = 32, method = "comb", weights ="wls",keep.intervals=TRUE, fmethod = "arima")
这是我的输出:
summary(fcsts)
Grouped Time Series
4 Levels
Number of groups at each level: 1 2 9 18
Total number of series: 30
Number of observations in each historical series: 14
Number of forecasts per series: 32
Top level series of forecasts:
Time Series:
Start = 2019
End = 2050
Frequency = 1
[1] 191409.5 191828.0 192924.3 195538.0 199757.3 204983.4 210277.1....
Method: Bottom-up forecasts
Forecast method: Arima
现在我想获得 Top 系列的预测区间。我试过 "fcsts$upper" 但我只得到底部系列的间隔...
谁能帮帮我?
hts
包不计算预测区间。我们编写了一个新包 fable
,它可以满足您的需求,并且更易于用于分层和分组时间序列。这是使用您的数据的示例。
library(tidyverse)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:dplyr':
#>
#> id
library(fable)
#> Loading required package: fabletools
# Create tsibble
data <- tibble(
year = 2005:2018,
mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076, 2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105),
mAK55 = c(1820, 1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622, 4773, 4854),
mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549, 5967, 5670, 6104, 7466, 8193, 8672, 8963),
mAK65 = c(5858, 5746, 6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885, 11536, 11896),
mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838, 8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815),
mAK75 = c(8788, 9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349, 12409, 12044, 11765, 11886),
mAK80 = c(6896, 7597, 8494, 8809, 8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537),
mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316, 3915, 4068, 4985, 5747, 6196, 6776),
mAK90 = c(524, 656, 814, 975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557),
wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868, 2847, 2942, 3304, 3459, 3432, 3379),
wAK55 = c(2899, 3086, 3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636, 6977, 6782),
wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565, 8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627),
wAK65 = c(10012, 9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110, 14199, 15512, 15846, 15901),
wAK70 = c(17834, 18331, 18951, 18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579, 19436, 19140),
wAK75 = c(21610, 22108, 23521, 24417, 25463, 24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354),
wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890, 20401, 18762, 19966, 23731, 25391, 25097, 23846),
wAK85 = c(8926, 9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213, 11170, 12460, 13083, 13498),
wAK90 = c(1906, 2274, 2676, 3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398, 3492),
) %>%
pivot_longer(-year, names_to = "group", values_to = "value") %>%
mutate(
sex = stringr::str_sub(group, 1, 1),
age = stringr::str_sub(group, 4, 5),
) %>%
select(-group) %>%
as_tsibble(index=year, key=c(sex,age))
data
#> # A tsibble: 252 x 4 [1Y]
#> # Key: sex, age [18]
#> year value sex age
#> <int> <dbl> <chr> <chr>
#> 1 2005 1294 m 50
#> 2 2006 1417 m 50
#> 3 2007 1690 m 50
#> 4 2008 1827 m 50
#> 5 2009 1973 m 50
#> 6 2010 2076 m 50
#> 7 2011 2196 m 50
#> 8 2012 2127 m 50
#> 9 2013 2078 m 50
#> 10 2014 2033 m 50
#> # … with 242 more rows
fcsts <- data %>%
# Specify group structure and create aggregates
aggregate_key(sex * age, value = sum(value)) %>%
# Fit models
model(arima = ARIMA(value)) %>%
# Set up reconciliation
mutate(mint = min_trace(arima)) %>%
# Produce the forecasts
forecast(h = 32)
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
# Create 95% intervals for top level
fcsts %>%
hilo(level=95) %>%
filter(is_aggregated(sex) & is_aggregated(age))
#> # A tsibble: 64 x 6 [1Y]
#> # Key: sex, age, .model [2]
#> sex age .model year value `95%`
#> <chr> <chr> <chr> <dbl> <dbl> <hilo>
#> 1 <aggregated> <aggregated> arima 2019 187774. [173317.2, 202230.3]95
#> 2 <aggregated> <aggregated> arima 2020 186021. [155948.3, 216094.4]95
#> 3 <aggregated> <aggregated> arima 2021 185864. [143996.2, 227731.0]95
#> 4 <aggregated> <aggregated> arima 2022 186589. [137523.8, 235655.0]95
#> 5 <aggregated> <aggregated> arima 2023 187265. [133768.8, 240760.3]95
#> 6 <aggregated> <aggregated> arima 2024 187467. [130517.5, 244415.5]95
#> 7 <aggregated> <aggregated> arima 2025 187303. [126897.4, 247709.1]95
#> 8 <aggregated> <aggregated> arima 2026 187070. [122946.0, 251194.2]95
#> 9 <aggregated> <aggregated> arima 2027 186958. [119048.8, 254866.4]95
#> 10 <aggregated> <aggregated> arima 2028 186979. [115480.3, 258477.4]95
#> # … with 54 more rows
由 reprex package (v0.3.0)
于 2020-04-29 创建
我目前正在用 R 开发我的第一个 TS 项目,并使用 hts2 包来包含预测间隔。但是,我对 R 还很陌生,根本不知道如何从我的 gts 的不同 groups/leves 中提取预测区间。例如:如何获得 Top 系列的预测区间?
感谢您的帮助!
问题来了: 这是我的原始数据框的样子:
structure(list(mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076,
2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105), mAK55 = c(1820,
1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622,
4773, 4854), mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549,
5967, 5670, 6104, 7466, 8193, 8672, 8963), mAK65 = c(5858, 5746,
6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885,
11536, 11896), mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838,
8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815), mAK75 = c(8788,
9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349,
12409, 12044, 11765, 11886), mAK80 = c(6896, 7597, 8494, 8809,
8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537
), mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316,
3915, 4068, 4985, 5747, 6196, 6776), mAK90 = c(524, 656, 814,
975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557),
wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868,
2847, 2942, 3304, 3459, 3432, 3379), wAK55 = c(2899, 3086,
3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636,
6977, 6782), wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565,
8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627), wAK65 = c(10012,
9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110,
14199, 15512, 15846, 15901), wAK70 = c(17834, 18331, 18951,
18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579,
19436, 19140), wAK75 = c(21610, 22108, 23521, 24417, 25463,
24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354
), wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890,
20401, 18762, 19966, 23731, 25391, 25097, 23846), wAK85 = c(8926,
9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213,
11170, 12460, 13083, 13498), wAK90 = c(1906, 2274, 2676,
3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398,
3492)), row.names = c(NA, -14L), class = c("tbl_df", "tbl",
"data.frame"))
m/w代表men/women,AK50代表一定年龄class(AK 50=50岁及以下...)
我已将其转换为分组时间序列,使用的字符为:
library (hts)
gts1<-ts(data, start=2005,end=2018)
gts<-gts(gts1, characters = list(1, 4))
现在我想使用以下代码对 ARIMA 模型进行一些预测(32 年):
install.packages("remotes")
remotes::install_github("VaughanR0/Streamline-R")
fcsts = hts2::forecast.gts(gts, h = 32, method = "comb", weights ="wls",keep.intervals=TRUE, fmethod = "arima")
这是我的输出:
summary(fcsts)
Grouped Time Series
4 Levels
Number of groups at each level: 1 2 9 18
Total number of series: 30
Number of observations in each historical series: 14
Number of forecasts per series: 32
Top level series of forecasts:
Time Series:
Start = 2019
End = 2050
Frequency = 1
[1] 191409.5 191828.0 192924.3 195538.0 199757.3 204983.4 210277.1....
Method: Bottom-up forecasts
Forecast method: Arima
现在我想获得 Top 系列的预测区间。我试过 "fcsts$upper" 但我只得到底部系列的间隔...
谁能帮帮我?
hts
包不计算预测区间。我们编写了一个新包 fable
,它可以满足您的需求,并且更易于用于分层和分组时间序列。这是使用您的数据的示例。
library(tidyverse)
library(tsibble)
#>
#> Attaching package: 'tsibble'
#> The following object is masked from 'package:dplyr':
#>
#> id
library(fable)
#> Loading required package: fabletools
# Create tsibble
data <- tibble(
year = 2005:2018,
mAK50 = c(1294, 1417, 1690, 1827, 1973, 2076, 2196, 2127, 2078, 2033, 2260, 2309, 2265, 2105),
mAK55 = c(1820, 1921, 2295, 2556, 2820, 3044, 3144, 3366, 3443, 3452, 4144, 4622, 4773, 4854),
mAK60 = c(3401, 3895, 4634, 5078, 5527, 5570, 5549, 5967, 5670, 6104, 7466, 8193, 8672, 8963),
mAK65 = c(5858, 5746, 6045, 6577, 7002, 7411, 8202, 8239, 7913, 8547, 9642, 10885, 11536, 11896),
mAK70 = c(8832, 9423, 10374, 10639, 10633, 9838, 8931, 8442, 7597, 8091, 9750, 11050, 11999, 11815),
mAK75 = c(8788, 9304, 10459, 11598, 12378, 12667, 12871, 12720, 11261, 11349, 12409, 12044, 11765, 11886),
mAK80 = c(6896, 7597, 8494, 8809, 8679, 9086, 9274, 9317, 9155, 10212, 12683, 13813, 14034, 13537),
mAK85 = c(2595, 2827, 3256, 3702, 4087, 4186, 4446, 4316, 3915, 4068, 4985, 5747, 6196, 6776),
mAK90 = c(524, 656, 814, 975, 951, 963, 995, 1046, 1064, 1134, 1302, 1460, 1499, 1557),
wAK50 = c(1630, 1718, 2108, 2334, 2537, 2578, 2797, 2868, 2847, 2942, 3304, 3459, 3432, 3379),
wAK55 = c(2899, 3086, 3613, 3837, 4250, 4508, 4569, 4809, 4837, 4856, 5886, 6636, 6977, 6782),
wAK60 = c(5800, 6661, 7377, 8250, 8377, 8565, 8662, 8633, 8115, 8640, 10007, 11020, 11546, 11627),
wAK65 = c(10012, 9838, 10020, 10572, 11102, 11689, 12467, 12637, 11815, 12110, 14199, 15512, 15846, 15901),
wAK70 = c(17834, 18331, 18951, 18934, 18610, 16847, 14967, 14113, 12781, 13552, 16253, 18579, 19436, 19140),
wAK75 = c(21610, 22108, 23521, 24417, 25463, 24652, 24424, 22900, 20285, 20136, 21084, 20680, 19711, 19354),
wAK80 = c(18295, 19213, 20224, 20890, 20929, 20803, 20890, 20401, 18762, 19966, 23731, 25391, 25097, 23846),
wAK85 = c(8926, 9372, 10005, 10664, 10693, 10510, 10713, 9878, 8918, 9213, 11170, 12460, 13083, 13498),
wAK90 = c(1906, 2274, 2676, 3044, 3125, 3105, 3107, 2996, 2567, 2720, 3029, 3457, 3398, 3492),
) %>%
pivot_longer(-year, names_to = "group", values_to = "value") %>%
mutate(
sex = stringr::str_sub(group, 1, 1),
age = stringr::str_sub(group, 4, 5),
) %>%
select(-group) %>%
as_tsibble(index=year, key=c(sex,age))
data
#> # A tsibble: 252 x 4 [1Y]
#> # Key: sex, age [18]
#> year value sex age
#> <int> <dbl> <chr> <chr>
#> 1 2005 1294 m 50
#> 2 2006 1417 m 50
#> 3 2007 1690 m 50
#> 4 2008 1827 m 50
#> 5 2009 1973 m 50
#> 6 2010 2076 m 50
#> 7 2011 2196 m 50
#> 8 2012 2127 m 50
#> 9 2013 2078 m 50
#> 10 2014 2033 m 50
#> # … with 242 more rows
fcsts <- data %>%
# Specify group structure and create aggregates
aggregate_key(sex * age, value = sum(value)) %>%
# Fit models
model(arima = ARIMA(value)) %>%
# Set up reconciliation
mutate(mint = min_trace(arima)) %>%
# Produce the forecasts
forecast(h = 32)
#> New names:
#> * `` -> ...1
#> * `` -> ...2
#> * `` -> ...3
#> * `` -> ...4
#> * `` -> ...5
#> * ...
# Create 95% intervals for top level
fcsts %>%
hilo(level=95) %>%
filter(is_aggregated(sex) & is_aggregated(age))
#> # A tsibble: 64 x 6 [1Y]
#> # Key: sex, age, .model [2]
#> sex age .model year value `95%`
#> <chr> <chr> <chr> <dbl> <dbl> <hilo>
#> 1 <aggregated> <aggregated> arima 2019 187774. [173317.2, 202230.3]95
#> 2 <aggregated> <aggregated> arima 2020 186021. [155948.3, 216094.4]95
#> 3 <aggregated> <aggregated> arima 2021 185864. [143996.2, 227731.0]95
#> 4 <aggregated> <aggregated> arima 2022 186589. [137523.8, 235655.0]95
#> 5 <aggregated> <aggregated> arima 2023 187265. [133768.8, 240760.3]95
#> 6 <aggregated> <aggregated> arima 2024 187467. [130517.5, 244415.5]95
#> 7 <aggregated> <aggregated> arima 2025 187303. [126897.4, 247709.1]95
#> 8 <aggregated> <aggregated> arima 2026 187070. [122946.0, 251194.2]95
#> 9 <aggregated> <aggregated> arima 2027 186958. [119048.8, 254866.4]95
#> 10 <aggregated> <aggregated> arima 2028 186979. [115480.3, 258477.4]95
#> # … with 54 more rows
由 reprex package (v0.3.0)
于 2020-04-29 创建