R timeseries : lapply(listed_ts, function(x) auto.arima(x, allowmean = F)) 错误:找不到对象 'listed_ts'
R timeseries :Error in lapply(listed_ts, function(x) auto.arima(x, allowmean = F)) : object 'listed_ts' not found
我想对每个 sales_point_id 分别进行每周时间序列分析,其中包含事实值和预测结果。
dput()
# Example data: 21 consecutive weekly observations (calendar_id_operday 1..21)
# of line_fact_amt for each of two sales points (sales_point_id 1 and 2).
timeseries <- data.frame(
  sales_point_id = rep(1:2, each = 21L),
  calendar_id_operday = rep(1:21, times = 2L),
  line_fact_amt = c(
    # sales_point_id 1
    55767L, 59913L, 36363L, 48558L, 505L, 76344L, 22533L,
    11965L, 78944L, 36754L, 30621L, 55716L, 32470L, 62165L,
    57986L, 2652L, 16487L, 72849L, 73715L, 65656L, 64411L,
    # sales_point_id 2
    47460L, 61866L, 10877L, 72392L, 53011L, 23544L, 76692L,
    10388L, 24255L, 56684L, 59329L, 6655L, 65612L, 17495L,
    10389L, 63702L, 47407L, 78782L, 22898L, 21151L, 32587L
  )
)
我需要按周进行预测,其中 week=1 对应 20210101–20210108(ymd)这一周的平均值;但这份数据的特殊之处在于没有日期格式,只有周序号。
以下是我未能成功的尝试:
# Attempt to fit one ARIMA model per sales_point_id, forecast 12 steps ahead
# and collect fitted values.
# NOTE(review): as written this cannot run to completion — `listed_ts` is used
# below but is never assigned anywhere in the code shown.
# NOTE(review): auto.arima() and forecast() are not lubridate functions;
# presumably library(forecast) must be loaded as well — confirm.
library("lubridate")
# first the grouping variable
timeseries$group <- paste0(timeseries$sales_point_id)
groups <- unique(timeseries$group)
# find starting date per group and save them as a list of elements
# NOTE(review): calendar_id_operday holds the week numbers 1..21, not date
# strings, and the trailing comma leaves the format argument empty, so this
# conversion presumably does not produce usable dates — confirm.
timeseries$date <- as.Date(as.character(timeseries$calendar_id_operday), )
timeseries <- timeseries[order(timeseries$calendar_id_operday),]
start_dates <- format(timeseries$date[match(groups, timeseries$group)], )
start_dates <- strsplit(start_dates, ' ')
# one data frame per group label
listed <- split(timeseries,timeseries$group)
str(listed)
# NOTE(review): no object named `listed_ts` exists at this point; the elements
# of `listed` were presumably meant to be converted to time-series objects and
# stored under this name first. This line and the lapply() below raise
# "object 'listed_ts' not found".
listed_ts
# allowmean = F uses the reassignable shorthand `F`; FALSE is the safe spelling.
listed_arima <- lapply(listed_ts,function(x) auto.arima(x,allowmean = F ))
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,12) )# forecast 12 weeks ahead
listed_forecast
# need to flat it down to a data.frame, do.call and rbind help:
do.call(rbind,listed_forecast)
#Get a prediction of initial values
lapply(listed_arima, fitted)
Error in lapply(listed_ts, function(x) auto.arima(x, allowmean = F))
我哪里做错了?应该如何修改才能让时间序列分析正常运行?
我期望的结果如下(仅为输出格式示例)。
sales_point_id calendar_id_operday line_fact_amt.fact
1 1 1 436
2 1 2 56
3 1 3 66
4 1 4 NaN
5 1 5 NaN
6 1 6 NaN
7 1 7 NaN
8 1 8 NaN
9 1 9 NaN
10 1 10 NaN
11 1 11 NaN
12 1 12 NaN
13 1 13 NaN
14 1 14 NaN
15 1 15 NaN
line_fact_amt.predict forecast.ahead
1 435 NaN
2 57 NaN
3 70 NaN
4 NaN 524
5 NaN 945
6 NaN 235
7 NaN 200
8 NaN 326
9 NaN 437
10 NaN 7
11 NaN 191
12 NaN 321
13 NaN 919
14 NaN 407
15 NaN 82
我一如既往地感谢您的帮助。
如果您使用 fable 包而不是 forecast 包,事情会简单得多。它能更好地处理每周数据,并且可以同时预测多个序列。
下面是一个使用您的数据的示例。首先,我们把数据转换为 tsibble 对象,这是 fable 所要求的数据结构。它类似于数据框,但带有一个时间索引,以及一个可选的 key 用来区分各个独立的时间序列。
library(fable)
library(tsibble)
library(lubridate)
library(dplyr)
# Convert the data frame to a tsibble: the week number becomes a 2021
# year-week index, and each sales_point_id keys its own series.
timeseries <- timeseries %>%
  transmute(
    week = yearweek(paste0("2021-W", calendar_id_operday)),
    sales_point_id,
    line_fact_amt
  ) %>%
  as_tsibble(key = sales_point_id, index = week)
timeseries
#> # A tsibble: 42 x 3 [1W]
#> # Key: sales_point_id [2]
#> week sales_point_id line_fact_amt
#> <week> <int> <int>
#> 1 2021 W01 1 55767
#> 2 2021 W02 1 59913
#> 3 2021 W03 1 36363
#> 4 2021 W04 1 48558
#> 5 2021 W05 1 505
#> 6 2021 W06 1 76344
#> 7 2021 W07 1 22533
#> 8 2021 W08 1 11965
#> 9 2021 W09 1 78944
#> 10 2021 W10 1 36754
#> # … with 32 more rows
然后我们对每个序列拟合 ARIMA 模型,生成未来 12 周的预测,并按要求将拟合值与预测值合并。
`.mean` 列包含点预测值。
# One ARIMA model per key (sales_point_id)
arima <- model(timeseries, arima = ARIMA(line_fact_amt))
# Forecasts 12 steps ahead for every fitted model
fc <- arima %>%
  forecast(h = 12)
# Stack the in-sample fitted values on top of the forecasts
augment(arima) %>%
  bind_rows(fc)
#> # A tsibble: 66 x 8 [1W]
#> # Key: sales_point_id, .model [2]
#> sales_point_id .model week line_fact_amt .fitted .resid .innov .mean
#> <int> <chr> <week> <dist> <dbl> <dbl> <dbl> <dbl>
#> 1 1 arima 2021 W01 55767 45827. 9940. 9940. NA
#> 2 1 arima 2021 W02 59913 45827. 14086. 14086. NA
#> 3 1 arima 2021 W03 36363 45827. -9464. -9464. NA
#> 4 1 arima 2021 W04 48558 45827. 2731. 2731. NA
#> 5 1 arima 2021 W05 505 45827. -45322. -45322. NA
#> 6 1 arima 2021 W06 76344 45827. 30517. 30517. NA
#> 7 1 arima 2021 W07 22533 45827. -23294. -23294. NA
#> 8 1 arima 2021 W08 11965 45827. -33862. -33862. NA
#> 9 1 arima 2021 W09 78944 45827. 33117. 33117. NA
#> 10 1 arima 2021 W10 36754 45827. -9073. -9073. NA
#> # … with 56 more rows
由 reprex 包(v2.0.1)于 2022-03-01 创建。
有关更多信息,请参阅我的教科书:OTexts.com/fpp3。
我想对每个 sales_point_id 分别进行每周时间序列分析,其中包含事实值和预测结果。
dput()
# Example data: 21 consecutive weekly observations (calendar_id_operday 1..21)
# of line_fact_amt for each of two sales points (sales_point_id 1 and 2).
timeseries <- data.frame(
  sales_point_id = rep(1:2, each = 21L),
  calendar_id_operday = rep(1:21, times = 2L),
  line_fact_amt = c(
    # sales_point_id 1
    55767L, 59913L, 36363L, 48558L, 505L, 76344L, 22533L,
    11965L, 78944L, 36754L, 30621L, 55716L, 32470L, 62165L,
    57986L, 2652L, 16487L, 72849L, 73715L, 65656L, 64411L,
    # sales_point_id 2
    47460L, 61866L, 10877L, 72392L, 53011L, 23544L, 76692L,
    10388L, 24255L, 56684L, 59329L, 6655L, 65612L, 17495L,
    10389L, 63702L, 47407L, 78782L, 22898L, 21151L, 32587L
  )
)
我需要按周进行预测,其中 week=1 对应 20210101–20210108(ymd)这一周的平均值;但这份数据的特殊之处在于没有日期格式,只有周序号。以下是我未能成功的尝试:
# Attempt to fit one ARIMA model per sales_point_id, forecast 12 steps ahead
# and collect fitted values.
# NOTE(review): as written this cannot run to completion — `listed_ts` is used
# below but is never assigned anywhere in the code shown.
# NOTE(review): auto.arima() and forecast() are not lubridate functions;
# presumably library(forecast) must be loaded as well — confirm.
library("lubridate")
# first the grouping variable
timeseries$group <- paste0(timeseries$sales_point_id)
groups <- unique(timeseries$group)
# find starting date per group and save them as a list of elements
# NOTE(review): calendar_id_operday holds the week numbers 1..21, not date
# strings, and the trailing comma leaves the format argument empty, so this
# conversion presumably does not produce usable dates — confirm.
timeseries$date <- as.Date(as.character(timeseries$calendar_id_operday), )
timeseries <- timeseries[order(timeseries$calendar_id_operday),]
start_dates <- format(timeseries$date[match(groups, timeseries$group)], )
start_dates <- strsplit(start_dates, ' ')
# one data frame per group label
listed <- split(timeseries,timeseries$group)
str(listed)
# NOTE(review): no object named `listed_ts` exists at this point; the elements
# of `listed` were presumably meant to be converted to time-series objects and
# stored under this name first. This line and the lapply() below raise
# "object 'listed_ts' not found".
listed_ts
# allowmean = F uses the reassignable shorthand `F`; FALSE is the safe spelling.
listed_arima <- lapply(listed_ts,function(x) auto.arima(x,allowmean = F ))
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,12) )# forecast 12 weeks ahead
listed_forecast
# need to flat it down to a data.frame, do.call and rbind help:
do.call(rbind,listed_forecast)
#Get a prediction of initial values
lapply(listed_arima, fitted)
Error in lapply(listed_ts, function(x) auto.arima(x, allowmean = F))
我哪里做错了?应该如何修改才能让时间序列分析正常运行?我期望的结果如下(仅为输出格式示例)。
sales_point_id calendar_id_operday line_fact_amt.fact
1 1 1 436
2 1 2 56
3 1 3 66
4 1 4 NaN
5 1 5 NaN
6 1 6 NaN
7 1 7 NaN
8 1 8 NaN
9 1 9 NaN
10 1 10 NaN
11 1 11 NaN
12 1 12 NaN
13 1 13 NaN
14 1 14 NaN
15 1 15 NaN
line_fact_amt.predict forecast.ahead
1 435 NaN
2 57 NaN
3 70 NaN
4 NaN 524
5 NaN 945
6 NaN 235
7 NaN 200
8 NaN 326
9 NaN 437
10 NaN 7
11 NaN 191
12 NaN 321
13 NaN 919
14 NaN 407
15 NaN 82
我一如既往地感谢您的帮助。
如果您使用 fable 包而不是 forecast 包,事情会简单得多。它能更好地处理每周数据,并且可以同时预测多个序列。
下面是一个使用您的数据的示例。首先,我们把数据转换为 tsibble 对象,这是 fable 所要求的数据结构。它类似于数据框,但带有一个时间索引,以及一个可选的 key 用来区分各个独立的时间序列。
library(fable)
library(tsibble)
library(lubridate)
library(dplyr)
# Convert the data frame to a tsibble: the week number becomes a 2021
# year-week index, and each sales_point_id keys its own series.
timeseries <- timeseries %>%
  transmute(
    week = yearweek(paste0("2021-W", calendar_id_operday)),
    sales_point_id,
    line_fact_amt
  ) %>%
  as_tsibble(key = sales_point_id, index = week)
timeseries
#> # A tsibble: 42 x 3 [1W]
#> # Key: sales_point_id [2]
#> week sales_point_id line_fact_amt
#> <week> <int> <int>
#> 1 2021 W01 1 55767
#> 2 2021 W02 1 59913
#> 3 2021 W03 1 36363
#> 4 2021 W04 1 48558
#> 5 2021 W05 1 505
#> 6 2021 W06 1 76344
#> 7 2021 W07 1 22533
#> 8 2021 W08 1 11965
#> 9 2021 W09 1 78944
#> 10 2021 W10 1 36754
#> # … with 32 more rows
然后我们对每个序列拟合 ARIMA 模型,生成未来 12 周的预测,并按要求将拟合值与预测值合并。
`.mean` 列包含点预测值。
# One ARIMA model per key (sales_point_id)
arima <- model(timeseries, arima = ARIMA(line_fact_amt))
# Forecasts 12 steps ahead for every fitted model
fc <- arima %>%
  forecast(h = 12)
# Stack the in-sample fitted values on top of the forecasts
augment(arima) %>%
  bind_rows(fc)
#> # A tsibble: 66 x 8 [1W]
#> # Key: sales_point_id, .model [2]
#> sales_point_id .model week line_fact_amt .fitted .resid .innov .mean
#> <int> <chr> <week> <dist> <dbl> <dbl> <dbl> <dbl>
#> 1 1 arima 2021 W01 55767 45827. 9940. 9940. NA
#> 2 1 arima 2021 W02 59913 45827. 14086. 14086. NA
#> 3 1 arima 2021 W03 36363 45827. -9464. -9464. NA
#> 4 1 arima 2021 W04 48558 45827. 2731. 2731. NA
#> 5 1 arima 2021 W05 505 45827. -45322. -45322. NA
#> 6 1 arima 2021 W06 76344 45827. 30517. 30517. NA
#> 7 1 arima 2021 W07 22533 45827. -23294. -23294. NA
#> 8 1 arima 2021 W08 11965 45827. -33862. -33862. NA
#> 9 1 arima 2021 W09 78944 45827. 33117. 33117. NA
#> 10 1 arima 2021 W10 36754 45827. -9073. -9073. NA
#> # … with 56 more rows
由 reprex 包(v2.0.1)于 2022-03-01 创建。
有关更多信息,请参阅我的教科书:OTexts.com/fpp3。