R timeseries : lapply(listed_ts, function(x) auto.arima(x, allowmean = F)) 错误：找不到对象 'listed_ts'

Question

我想对每个 sales_point_id 分别进行每周时间序列分析，其中包含事实值和预测结果。

dput()

# Sample data: two sales points (ids 1 and 2), each observed over 21
# consecutive periods, with one integer amount per period.
timeseries <- data.frame(
  sales_point_id = rep(1:2, each = 21L),
  calendar_id_operday = rep(1:21, times = 2L),
  line_fact_amt = c(
    # sales_point_id == 1
    55767L, 59913L, 36363L, 48558L, 505L, 76344L, 22533L,
    11965L, 78944L, 36754L, 30621L, 55716L, 32470L, 62165L,
    57986L, 2652L, 16487L, 72849L, 73715L, 65656L, 64411L,
    # sales_point_id == 2
    47460L, 61866L, 10877L, 72392L, 53011L, 23544L, 76692L,
    10388L, 24255L, 56684L, 59329L, 6655L, 65612L, 17495L,
    10389L, 63702L, 47407L, 78782L, 22898L, 21151L, 32587L
  )
)

我需要按周进行预测，其中 week=1 指的是 20210101-20210108（ymd）这一周。但这份数据的特殊之处在于没有日期格式，只有周序号。下面是我徒劳的尝试：

library("lubridate")
# NOTE(review): this is the asker's attempt, kept exactly as posted. It stops
# with "object 'listed_ts' not found" because `listed_ts` is referenced below
# but never assigned anywhere in this script.

# first the grouping variable
# (the sales point id converted to character, used as the split key)
timeseries$group <- paste0(timeseries$sales_point_id)

groups <- unique(timeseries$group)
# find starting date per group and save them as a list of elements
# NOTE(review): calendar_id_operday holds period numbers (1-21), not date
# strings, and no format is passed to as.Date() -- this conversion presumably
# fails or yields no usable dates; confirm.
timeseries$date <- as.Date(as.character(timeseries$calendar_id_operday), )
timeseries <- timeseries[order(timeseries$calendar_id_operday),]
# First row of each group (after ordering) supplies that group's start date.
start_dates <- format(timeseries$date[match(groups, timeseries$group)], )
start_dates <- strsplit(start_dates, ' ')

# One data frame per sales point, named by the group key.
listed <- split(timeseries,timeseries$group)
str(listed)

# Bare reference to `listed_ts`, which no earlier line creates; evaluating it
# raises the "object not found" error. Presumably a step turning each element
# of `listed` into a time-series object was meant to go here.
listed_ts
# NOTE(review): auto.arima() and forecast() are not provided by lubridate, the
# only package loaded above; presumably the forecast package is intended --
# confirm.
listed_arima <- lapply(listed_ts,function(x) auto.arima(x,allowmean = F )) 
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,12) )# forecast 12 weeks ahead
listed_forecast

# need to flat it down to a data.frame, do.call and rbind help:
do.call(rbind,listed_forecast)


#Get a prediction of initial values
# (in-sample fitted values of each model in the list)

lapply(listed_arima, fitted)


 Error in lapply(listed_ts, function(x) auto.arima(x, allowmean = F))

我哪里做错了？应该如何修复，才能让时间序列分析正常运行？我期望的理想结果如下（仅为输出格式示例）：

   sales_point_id calendar_id_operday line_fact_amt.fact
1               1                   1                436
2               1                   2                 56
3               1                   3                 66
4               1                   4                NaN
5               1                   5                NaN
6               1                   6                NaN
7               1                   7                NaN
8               1                   8                NaN
9               1                   9                NaN
10              1                  10                NaN
11              1                  11                NaN
12              1                  12                NaN
13              1                  13                NaN
14              1                  14                NaN
15              1                  15                NaN
   line_fact_amt.predict forecast.ahead
1                    435            NaN
2                     57            NaN
3                     70            NaN
4                    NaN            524
5                    NaN            945
6                    NaN            235
7                    NaN            200
8                    NaN            326
9                    NaN            437
10                   NaN              7
11                   NaN            191
12                   NaN            321
13                   NaN            919
14                   NaN            407
15                   NaN             82

我一如既往地感谢您的帮助。

Answer 1

如果您使用 fable 包而不是 forecast 包，事情会容易得多。它能更好地处理周度数据，并且可以同时对多个序列进行预测。

下面是一个使用您的数据的示例。首先，我们将数据转换为 tsibble 对象，这是 fable 所要求的数据结构。它类似于数据框，只是多了一个时间索引，以及一个可选的 key，用来区分各个独立的时间序列。

library(fable)
library(tsibble)
library(lubridate)
library(dplyr)

# Convert the data frame into a weekly tsibble, one series per sales point.
# Step 1: derive a weekly index from the integer period counter.
timeseries <- mutate(
  timeseries,
  week = yearweek(paste0("2021-W", calendar_id_operday))
)
# Step 2: keep only the index, the key and the measured value.
timeseries <- select(timeseries, week, sales_point_id, line_fact_amt)
# Step 3: declare `week` as the time index and `sales_point_id` as the key.
timeseries <- as_tsibble(timeseries, index = week, key = sales_point_id)
timeseries
#> # A tsibble: 42 x 3 [1W]
#> # Key:       sales_point_id [2]
#>        week sales_point_id line_fact_amt
#>      <week>          <int>         <int>
#>  1 2021 W01              1         55767
#>  2 2021 W02              1         59913
#>  3 2021 W03              1         36363
#>  4 2021 W04              1         48558
#>  5 2021 W05              1           505
#>  6 2021 W06              1         76344
#>  7 2021 W07              1         22533
#>  8 2021 W08              1         11965
#>  9 2021 W09              1         78944
#> 10 2021 W10              1         36754
#> # … with 32 more rows

然后，我们为每个序列拟合 ARIMA 模型，生成未来 12 周的预测，并按要求将拟合值与预测值合并在一起。.mean 列包含点预测值。

# Fit ARIMA to line_fact_amt separately for every keyed series
arima <- model(timeseries, arima = ARIMA(line_fact_amt))

# Produce forecasts 12 periods beyond the end of each series
fc <- arima %>%
  forecast(h = 12)

# Stack the in-sample rows (fitted values, residuals) on top of the forecasts
arima %>%
  augment() %>%
  bind_rows(fc)
#> # A tsibble: 66 x 8 [1W]
#> # Key:       sales_point_id, .model [2]
#>    sales_point_id .model     week line_fact_amt .fitted  .resid  .innov .mean
#>             <int> <chr>    <week>        <dist>   <dbl>   <dbl>   <dbl> <dbl>
#>  1              1 arima  2021 W01         55767  45827.   9940.   9940.    NA
#>  2              1 arima  2021 W02         59913  45827.  14086.  14086.    NA
#>  3              1 arima  2021 W03         36363  45827.  -9464.  -9464.    NA
#>  4              1 arima  2021 W04         48558  45827.   2731.   2731.    NA
#>  5              1 arima  2021 W05           505  45827. -45322. -45322.    NA
#>  6              1 arima  2021 W06         76344  45827.  30517.  30517.    NA
#>  7              1 arima  2021 W07         22533  45827. -23294. -23294.    NA
#>  8              1 arima  2021 W08         11965  45827. -33862. -33862.    NA
#>  9              1 arima  2021 W09         78944  45827.  33117.  33117.    NA
#> 10              1 arima  2021 W10         36754  45827.  -9073.  -9073.    NA
#> # … with 56 more rows

由 reprex 包 (v2.0.1) 于 2022-03-01 创建

有关更多信息，请参阅我在 OTexts.com/fpp3 的教科书。

R timeseries : lapply(listed_ts, function(x) auto.arima(x, allowmean = F)) 错误：找不到对象 'listed_ts'

R timeseries :Error in lapply(listed_ts, function(x) auto.arima(x, allowmean = F)) : object 'listed_ts' not found

r

forecasting

dplyr