R 中的时间序列预测;在初始预测后绘制 "events" 并生成具有指定日期范围的新预测图

Time series forecasting in R; plotting "events" and generating new forecasting plots with specified date range after initial forecast

我创建了一个函数,允许我使用 fable 包进行时间序列预测。该函数的想法是在特定 date/event 之后分析观察值与预测值。这是一个生成日期列的模拟数据框:-

set.seed(1)
df <- data.frame(Date = sort(sample(seq(as.Date('2018/01/01'), as.Date('2020/09/17'), by="day"),1368883, replace = T)))

这是我创建的函数。您指定数据,然后是事件日期,然后是预测期(以天为单位),最后是图表的标题。

event_analysis<-function(data,eventdate,period,title){
  require(dplyr)
  require(tsibble)
  require(fable)
  require(fabletools)
  require(imputeTS)
  require(ggplot2)
  data_count<-data%>%
    group_by(Date)%>%
    summarise(Count=n())
  
  data_count<-as_tsibble(data_count)
  data_count<-na_mean(data_count)
  
  
  train <- data_count %>%
    #sample_frac(0.8)
    filter(Date<=as.Date(eventdate))
  
  fit <- train %>%
    model(
      ets = ETS(Count),
      arima = ARIMA(Count),
      snaive = SNAIVE(Count)
    ) %>%
    mutate(mixed = (ets + arima + snaive) / 3)
  
  
  fc <- fit %>% forecast(h = period)
  
  forecastplot<-fc %>%
    autoplot(data_count, level = NULL)+ggtitle(title)+
    geom_vline(xintercept = as.Date(eventdate),linetype="dashed",color="red")+
    labs(caption = "Red dashed line = Event occurrence")
                                                                 
  
  fc_accuracy<-accuracy(fc,data_count)
  
  #obs<-data_count
  #colnames(obs)[2]<-"Observed"
  #obs_pred<-merge(data_count,fc_accuracy, by="Date")
  return(list(forecastplot,fc_accuracy,fc))
}

在一个 运行 中,我指定 df、事件日期、我要预测的天数(3 周),然后是标题:-

event_analysis(df, "2020-01-01",21,"Event forecast")

这将打印此结果并绘制:-

我承认我制作的模拟数据并不完全理想,但该函数在我的 real-world 数据上运行良好。

这是我想要实现的。我希望这个输出是从函数中产生的,但除此之外,我还想要一个额外的图表,它可以“放大”预测的时间段,原因有两个:-

  1. 为了便于解释
  2. 我希望能够看到之前的N天和N天事件日期之后(N代表预测期即21)。

所以,一个额外的图表(连同原始的完整预测)看起来像这样,也许在一个输出中,“多图”样式:-

另一件事是打印另一个输出,显示测试集中的观察值与预测中使用的模型的预测值。

这些基本上是我想添加到我的函数中的另外两件事,但我不确定如何去做。非常感谢任何帮助:) .

我想你可以这样重写它。我做了一些调整来帮助你。

set.seed(1)
df <- data.frame(Date = sort(sample(seq(as.Date('2018/01/01'), as.Date('2020/09/17'), by="day"),1368883, replace = T)))

event_analysis <- function(data, eventdate, period, title){
 
 # in the future, you may think to move them out
 library(dplyr)
 library(tsibble)
 library(fable)
 library(fabletools)
 library(imputeTS)
 library(ggplot2)
 
 # convert at the beginning
 eventdate <- as.Date(eventdate)
 
 # more compact sintax
 data_count <- count(data, Date, name = "Count")
 
 # better specify the date variable to avoid the message
 data_count <- as_tsibble(data_count, index = Date)
 
 # you need to complete missing dates, just in case
 data_count <- tsibble::fill_gaps(data_count)
 
 
 data_count <- na_mean(data_count)
 
 
 train <- data_count %>%
  filter(Date <= eventdate)
 
 test <- data_count %>%
  filter(Date > eventdate, Date <= (eventdate+period))
 
  fit <- train %>%
  model(
   ets    = ETS(Count),
   arima  = ARIMA(Count),
   snaive = SNAIVE(Count)
  ) %>%
  mutate(mixed = (ets + arima + snaive) / 3)
 
 
 fc <- fit %>% forecast(h = period)
 

 # your plot
 forecastplot <- fc %>%
  autoplot(data_count, level = NULL) + 
  ggtitle(title) +
  geom_vline(xintercept = as.Date(eventdate), linetype = "dashed", color = "red") +
  labs(caption = "Red dashed line = Event occurrence")
 
 
 # plot just forecast and test
 zoomfcstplot <- autoplot(fc) + autolayer(test, .vars = Count)
 
 fc_accuracy <- accuracy(fc,data_count)
 

 ### EDIT: ###

 # results vs test
 res <- fc %>% 
  as_tibble() %>% 
  select(-Count) %>% 
  tidyr::pivot_wider(names_from = .model, values_from = .mean) %>% 
  inner_join(test, by = "Date")

 ##############
 

 return(list(forecastplot = forecastplot,
             zoomplot     = zoomfcstplot,
             accuracy     = fc_accuracy,
             forecast     = fc,
             results      = res))
}


event_analysis(df, 
               eventdate = "2020-01-01",
               period    = 21,
               title     = "Event forecast")


输出:

#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> Carico il pacchetto richiesto: fabletools
#> Registered S3 method overwritten by 'quantmod':
#>   method            from
#>   as.zoo.data.frame zoo
#> $forecastplot

#> 
#> $zoomplot

#> 
#> $accuracy
#> # A tibble: 4 x 9
#>   .model .type    ME  RMSE   MAE   MPE  MAPE  MASE    ACF1
#>   <chr>  <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
#> 1 arima  Test  -16.8  41.8  35.2 -1.31  2.61 0.791  0.164 
#> 2 ets    Test  -16.8  41.8  35.2 -1.31  2.61 0.791  0.164 
#> 3 mixed  Test  -21.9  44.7  36.8 -1.68  2.73 0.825 -0.0682
#> 4 snaive Test  -32.1  57.3  46.6 -2.43  3.45 1.05  -0.377 
#> 
#> $forecast
#> # A fable: 84 x 4 [1D]
#> # Key:     .model [4]
#>    .model Date               Count .mean
#>    <chr>  <date>            <dist> <dbl>
#>  1 ets    2020-01-02 N(1383, 1505) 1383.
#>  2 ets    2020-01-03 N(1383, 1505) 1383.
#>  3 ets    2020-01-04 N(1383, 1505) 1383.
#>  4 ets    2020-01-05 N(1383, 1505) 1383.
#>  5 ets    2020-01-06 N(1383, 1505) 1383.
#>  6 ets    2020-01-07 N(1383, 1505) 1383.
#>  7 ets    2020-01-08 N(1383, 1505) 1383.
#>  8 ets    2020-01-09 N(1383, 1505) 1383.
#>  9 ets    2020-01-10 N(1383, 1505) 1383.
#> 10 ets    2020-01-11 N(1383, 1505) 1383.
#> # ... with 74 more rows
#>
#> $results
#> # A tibble: 21 x 6
#>    Date         ets arima snaive mixed Count
#>    <date>     <dbl> <dbl>  <dbl> <dbl> <int>
#>  1 2020-01-02 1383. 1383.   1386 1384.  1350
#>  2 2020-01-03 1383. 1383.   1366 1377.  1398
#>  3 2020-01-04 1383. 1383.   1426 1397.  1357
#>  4 2020-01-05 1383. 1383.   1398 1388.  1415
#>  5 2020-01-06 1383. 1383.   1431 1399.  1399
#>  6 2020-01-07 1383. 1383.   1431 1399.  1346
#>  7 2020-01-08 1383. 1383.   1350 1372.  1299
#>  8 2020-01-09 1383. 1383.   1386 1384.  1303
#>  9 2020-01-10 1383. 1383.   1366 1377.  1365
#> 10 2020-01-11 1383. 1383.   1426 1397.  1328
#> # ... with 11 more rows