在 R 中按组使用 auto.arima() 将数字日期格式转换为正确的日期格式
convert numeric date format into correct date format using auto.arima() by group in R
让我们处理这个数据样本
timeseries<-structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
我想使用 auto.arima 按组
执行预测
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# now the list
listed <- split(timeseries,timeseries$group)
library("forecast")
library("lubridate")
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12) )
listed_ts
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,2) )
listed_forecast
do.call(rbind,listed_forecast)
在listed_forecast
之后我得到下一个输出
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374 12 120 242 227 268 280 419 12 172 336
$Hornshooves
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522 65 268 441 247 348 445 477 62 226 476
17171 306
$KornevOysters
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150 90 117 116 146 120 211 213 67 146
17171 118 152 122 201
17170 的日期格式不正确
所以 as.numeric(ymd("2017-01-04"))
向我们展示了 17170
如何得到正确日期格式的输出?
我想要这样的正常日期格式
$Hornschickens
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Dec 2017 223.8182 50.98365 396.6527 -40.50942 488.1458
Jan 2018 223.8182 50.98365 396.6527 -40.50942 488.1458
所以我假设预测中的 17170 意味着 2017,17171=2018
但是转换成阅读格式有什么麻烦
我看到下一个逻辑
是否可以将这些数值自动转换为日期
其中
1970-01-01 (Y-m-d) 为 0
1970-01-02 是 1
“2018-12-25”是 17890
但是像我的示例中那样按月输入数据汇总时
dec 17890 如此简单地转换为 dec 2018 导致 17890 在 2018 年 12 月的范围内。
listed_ts
中的 start
参数不应该是日期:
start --- the time of the first observation. Either a single number or
a vector of two integers, which specify a natural time unit and a
(1-based) number of samples into the time unit. See the examples for
the use of the second form.
(来自 ?ts
。)因此,使用 start = c(2017, 1)
即可。
让我们处理这个数据样本
timeseries<-structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
我想使用 auto.arima 按组
执行预测# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# now the list
listed <- split(timeseries,timeseries$group)
library("forecast")
library("lubridate")
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12) )
listed_ts
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
#Now the forecast for each arima:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,2) )
listed_forecast
do.call(rbind,listed_forecast)
在listed_forecast
之后我得到下一个输出
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374 12 120 242 227 268 280 419 12 172 336
$Hornshooves
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522 65 268 441 247 348 445 477 62 226 476
17171 306
$KornevOysters
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150 90 117 116 146 120 211 213 67 146
17171 118 152 122 201
17170 的日期格式不正确
所以 as.numeric(ymd("2017-01-04"))
向我们展示了 17170
如何得到正确日期格式的输出? 我想要这样的正常日期格式
$Hornschickens
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Dec 2017 223.8182 50.98365 396.6527 -40.50942 488.1458
Jan 2018 223.8182 50.98365 396.6527 -40.50942 488.1458
所以我假设预测中的 17170 意味着 2017,17171=2018 但是转换成阅读格式有什么麻烦
我看到下一个逻辑 是否可以将这些数值自动转换为日期
其中
1970-01-01 (Y-m-d) 为 0
1970-01-02 是 1
“2018-12-25”是 17890
但是像我的示例中那样按月输入数据汇总时
dec 17890 如此简单地转换为 dec 2018 导致 17890 在 2018 年 12 月的范围内。
listed_ts
中的 start
参数不应该是日期:
start --- the time of the first observation. Either a single number or a vector of two integers, which specify a natural time unit and a (1-based) number of samples into the time unit. See the examples for the use of the second form.
(来自 ?ts
。)因此,使用 start = c(2017, 1)
即可。