factor season 有新的级别 4 ,当在 R 中按组执行 Arima 时
factor season has new levels 4 , when performing Arima by group in R
这里是我的数据集示例
ts=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
我想按组使用 Arima 模型执行时间序列
#if using dummy
fun_tslm <- function(x, start = "2017-01-04", freq = 12){
tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
#View(tsw)
mytslm <- tslm(tsw ~ trend + season)
mytslm
}
fun_forecast <- function(x, h = 14){
residarima1 <- auto.arima(x[["residuals"]])
residualsArimaForecast <- forecast(residarima1, h = h)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(x, h = h)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF + residualsF
forecastR
}
tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)
当我运行这个脚本
我收到错误
Error in model.frame.default(Terms, newdata, na.action = na.action,
xlev = object$xlevels) : factor season has new levels 4
但实际上我想在我可以看到的地方获得带有 Arima 指标的输出
1.forecast初始值
2.forecast 14 个月 CI
初始值和预测值的输出应该是两个不同的data.frame
。
怎么做?
你的脚本和数据中有些部分不是很清楚,所以我可以尝试给你一个部分的答案,看看如何得到你想要的结果:
# I called your dataset in this way, because ts is a function
timeseries
现在的想法是将您的数据框转换为列表,列表的每个组件都是一个组,即时间序列。我想象每个组都是客户+东西,但我们可以用不同的方式来管理它:
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# EDIT here you convert the Data class as class(date)
library(lubridate)
timeseries$Data <- dmy(timeseries$Data)
# now the list
listed <- split(timeseries,timeseries$group)
现在我们必须将列表的每个组成部分定义为时间序列,使用 lapply
和 ts
函数:
# I do not understand why all your ts start with "2017-01-04", when in the example they are not (probably because it's an example)
# EDIT: convert the start date
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12) )
listed_ts
$`Hornschickens`
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374 12 120 242 227 268 280 419 12 172 336
$Hornshooves
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522 65 268 441 247 348 445 477 62 226 476
17171 306
$KornevOysters
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150 90 117 116 146 120 211 213 67 146
17171 118 152 122 201
下一步是auto.arima
每个时间序列,用lapply
逻辑:
library(forecast)
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
# partial result
> listed_arima
$`Hornschickens`
Series: x
ARIMA(0,0,0) with non-zero mean
Coefficients:
mean
223.8182
s.e. 38.7707
sigma^2 estimated as 18188: log likelihood=-69.03
AIC=142.06 AICc=143.56 BIC=142.86
...
现在每个华宇的预测:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,1) )
如果您需要将其简化为 data.frame、do.call
和 rbind
帮助:
do.call(rbind,listed_forecast)
method model level mean lower upper x series fitted residuals
Hornschickens "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 223.8182 Numeric,2 Numeric,2 Integer,11 "x" Numeric,11 Numeric,11
Hornshooves "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 336.9231 Numeric,2 Numeric,2 Integer,13 "x" Numeric,13 Numeric,13
KornevOysters "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 137.125 Numeric,2 Numeric,2 Integer,16 "x" Numeric,16 Numeric,16
我认为你可以稍微扭转一下以获得更好的结果。如果你想知道为什么对于这个例子,如果你在 auto.arima
函数中放入多个 1 来预测,但结果是一个常数,答案是 here,[=] 也指出23=] 输出列。
这里是我的数据集示例
ts=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L,
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L,
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L,
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018",
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020",
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018",
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019",
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020",
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017",
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019",
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018",
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"),
client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens",
"hooves", "Oysters"), class = "factor"), Sales = c(374L,
12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L,
117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L,
67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L,
441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data",
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA,
-40L))
我想按组使用 Arima 模型执行时间序列
#if using dummy
fun_tslm <- function(x, start = "2017-01-04", freq = 12){
tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
#View(tsw)
mytslm <- tslm(tsw ~ trend + season)
mytslm
}
fun_forecast <- function(x, h = 14){
residarima1 <- auto.arima(x[["residuals"]])
residualsArimaForecast <- forecast(residarima1, h = h)
residualsF <- as.numeric(residualsArimaForecast$mean)
regressionForecast <- forecast(x, h = h)
regressionF <- as.numeric(regressionForecast$mean)
forecastR <- regressionF + residualsF
forecastR
}
tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)
当我运行这个脚本 我收到错误
Error in model.frame.default(Terms, newdata, na.action = na.action, xlev = object$xlevels) : factor season has new levels 4
但实际上我想在我可以看到的地方获得带有 Arima 指标的输出 1.forecast初始值
2.forecast 14 个月 CI
初始值和预测值的输出应该是两个不同的data.frame
。
怎么做?
你的脚本和数据中有些部分不是很清楚,所以我可以尝试给你一个部分的答案,看看如何得到你想要的结果:
# I called your dataset in this way, because ts is a function
timeseries
现在的想法是将您的数据框转换为列表,列表的每个组件都是一个组,即时间序列。我想象每个组都是客户+东西,但我们可以用不同的方式来管理它:
# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)
# EDIT here you convert the Data class as class(date)
library(lubridate)
timeseries$Data <- dmy(timeseries$Data)
# now the list
listed <- split(timeseries,timeseries$group)
现在我们必须将列表的每个组成部分定义为时间序列,使用 lapply
和 ts
函数:
# I do not understand why all your ts start with "2017-01-04", when in the example they are not (probably because it's an example)
# EDIT: convert the start date
listed_ts <- lapply(listed,
function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12) )
listed_ts
$`Hornschickens`
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374 12 120 242 227 268 280 419 12 172 336
$Hornshooves
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522 65 268 441 247 348 445 477 62 226 476
17171 306
$KornevOysters
Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150 90 117 116 146 120 211 213 67 146
17171 118 152 122 201
下一步是auto.arima
每个时间序列,用lapply
逻辑:
library(forecast)
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
# partial result
> listed_arima
$`Hornschickens`
Series: x
ARIMA(0,0,0) with non-zero mean
Coefficients:
mean
223.8182
s.e. 38.7707
sigma^2 estimated as 18188: log likelihood=-69.03
AIC=142.06 AICc=143.56 BIC=142.86
...
现在每个华宇的预测:
listed_forecast <- lapply(listed_arima,function(x) forecast(x,1) )
如果您需要将其简化为 data.frame、do.call
和 rbind
帮助:
do.call(rbind,listed_forecast)
method model level mean lower upper x series fitted residuals
Hornschickens "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 223.8182 Numeric,2 Numeric,2 Integer,11 "x" Numeric,11 Numeric,11
Hornshooves "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 336.9231 Numeric,2 Numeric,2 Integer,13 "x" Numeric,13 Numeric,13
KornevOysters "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 137.125 Numeric,2 Numeric,2 Integer,16 "x" Numeric,16 Numeric,16
我认为你可以稍微扭转一下以获得更好的结果。如果你想知道为什么对于这个例子,如果你在 auto.arima
函数中放入多个 1 来预测,但结果是一个常数,答案是 here,[=] 也指出23=] 输出列。