此数据适合 ARIMA 建模吗?
Is this data fit for ARIMA modelling?
我使用以下数据建立了一个 auto.arima 模型。但看了结果之后,我不确定这份数据是否适合用 ARIMA 建模。对变量 COUNT 进行三阶差分(differences = 3)后得到了显著的 p 值,并且 auto.arima 建议的阶数为 (3,0,0)。但得到的预测值不符合预期,大部分是负值,而实际数据中不包含任何负值。我不明白问题出在哪里。该模型在统计上看起来是正确的,但预测值看起来并不合理。非常感谢任何帮助。
数据:
# Console dump of the data frame used in the question: 27 rows, two columns.
#   COUNT          - integer vector; every value shown is positive (minimum 1).
#   Enrolment_date - factor of month-year labels. The levels are stored in
#                    alphabetical order; decoding the integer codes gives the
#                    rows in the order SEP2017, NOV2017, DEC2017, JAN2018, ...,
#                    DEC2019. There is no OCT2017 level, so the monthly
#                    sequence has one gap.
dput(Enrollment_Data)
structure(list(COUNT = c(17L, 1L, 5L, 8L, 45L, 21L, 18L, 43L,
82L, 116L, 192L, 289L, 242L, 254L, 335L, 138L, 71L, 98L, 91L,
138L, 175L, 232L, 155L, 376L, 197L, 271L, 421L), Enrolment_date = structure(c(25L,
20L, 5L, 10L, 8L, 16L, 1L, 18L, 14L, 12L, 3L, 26L, 23L, 21L,
6L, 11L, 9L, 17L, 2L, 19L, 15L, 13L, 4L, 27L, 24L, 22L, 7L), .Label = c("APR2018",
"APR2019", "AUG2018", "AUG2019", "DEC2017", "DEC2018", "DEC2019",
"FEB2018", "FEB2019", "JAN2018", "JAN2019", "JUL2018", "JUL2019",
"JUN2018", "JUN2019", "MAR2018", "MAR2019", "MAY2018", "MAY2019",
"NOV2017", "NOV2018", "NOV2019", "OCT2018", "OCT2019", "SEP2017",
"SEP2018", "SEP2019"), class = "factor")), class = "data.frame", row.names = c(NA,
-27L))
代码:
# Exploratory ARIMA workflow for the enrollment counts: read the CSV,
# difference COUNT three times, inspect stationarity/ACF/PACF, let auto.arima
# pick a model for the differenced series, then forecast 12 steps ahead.
# NOTE(review): every model below is fitted to d.COUNT (the differenced
# series), so all forecasts are on the differenced scale, not on the scale of
# the original counts.
Enrollment_Data <- read.csv('EnrollmentRateT0.csv')
print(Enrollment_Data)
dput(Enrollment_Data)
# Load packages
library("tseries")
library("ggplot2")
library("forecast")
library(FitAR)
library("fUnitRoots")
library(lmtest)
library(fpp2)
# attach() puts the data-frame columns on the search path; this is why COUNT
# can be referenced by its bare name below.
attach(Enrollment_Data)
# Step 1: Model Identification
# Stationarity check - Dickey-Fuller test
# P-value > 0.5, hence the data is non-stationary
# NOTE(review): 0.5 is presumably meant to be 0.05 - confirm. The only ADF
# test in this script is run on d.COUNT, not on the raw COUNT series.
# d.COUNT is the third-order difference of COUNT (three values shorter).
d.COUNT <- diff(COUNT, differences = 3)
summary(COUNT)
summary(d.COUNT)
plot(d.COUNT)
adf.test(d.COUNT, alternative="stationary")
acf(d.COUNT)
pacf(d.COUNT)
# Step 2: Model Estimation
# Step 4: Diagnosis
# The first two calls only print the selected model; the third one (exhaustive
# search, no approximation) is the fit that is kept.
auto.arima(d.COUNT)
auto.arima(d.COUNT, stepwise = FALSE, approximation = FALSE)
# NOTE(review): D=1 requests one seasonal difference, but nothing in this
# script gives d.COUNT a seasonal period, so it presumably has no effect -
# confirm.
arima.final <-auto.arima(d.COUNT, stepwise = FALSE, approximation = FALSE, D=1)
tsdiag(arima.final)
arima.final
# The bare string below is evaluated (and auto-printed at top level); it acts
# as a note to the reader, not as a comment.
'Choose the one that has least AIC and significant co-efficients'
# Commented-out alternative: fit order c(3,3,1) directly on the raw COUNT.
#arima.final <-arima(COUNT, c(3,3,1))
forecast1 <- forecast(arima.final,h = 12)
forecast1
# NOTE(review): futurVal is never assigned anywhere in this script, so this
# call fails as written; plot.forecast may also not be exported by current
# versions of the forecast package - confirm. plot(forecast1) on the next
# line is the working equivalent.
plot.forecast(futurVal)
plot(forecast1)
class(forecast1)
print(forecast1)
summary(forecast1)
accuracy(forecast1)
plot(d.COUNT)
# Cross-check: compare the point forecasts from predict() with the means
# returned by forecast() for the same model and horizon.
p <- predict(arima.final,n.ahead = 12);
f <- forecast(arima.final, h = 12);
all.equal(f$mean, p$pred)
accuracy(f)
p
f
结果:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
25 -234.78798559 -376.20497 -93.3710 -451.0666 -18.50937
26 248.28301149 -21.68036 518.2464 -164.5903 661.15636
27 38.07516814 -281.53132 357.6817 -450.7208 526.87112
28 -278.77782716 -600.00425 42.4486 -770.0513 212.49560
29 251.40378400 -74.76879 577.5764 -247.4341 750.24168
30 -31.49668698 -359.73170 296.7383 -533.4888 470.49545
31 -144.02466378 -474.75484 186.7055 -649.8328 361.78350
32 130.22859430 -211.26598 471.7232 -392.0423 652.49947
33 13.52166802 -332.92417 359.9675 -516.3215 543.36485
34 -123.35180366 -469.81119 223.1076 -653.2157 406.51210
35 103.92492852 -244.63788 452.4877 -429.1559 637.00574
36 -0.06911659 -349.40010 349.2619 -534.3247 534.18651
您是在 d.COUNT 上运行 auto.arima(),而 d.COUNT 是原始 Enrollment_Data$COUNT 的三阶差分(diff(COUNT, differences = 3))。d.COUNT 确实包含很多负值,因此基于它得到的预测值也会出现负值。我认为您真正想做的是直接在 Enrollment_Data$COUNT 上运行 auto.arima。
我使用以下数据建立了一个 auto.arima 模型。但看了结果之后,我不确定这份数据是否适合用 ARIMA 建模。对变量 COUNT 进行三阶差分(differences = 3)后得到了显著的 p 值,并且 auto.arima 建议的阶数为 (3,0,0)。但得到的预测值不符合预期,大部分是负值,而实际数据中不包含任何负值。我不明白问题出在哪里。该模型在统计上看起来是正确的,但预测值看起来并不合理。非常感谢任何帮助。
数据:
# Console dump of the data frame used in the question: 27 rows, two columns.
#   COUNT          - integer vector; every value shown is positive (minimum 1).
#   Enrolment_date - factor of month-year labels. The levels are stored in
#                    alphabetical order; decoding the integer codes gives the
#                    rows in the order SEP2017, NOV2017, DEC2017, JAN2018, ...,
#                    DEC2019. There is no OCT2017 level, so the monthly
#                    sequence has one gap.
dput(Enrollment_Data)
structure(list(COUNT = c(17L, 1L, 5L, 8L, 45L, 21L, 18L, 43L,
82L, 116L, 192L, 289L, 242L, 254L, 335L, 138L, 71L, 98L, 91L,
138L, 175L, 232L, 155L, 376L, 197L, 271L, 421L), Enrolment_date = structure(c(25L,
20L, 5L, 10L, 8L, 16L, 1L, 18L, 14L, 12L, 3L, 26L, 23L, 21L,
6L, 11L, 9L, 17L, 2L, 19L, 15L, 13L, 4L, 27L, 24L, 22L, 7L), .Label = c("APR2018",
"APR2019", "AUG2018", "AUG2019", "DEC2017", "DEC2018", "DEC2019",
"FEB2018", "FEB2019", "JAN2018", "JAN2019", "JUL2018", "JUL2019",
"JUN2018", "JUN2019", "MAR2018", "MAR2019", "MAY2018", "MAY2019",
"NOV2017", "NOV2018", "NOV2019", "OCT2018", "OCT2019", "SEP2017",
"SEP2018", "SEP2019"), class = "factor")), class = "data.frame", row.names = c(NA,
-27L))
代码:
# Exploratory ARIMA workflow for the enrollment counts: read the CSV,
# difference COUNT three times, inspect stationarity/ACF/PACF, let auto.arima
# pick a model for the differenced series, then forecast 12 steps ahead.
# NOTE(review): every model below is fitted to d.COUNT (the differenced
# series), so all forecasts are on the differenced scale, not on the scale of
# the original counts.
Enrollment_Data <- read.csv('EnrollmentRateT0.csv')
print(Enrollment_Data)
dput(Enrollment_Data)
# Load packages
library("tseries")
library("ggplot2")
library("forecast")
library(FitAR)
library("fUnitRoots")
library(lmtest)
library(fpp2)
# attach() puts the data-frame columns on the search path; this is why COUNT
# can be referenced by its bare name below.
attach(Enrollment_Data)
# Step 1: Model Identification
# Stationarity check - Dickey-Fuller test
# P-value > 0.5, hence the data is non-stationary
# NOTE(review): 0.5 is presumably meant to be 0.05 - confirm. The only ADF
# test in this script is run on d.COUNT, not on the raw COUNT series.
# d.COUNT is the third-order difference of COUNT (three values shorter).
d.COUNT <- diff(COUNT, differences = 3)
summary(COUNT)
summary(d.COUNT)
plot(d.COUNT)
adf.test(d.COUNT, alternative="stationary")
acf(d.COUNT)
pacf(d.COUNT)
# Step 2: Model Estimation
# Step 4: Diagnosis
# The first two calls only print the selected model; the third one (exhaustive
# search, no approximation) is the fit that is kept.
auto.arima(d.COUNT)
auto.arima(d.COUNT, stepwise = FALSE, approximation = FALSE)
# NOTE(review): D=1 requests one seasonal difference, but nothing in this
# script gives d.COUNT a seasonal period, so it presumably has no effect -
# confirm.
arima.final <-auto.arima(d.COUNT, stepwise = FALSE, approximation = FALSE, D=1)
tsdiag(arima.final)
arima.final
# The bare string below is evaluated (and auto-printed at top level); it acts
# as a note to the reader, not as a comment.
'Choose the one that has least AIC and significant co-efficients'
# Commented-out alternative: fit order c(3,3,1) directly on the raw COUNT.
#arima.final <-arima(COUNT, c(3,3,1))
forecast1 <- forecast(arima.final,h = 12)
forecast1
# NOTE(review): futurVal is never assigned anywhere in this script, so this
# call fails as written; plot.forecast may also not be exported by current
# versions of the forecast package - confirm. plot(forecast1) on the next
# line is the working equivalent.
plot.forecast(futurVal)
plot(forecast1)
class(forecast1)
print(forecast1)
summary(forecast1)
accuracy(forecast1)
plot(d.COUNT)
# Cross-check: compare the point forecasts from predict() with the means
# returned by forecast() for the same model and horizon.
p <- predict(arima.final,n.ahead = 12);
f <- forecast(arima.final, h = 12);
all.equal(f$mean, p$pred)
accuracy(f)
p
f
结果:
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
25 -234.78798559 -376.20497 -93.3710 -451.0666 -18.50937
26 248.28301149 -21.68036 518.2464 -164.5903 661.15636
27 38.07516814 -281.53132 357.6817 -450.7208 526.87112
28 -278.77782716 -600.00425 42.4486 -770.0513 212.49560
29 251.40378400 -74.76879 577.5764 -247.4341 750.24168
30 -31.49668698 -359.73170 296.7383 -533.4888 470.49545
31 -144.02466378 -474.75484 186.7055 -649.8328 361.78350
32 130.22859430 -211.26598 471.7232 -392.0423 652.49947
33 13.52166802 -332.92417 359.9675 -516.3215 543.36485
34 -123.35180366 -469.81119 223.1076 -653.2157 406.51210
35 103.92492852 -244.63788 452.4877 -429.1559 637.00574
36 -0.06911659 -349.40010 349.2619 -534.3247 534.18651
您是在 d.COUNT 上运行 auto.arima(),而 d.COUNT 是原始 Enrollment_Data$COUNT 的三阶差分(diff(COUNT, differences = 3))。d.COUNT 确实包含很多负值,因此基于它得到的预测值也会出现负值。我认为您真正想做的是直接在 Enrollment_Data$COUNT 上运行 auto.arima。