如何在 R 中进行 Ljung-Box 检验?
How to do the Ljung-Box test in R?
我正在尝试在 R 中进行 Ljung-Box 检验,但出现了错误,我不明白问题出在哪里。
让我们使用 "Forecasting: Principles and Practice" 示例中的代码来获取 Google 的每日收盘价
library(fpp3)
google_stock <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
google_2015 <- google_stock %>% filter(year(Date) == 2015)
我使用朴素方法对该序列进行预测,并得到残差:
aug <- google_2015 %>% model(NAIVE(Close)) %>% augment()
现在我想使用 Ljung-Box 检验对残差中的自相关进行正式检验
aug %>% features(.resid, ljung_box, lag=10, dof=0)
但是我在输出中得到这个错误:
# A tibble: 1 x 2
Symbol .model
<chr> <chr>
1 GOOG NAIVE(Close)
Warning message:
1 error encountered for feature 1
[1] 'ts' object must have one or more observations
我做错了什么?
这是数据集的样本(使用 dput() ):
> dput(head(google_stock, 10))
structure(list(Symbol = c("GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG"), Date = structure(c(16072,
16073, 16076, 16077, 16078, 16079, 16080, 16083, 16084, 16085
), class = "Date"), Open = c(554.125916, 553.897461, 552.908875,
558.865112, 569.297241, 568.025513, 565.859619, 559.595398, 565.298279,
572.769714), High = c(555.26355, 554.856201, 555.814941, 566.162659,
569.953003, 568.413025, 565.859619, 569.749329, 571.781128, 573.768188
), Low = c(550.549194, 548.894958, 549.645081, 556.95752, 562.983337,
559.143311, 557.499023, 554.975403, 560.400146, 568.199402),
Close = c(552.963501, 548.929749, 555.049927, 565.750366,
566.927673, 561.468201, 561.438354, 557.861633, 570.986267,
570.598816), Adj_Close = c(552.963501, 548.929749, 555.049927,
565.750366, 566.927673, 561.468201, 561.438354, 557.861633,
570.986267, 570.598816), Volume = c(3666400, 3355000, 3561600,
5138400, 4514100, 4196000, 4314700, 4869100, 4997400, 3925700
), day = 1:10), row.names = c(NA, -10L), key = structure(list(
Symbol = "GOOG", .rows = list(1:10)), row.names = c(NA, -1L
), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), index = structure("day", ordered = TRUE), index2 = "day", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 0, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 1), class = "interval"), class = c("tbl_ts",
"tbl_df", "tbl", "data.frame"))
看起来您需要先用 select() 选出感兴趣的列(.resid),然后再将其传递给 features() 函数:
aug %>%
select(.resid) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
# Output
# Selecting index: "day"
# A tibble: 1 x 2
lb_stat lb_pvalue
<dbl> <dbl>
1 7.91 0.637
如果您没有在 features() 函数中指定 .resid,它仍然可以工作,因为函数会自动选择该列(它是除索引之外的唯一一列):
aug %>%
select(.resid) %>%
features(features = ljung_box, lag = 10, dof = 0)
# Selecting index: "day"
# Feature variable not specified, automatically selected `.var = .resid`
# A tibble: 1 x 2
lb_stat lb_pvalue
<dbl> <dbl>
1 7.91 0.637
我正在尝试在 R 中进行 Ljung-Box 检验,但出现了错误,我不明白问题出在哪里。
让我们使用 "Forecasting: Principles and Practice" 示例中的代码来获取 Google 的每日收盘价
library(fpp3)
google_stock <- gafa_stock %>%
filter(Symbol == "GOOG") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
google_2015 <- google_stock %>% filter(year(Date) == 2015)
我使用朴素方法对该序列进行预测,并得到残差:
aug <- google_2015 %>% model(NAIVE(Close)) %>% augment()
现在我想使用 Ljung-Box 检验对残差中的自相关进行正式检验
aug %>% features(.resid, ljung_box, lag=10, dof=0)
但是我在输出中得到这个错误:
# A tibble: 1 x 2
Symbol .model
<chr> <chr>
1 GOOG NAIVE(Close)
Warning message:
1 error encountered for feature 1
[1] 'ts' object must have one or more observations
我做错了什么?
这是数据集的样本(使用 dput() ):
> dput(head(google_stock, 10))
structure(list(Symbol = c("GOOG", "GOOG", "GOOG", "GOOG", "GOOG",
"GOOG", "GOOG", "GOOG", "GOOG", "GOOG"), Date = structure(c(16072,
16073, 16076, 16077, 16078, 16079, 16080, 16083, 16084, 16085
), class = "Date"), Open = c(554.125916, 553.897461, 552.908875,
558.865112, 569.297241, 568.025513, 565.859619, 559.595398, 565.298279,
572.769714), High = c(555.26355, 554.856201, 555.814941, 566.162659,
569.953003, 568.413025, 565.859619, 569.749329, 571.781128, 573.768188
), Low = c(550.549194, 548.894958, 549.645081, 556.95752, 562.983337,
559.143311, 557.499023, 554.975403, 560.400146, 568.199402),
Close = c(552.963501, 548.929749, 555.049927, 565.750366,
566.927673, 561.468201, 561.438354, 557.861633, 570.986267,
570.598816), Adj_Close = c(552.963501, 548.929749, 555.049927,
565.750366, 566.927673, 561.468201, 561.438354, 557.861633,
570.986267, 570.598816), Volume = c(3666400, 3355000, 3561600,
5138400, 4514100, 4196000, 4314700, 4869100, 4997400, 3925700
), day = 1:10), row.names = c(NA, -10L), key = structure(list(
Symbol = "GOOG", .rows = list(1:10)), row.names = c(NA, -1L
), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), index = structure("day", ordered = TRUE), index2 = "day", interval = structure(list(
year = 0, quarter = 0, month = 0, week = 0, day = 0, hour = 0,
minute = 0, second = 0, millisecond = 0, microsecond = 0,
nanosecond = 0, unit = 1), class = "interval"), class = c("tbl_ts",
"tbl_df", "tbl", "data.frame"))
看起来您需要先用 select() 选出感兴趣的列(.resid),然后再将其传递给 features() 函数:
aug %>%
select(.resid) %>%
features(.resid, ljung_box, lag = 10, dof = 0)
# Output
# Selecting index: "day"
# A tibble: 1 x 2
lb_stat lb_pvalue
<dbl> <dbl>
1 7.91 0.637
如果您没有在 features() 函数中指定 .resid,它仍然可以工作,因为函数会自动选择该列(它是除索引之外的唯一一列):
aug %>%
select(.resid) %>%
features(features = ljung_box, lag = 10, dof = 0)
# Selecting index: "day"
# Feature variable not specified, automatically selected `.var = .resid`
# A tibble: 1 x 2
lb_stat lb_pvalue
<dbl> <dbl>
1 7.91 0.637