使用 ggplot 绘制多个 ACF
Plotting multiple ACF with ggplot
您好,我正在尝试使用 ggplot 绘制多个 ACF 和 PACF。
我已经取得了一些进展,但循环的事情仍然给我带来了很多麻烦。所以希望你能帮助我。
我想制作四个 acf 图,AAPL、GE、SPY 和 WMT 各一张。
col_names <- colnames(df)
col_names <- col_names[-c(1,2)]
for (i in col_names){
bacf <- acf(df, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))+
q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
geom_segment(mapping = aes(xend = lag, yend = 0))+
geom_hline(yintercept=c(significance_level,-significance_level), lty=3, color="blue") +
ggtitle(i)+theme_minimal()
plot_list[[i]]<-q;
plot_grid(q[[1]],q[[2]],q[[3]],q[[4]])
我的一些数据:
structure(list(Date = structure(c(10960, 10961, 10962, 10963,
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977,
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"),
`AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075,
-1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238,
-0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021,
1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806,
10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098,
3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858,
-1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082,
-4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427,
0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197,
5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592,
-0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245,
1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
-1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561,
-1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776,
-7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")
已尽力使其尽可能高效以实现您的意图。在某些情况下可以像 lapply
这样使用基数 r 但我用 tidyverse
library(dplyr)
library(purrr)
library(ggplot2)
glimpse(df)
#> Rows: 20
#> Columns: 6
#> $ Date <date> 2000-01-04, 2000-01-05, 2000-01-06, 2000-01-07, …
#> $ `AAPL, not cleaned` <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ AAPL <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ GE <dbl> -4.08219945, -0.17376199, 1.32681098, 3.79869230,…
#> $ SPY <dbl> -3.9891330, 0.1787311, -1.6201970, 5.6452380, 0.3…
#> $ WMT <dbl> -3.8137630, -2.3600840, 1.3913270, 7.2806180, -1.…
# we'll go straight to building the dataframe with `map_dfr`
bacdf <- map_df(df, function(ts) acf(ts, plot = FALSE)$acf)
# The lags are all the same just 0 through the number of rows minus 1
bacdf$lag <- 0:(nrow(bacdf) - 1)
# reorder things and eliminate `Date` and unclean AAPL which is actually identical to AAPL
bacdf <- bacdf %>% select(lag, everything(), -Date, -`AAPL, not cleaned`)
bacdf
#> # A tibble: 14 x 5
#> lag AAPL GE SPY WMT
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 0 1 1 1 1
#> 2 1 -0.147 0.0812 -0.197 0.0769
#> 3 2 0.143 -0.316 0.0197 -0.453
#> 4 3 -0.0940 -0.182 -0.407 -0.296
#> 5 4 0.312 0.326 0.239 0.158
#> 6 5 -0.229 0.0171 -0.0140 0.176
#> 7 6 -0.0727 -0.238 -0.0138 -0.00309
#> 8 7 -0.0300 -0.198 -0.0321 -0.0287
#> 9 8 -0.150 0.0675 -0.135 -0.0981
#> 10 9 -0.238 0.101 -0.0376 -0.106
#> 11 10 -0.186 -0.313 -0.219 -0.0790
#> 12 11 0.114 -0.0322 0.285 0.115
#> 13 12 -0.119 0.148 -0.0444 0.202
#> 14 13 0.128 0.214 0.0559 -0.0870
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))
myaxis <- colnames(bacdf[1])
mynames <- colnames(bacdf[-1])
myaxis
#> [1] "lag"
mynames
#> [1] "AAPL" "GE" "SPY" "WMT"
for (i in seq_along(mynames)) {
print(ggplot(bacdf, aes_string(x = myaxis, y = mynames[[i]])) +
geom_segment(mapping = aes(xend = lag, yend = 0)) +
geom_point() +
geom_hline(yintercept = c(significance_level, -significance_level), lty = 3, color = "blue") +
ggtitle(mynames[[i]]) +
theme_minimal())
}
只是为了记录您的数据...
df <- structure(list(Date = structure(c(10960, 10961, 10962, 10963,
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977,
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"),
`AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075,
-1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238,
-0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021,
1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806,
10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098,
3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858,
-1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082,
-4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427,
0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197,
5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592,
-0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245,
1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
-1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561,
-1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776,
-7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")
由 reprex package (v0.3.0)
于 2020-05-13 创建
您好,我正在尝试使用 ggplot 绘制多个 ACF 和 PACF。
我已经取得了一些进展,但循环的事情仍然给我带来了很多麻烦。所以希望你能帮助我。
我想制作四个 acf 图,AAPL、GE、SPY 和 WMT 各一张。
col_names <- colnames(df)
col_names <- col_names[-c(1,2)]
for (i in col_names){
bacf <- acf(df, plot = FALSE)
bacfdf <- with(bacf, data.frame(lag, acf))
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))+
q <- ggplot(data = bacfdf, mapping = aes(x = lag, y = acf)) +
geom_segment(mapping = aes(xend = lag, yend = 0))+
geom_hline(yintercept=c(significance_level,-significance_level), lty=3, color="blue") +
ggtitle(i)+theme_minimal()
plot_list[[i]]<-q;
plot_grid(q[[1]],q[[2]],q[[3]],q[[4]])
我的一些数据:
structure(list(Date = structure(c(10960, 10961, 10962, 10963,
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977,
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"),
`AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075,
-1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238,
-0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021,
1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806,
10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098,
3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858,
-1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082,
-4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427,
0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197,
5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592,
-0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245,
1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
-1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561,
-1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776,
-7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")
已尽力使其尽可能高效以实现您的意图。在某些情况下可以像 lapply
这样使用基数 r 但我用 tidyverse
library(dplyr)
library(purrr)
library(ggplot2)
glimpse(df)
#> Rows: 20
#> Columns: 6
#> $ Date <date> 2000-01-04, 2000-01-05, 2000-01-06, 2000-01-07, …
#> $ `AAPL, not cleaned` <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ AAPL <dbl> -8.8100210, 1.4528100, -9.0514010, 4.6280750, -1.…
#> $ GE <dbl> -4.08219945, -0.17376199, 1.32681098, 3.79869230,…
#> $ SPY <dbl> -3.9891330, 0.1787311, -1.6201970, 5.6452380, 0.3…
#> $ WMT <dbl> -3.8137630, -2.3600840, 1.3913270, 7.2806180, -1.…
# we'll go straight to building the dataframe with `map_dfr`
bacdf <- map_df(df, function(ts) acf(ts, plot = FALSE)$acf)
# The lags are all the same just 0 through the number of rows minus 1
bacdf$lag <- 0:(nrow(bacdf) - 1)
# reorder things and eliminate `Date` and unclean AAPL which is actually identical to AAPL
bacdf <- bacdf %>% select(lag, everything(), -Date, -`AAPL, not cleaned`)
bacdf
#> # A tibble: 14 x 5
#> lag AAPL GE SPY WMT
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 0 1 1 1 1
#> 2 1 -0.147 0.0812 -0.197 0.0769
#> 3 2 0.143 -0.316 0.0197 -0.453
#> 4 3 -0.0940 -0.182 -0.407 -0.296
#> 5 4 0.312 0.326 0.239 0.158
#> 6 5 -0.229 0.0171 -0.0140 0.176
#> 7 6 -0.0727 -0.238 -0.0138 -0.00309
#> 8 7 -0.0300 -0.198 -0.0321 -0.0287
#> 9 8 -0.150 0.0675 -0.135 -0.0981
#> 10 9 -0.238 0.101 -0.0376 -0.106
#> 11 10 -0.186 -0.313 -0.219 -0.0790
#> 12 11 0.114 -0.0322 0.285 0.115
#> 13 12 -0.119 0.148 -0.0444 0.202
#> 14 13 0.128 0.214 0.0559 -0.0870
significance_level <- qnorm((1 + 0.95)/2)/sqrt(sum(!is.na(df)))
myaxis <- colnames(bacdf[1])
mynames <- colnames(bacdf[-1])
myaxis
#> [1] "lag"
mynames
#> [1] "AAPL" "GE" "SPY" "WMT"
for (i in seq_along(mynames)) {
print(ggplot(bacdf, aes_string(x = myaxis, y = mynames[[i]])) +
geom_segment(mapping = aes(xend = lag, yend = 0)) +
geom_point() +
geom_hline(yintercept = c(significance_level, -significance_level), lty = 3, color = "blue") +
ggtitle(mynames[[i]]) +
theme_minimal())
}
只是为了记录您的数据...
df <- structure(list(Date = structure(c(10960, 10961, 10962, 10963,
10966, 10967, 10968, 10969, 10970, 10974, 10975, 10976, 10977,
10980, 10981, 10982, 10983, 10984, 10987, 10988), class = "Date"),
`AAPL, not cleaned` = c(-8.810021, 1.45281, -9.051401, 4.628075,
-1.774445, -5.25055, -6.181806, 10.40407, 3.74302, 3.425328,
2.48944, 6.309463, -1.948374, -4.652429, 5.493372, -1.852238,
-0.1725783, -7.924, 2.074379, -3.431709), AAPL = c(-8.810021,
1.45281, -9.051401, 4.628075, -1.774445, -5.25055, -6.181806,
10.40407, 3.74302, 3.425328, 2.48944, 6.309463, -1.948374,
-4.652429, 5.493372, -1.852238, -0.1725783, -7.924, 2.074379,
-3.431709), GE = c(-4.08219945, -0.17376199, 1.32681098,
3.7986923, -0.03966156, 0.1651528, 0.32948959, 1.14473858,
-1.80480699, -2.00675631, 0.48530696, -1.88697651, -1.24799082,
-4.25203994, 0.26750549, 2.10052733, 0.21893437, -5.62251427,
0, 1.48150858), SPY = c(-3.989133, 0.1787311, -1.620197,
5.645238, 0.3424661, -1.203798, -0.999791, 1.345214, 1.348592,
-0.7898116, 0.8111037, -1.542447, -0.2161228, -2.875245,
1.129238, -0.7957602, -0.4002674, -3.16912, 2.677718, 0.9804
), WMT = c(-3.813763, -2.360084, 1.391327, 7.280618, -1.841673,
-1.498155, -1.812554, 0.1075352, -0.9720049, 1.630053, -2.314561,
-1.067179, -1.494226, -5.024863, 2.904527, 1.316343, -4.642776,
-7.004438, -0.6916664, 6.949199)), row.names = c(NA, 20L), class = "data.frame")
由 reprex package (v0.3.0)
于 2020-05-13 创建