接下来几天的移动平均值,不包括当天
Moving mean for the following days not including the current day
我有一个 data.table,其中包含一家公司的收益率(return)数据,如下所示:
# Example data: one row per calendar day from 2006-03-29 to 2006-04-13 for a
# single IBES ticker; MktAdjReturn is NA on four of the sixteen days.
df <- structure(
  list(
    # Dates are stored as day counts since 1970-01-01 (kept as doubles).
    Date = structure(as.numeric(13236:13251), class = "Date"),
    IBES = rep("@O5G", 16L),
    MktAdjReturn = c(
      -0.00381466643441897, -0.00834070809256926, -0.0193226301897589,
      NA, NA,
      -0.00885564092195712, -0.051612619547402, -0.0065292323057804,
      0.042244140103735, 0.003100395243401,
      NA, NA,
      -0.00486229222347689, -0.0184708840023963, 0.00273824763632391,
      -0.00510010246255499
    )
  ),
  class = c("data.table", "data.frame"),
  row.names = c(NA, -16L)
)
我想计算不含当天的未来 5 天收益率的移动平均值。也就是说,代码应跳过 NA,取其后下一个可用的收益率(return)。
例如,对于第一个日期 2006-03-29,5 天移动平均值应基于以下数值计算:-0.008340708;-0.019322630;-0.008855641;-0.051612620;-0.006529232。
我试过:
# Asker's attempt. NOTE(review): this is a fragment, not a standalone
# expression -- presumably the j / by arguments of a data.table `[` call.
# The left-aligned 5-wide window starts at the current row, so the current
# day is included in the mean.
rollapply(MktAdjReturn,width = 5,FUN=mean,align = "left",fill = NA,na.rm=T), by=c("IBES")
但是,这样得到的窗口也包含了当天,而且我不确定 fill = NA 的确切作用。
您需要做一些更改。假设需要按 IBES 对结果分组,那么可以使用 dplyr。
首先,应删除值为 NA 的行,因为提问者(OP)不希望这些行计入滚动窗口的宽度。完成计算后,再将结果与原始 data.frame 进行 join。
要排除当前行、从下一行开始计算,可以对滚动平均值使用 lead。
解决方案应如下所示:
library(tidyverse)
library(zoo)
# Per IBES: mean of the next 5 available (non-NA) returns, excluding the
# current row. Rows whose own return is NA inherit the value of the preceding
# row, because the same 5 upcoming observations apply to them.
df %>%
  group_by(IBES) %>%
  # Drop NA returns so every window spans 5 available observations.
  filter(!is.na(MktAdjReturn)) %>%
  # The left-aligned window starts at the current row; lead() shifts the
  # result one observation forward so the current day is excluded.
  mutate(
    rollmean = lead(
      rollapply(MktAdjReturn, 5, FUN = mean, align = "left", fill = NA)
    )
  ) %>%
  # Join back to recover the rows that were filtered out.
  right_join(df, by = c("Date", "IBES")) %>%
  # Since dplyr 1.0.0 right_join() keeps the row order of x and appends the
  # unmatched rows of y at the end. Restore chronological order so that
  # fill() and the row_number() mask below act on the intended rows.
  arrange(IBES, Date) %>%
  # Position of the last computed mean within the group; 0 when there is
  # none (avoids max() returning -Inf with a warning on an empty index).
  mutate(LastNonNaMean = max(0L, which(!is.na(rollmean)))) %>%
  # Carry the last computed mean down into the NA-return rows.
  fill(rollmean) %>%
  # fill() also runs past the last computed mean; blank those rows again.
  mutate(
    rollmean = if_else(row_number() > LastNonNaMean, NA_real_, rollmean)
  ) %>%
  select(Date, IBES, MktAdjReturn = MktAdjReturn.x, rollmean)
# # A tibble: 16 x 4
# # Groups: IBES [1]
# Date IBES MktAdjReturn rollmean
# <date> <chr> <dbl> <dbl>
# 1 2006-03-29 @O5G - 0.00381 - 0.0189
# 2 2006-03-30 @O5G - 0.00834 - 0.00882
# 3 2006-03-31 @O5G - 0.0193 - 0.00433
# 4 2006-04-01 @O5G NA - 0.00433
# 5 2006-04-02 @O5G NA - 0.00433
# 6 2006-04-03 @O5G - 0.00886 - 0.00353
# 7 2006-04-04 @O5G - 0.0516 0.00310
# 8 2006-04-05 @O5G - 0.00653 0.00495
# 9 2006-04-06 @O5G 0.0422 - 0.00452
# 10 2006-04-07 @O5G 0.00310 NA
# 11 2006-04-08 @O5G NA NA
# 12 2006-04-09 @O5G NA NA
# 13 2006-04-10 @O5G - 0.00486 NA
# 14 2006-04-11 @O5G - 0.0185 NA
# 15 2006-04-12 @O5G 0.00274 NA
# 16 2006-04-13 @O5G - 0.00510 NA
#
我有一个 data.table,其中包含一家公司的收益率(return)数据,如下所示:
# Example data: one row per calendar day from 2006-03-29 to 2006-04-13 for a
# single IBES ticker; MktAdjReturn is NA on four of the sixteen days.
df <- structure(
  list(
    # Dates are stored as day counts since 1970-01-01 (kept as doubles).
    Date = structure(as.numeric(13236:13251), class = "Date"),
    IBES = rep("@O5G", 16L),
    MktAdjReturn = c(
      -0.00381466643441897, -0.00834070809256926, -0.0193226301897589,
      NA, NA,
      -0.00885564092195712, -0.051612619547402, -0.0065292323057804,
      0.042244140103735, 0.003100395243401,
      NA, NA,
      -0.00486229222347689, -0.0184708840023963, 0.00273824763632391,
      -0.00510010246255499
    )
  ),
  class = c("data.table", "data.frame"),
  row.names = c(NA, -16L)
)
我想计算不含当天的未来 5 天收益率的移动平均值。也就是说,代码应跳过 NA,取其后下一个可用的收益率(return)。
例如,对于第一个日期 2006-03-29,5 天移动平均值应基于以下数值计算:-0.008340708;-0.019322630;-0.008855641;-0.051612620;-0.006529232。
我试过:
# Asker's attempt. NOTE(review): this is a fragment, not a standalone
# expression -- presumably the j / by arguments of a data.table `[` call.
# The left-aligned 5-wide window starts at the current row, so the current
# day is included in the mean.
rollapply(MktAdjReturn,width = 5,FUN=mean,align = "left",fill = NA,na.rm=T), by=c("IBES")
但是,这样得到的窗口也包含了当天,而且我不确定 fill = NA 的确切作用。
您需要做一些更改。假设需要按 IBES 对结果分组,那么可以使用 dplyr。
首先,应删除值为 NA 的行,因为提问者(OP)不希望这些行计入滚动窗口的宽度。完成计算后,再将结果与原始 data.frame 进行 join。
要排除当前行、从下一行开始计算,可以对滚动平均值使用 lead。
解决方案应如下所示:
library(tidyverse)
library(zoo)
# Per IBES: mean of the next 5 available (non-NA) returns, excluding the
# current row. Rows whose own return is NA inherit the value of the preceding
# row, because the same 5 upcoming observations apply to them.
df %>%
  group_by(IBES) %>%
  # Drop NA returns so every window spans 5 available observations.
  filter(!is.na(MktAdjReturn)) %>%
  # The left-aligned window starts at the current row; lead() shifts the
  # result one observation forward so the current day is excluded.
  mutate(
    rollmean = lead(
      rollapply(MktAdjReturn, 5, FUN = mean, align = "left", fill = NA)
    )
  ) %>%
  # Join back to recover the rows that were filtered out.
  right_join(df, by = c("Date", "IBES")) %>%
  # Since dplyr 1.0.0 right_join() keeps the row order of x and appends the
  # unmatched rows of y at the end. Restore chronological order so that
  # fill() and the row_number() mask below act on the intended rows.
  arrange(IBES, Date) %>%
  # Position of the last computed mean within the group; 0 when there is
  # none (avoids max() returning -Inf with a warning on an empty index).
  mutate(LastNonNaMean = max(0L, which(!is.na(rollmean)))) %>%
  # Carry the last computed mean down into the NA-return rows.
  fill(rollmean) %>%
  # fill() also runs past the last computed mean; blank those rows again.
  mutate(
    rollmean = if_else(row_number() > LastNonNaMean, NA_real_, rollmean)
  ) %>%
  select(Date, IBES, MktAdjReturn = MktAdjReturn.x, rollmean)
# # A tibble: 16 x 4
# # Groups: IBES [1]
# Date IBES MktAdjReturn rollmean
# <date> <chr> <dbl> <dbl>
# 1 2006-03-29 @O5G - 0.00381 - 0.0189
# 2 2006-03-30 @O5G - 0.00834 - 0.00882
# 3 2006-03-31 @O5G - 0.0193 - 0.00433
# 4 2006-04-01 @O5G NA - 0.00433
# 5 2006-04-02 @O5G NA - 0.00433
# 6 2006-04-03 @O5G - 0.00886 - 0.00353
# 7 2006-04-04 @O5G - 0.0516 0.00310
# 8 2006-04-05 @O5G - 0.00653 0.00495
# 9 2006-04-06 @O5G 0.0422 - 0.00452
# 10 2006-04-07 @O5G 0.00310 NA
# 11 2006-04-08 @O5G NA NA
# 12 2006-04-09 @O5G NA NA
# 13 2006-04-10 @O5G - 0.00486 NA
# 14 2006-04-11 @O5G - 0.0185 NA
# 15 2006-04-12 @O5G 0.00274 NA
# 16 2006-04-13 @O5G - 0.00510 NA
#