如何在数据框的每个子组内计算移动平均值
How to find moving averages within each subgroup of a data frame
如果我有以下数据框:
set.seed(21)
df1 <- data.frame(col1=c(rep('a',5), rep('b',5), rep('c',5)), col4=rnorm(1:15))
col1 col4
1 a 0.793013171
2 a 0.522251264
3 a 1.746222241
4 a -1.271336123
5 a 2.197389533
6 b 0.433130777
7 b -1.570199630
8 b -0.934905667
9 b 0.063493345
10 b -0.002393336
11 c -2.276781240
12 c 0.757412225
13 c -0.548405554
14 c 0.172549478
15 c 0.562853068
如何在每个组内计算 2 点、3 点或 4 点的移动平均值?例如 2 点移动平均的期望结果为:
col1 col4 SMA
a 0.793013171 NA
a 0.522251264 0.657632218
a 1.746222241 1.134236753
a -1.271336123 0.237443059
a 2.197389533 0.463026705
b 0.433130777 NA
b -1.57019963 -0.568534427
b -0.934905667 -1.252552649
b 0.063493345 -0.435706161
b -0.002393336 0.030550005
c -2.27678124 NA
c 0.757412225 -0.759684508
c -0.548405554 0.104503336
c 0.172549478 -0.187928038
c 0.562853068 0.367701273
根据我查到的资料,我以为应该这样写,但运行后报错:
aggregate(df1$col4, by=list(df1$col1), function(x) {filter(x,
rep(1/2,2), sides=1 )} )
Error in aggregate.data.frame(as.data.frame(x), ...) :
'FUN' must always return a scalar
这个报错告诉我(我的理解是)aggregate 对每个组只能返回一个值。
所以我想,我需要的是这样一个函数:它接收某个组中的所有值,
并返回一个长度相同的向量。
但我不确定应该用哪个函数来实现。
我们可以使用 zoo::rollmeanr(右对齐的滚动均值),并用 fill = NA 补齐开头的缺失位置,使结果与输入等长。把第二个参数 2 改成 3 或 4,即可得到 3 点或 4 点移动平均值。
使用 base R 的 ave:
df1$SMA <- with(df1, ave(col4, col1, FUN = function(x)
zoo::rollmeanr(x, 2, fill = NA)))
df1
# col1 col4 SMA
#1 a 0.793013171 NA
#2 a 0.522251264 0.6576322
#3 a 1.746222241 1.1342368
#4 a -1.271336123 0.2374431
#5 a 2.197389533 0.4630267
#6 b 0.433130777 NA
#7 b -1.570199630 -0.5685344
#8 b -0.934905667 -1.2525526
#9 b 0.063493345 -0.4357062
#10 b -0.002393336 0.0305500
#11 c -2.276781240 NA
#12 c 0.757412225 -0.7596845
#13 c -0.548405554 0.1045033
#14 c 0.172549478 -0.1879280
#15 c 0.562853068 0.3677013
或者使用 dplyr(结果仍保持分组,如需取消分组可再接 ungroup()):
library(dplyr)
df1 %>%
group_by(col1) %>%
mutate(SMA = zoo::rollmeanr(col4, 2, fill = NA))
或者使用 data.table(setDT 会就地把 df1 转换为 data.table):
library(data.table)
setDT(df1)[, SMA := zoo::rollmeanr(col4, 2, fill = NA), by = col1]
如果我有以下数据框:
set.seed(21)
df1 <- data.frame(col1=c(rep('a',5), rep('b',5), rep('c',5)), col4=rnorm(1:15))
col1 col4
1 a 0.793013171
2 a 0.522251264
3 a 1.746222241
4 a -1.271336123
5 a 2.197389533
6 b 0.433130777
7 b -1.570199630
8 b -0.934905667
9 b 0.063493345
10 b -0.002393336
11 c -2.276781240
12 c 0.757412225
13 c -0.548405554
14 c 0.172549478
15 c 0.562853068
如何在每个组内计算 2 点、3 点或 4 点的移动平均值?例如 2 点移动平均的期望结果为:
col1 col4 SMA
a 0.793013171 NA
a 0.522251264 0.657632218
a 1.746222241 1.134236753
a -1.271336123 0.237443059
a 2.197389533 0.463026705
b 0.433130777 NA
b -1.57019963 -0.568534427
b -0.934905667 -1.252552649
b 0.063493345 -0.435706161
b -0.002393336 0.030550005
c -2.27678124 NA
c 0.757412225 -0.759684508
c -0.548405554 0.104503336
c 0.172549478 -0.187928038
c 0.562853068 0.367701273
根据我查到的资料,我以为应该这样写,但运行后报错:
aggregate(df1$col4, by=list(df1$col1), function(x) {filter(x,
rep(1/2,2), sides=1 )} )
Error in aggregate.data.frame(as.data.frame(x), ...) : 'FUN' must always return a scalar
这个报错告诉我(我的理解是)aggregate 对每个组只能返回一个值。所以我想,我需要的是这样一个函数:它接收某个组中的所有值,并返回一个长度相同的向量。但我不确定应该用哪个函数来实现。
我们可以使用 zoo::rollmeanr(右对齐的滚动均值),并用 fill = NA 补齐开头的缺失位置,使结果与输入等长。把第二个参数 2 改成 3 或 4,即可得到 3 点或 4 点移动平均值。
使用 base R 的 ave:
df1$SMA <- with(df1, ave(col4, col1, FUN = function(x)
zoo::rollmeanr(x, 2, fill = NA)))
df1
# col1 col4 SMA
#1 a 0.793013171 NA
#2 a 0.522251264 0.6576322
#3 a 1.746222241 1.1342368
#4 a -1.271336123 0.2374431
#5 a 2.197389533 0.4630267
#6 b 0.433130777 NA
#7 b -1.570199630 -0.5685344
#8 b -0.934905667 -1.2525526
#9 b 0.063493345 -0.4357062
#10 b -0.002393336 0.0305500
#11 c -2.276781240 NA
#12 c 0.757412225 -0.7596845
#13 c -0.548405554 0.1045033
#14 c 0.172549478 -0.1879280
#15 c 0.562853068 0.3677013
或者使用 dplyr(结果仍保持分组,如需取消分组可再接 ungroup()):
library(dplyr)
df1 %>%
group_by(col1) %>%
mutate(SMA = zoo::rollmeanr(col4, 2, fill = NA))
或者使用 data.table(setDT 会就地把 df1 转换为 data.table):
library(data.table)
setDT(df1)[, SMA := zoo::rollmeanr(col4, 2, fill = NA), by = col1]