在 data.frame 对象上使用 aggregate()
Using aggregate() on data.frame objects
为什么 aggregate() 在这里不起作用?
> aggregate(cbind(var1 = 1:10, var2 = 101:110),
by=list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x)
{
c(obs=length(x[, "var2"]), avg=mean(x[, "var2"]), sd=dev(x[, "var2"]))
})
Error in x[, "var2"] (from #1) : incorrect number of dimensions
> cbind(var1 = 1:10, var2 = 101:110)[, "var2"]
[1] 101 102 103 104 105 106 107 108 109 110
更新
运行修正后的版本之后,aggregate() 返回的值如下:
> r = aggregate(data.frame(var1 = 1:10, var2 = 101:110), by=list(range=cut(1:10, breaks=c(2,4,8,10))), FUN = function(x) { c(obs=length(x), avg=mean(x), sd=sd(x)) })
> class(r)
[1] "data.frame"
> dim(r)
[1] 3 3
> r[,1]
[1] (2,4] (4,8] (8,10]
Levels: (2,4] (4,8] (8,10]
> r[,2]
obs avg sd
[1,] 2 3.5 0.707107
[2,] 4 6.5 1.290994
[3,] 2 9.5 0.707107
> r[,3]
obs avg sd
[1,] 2 103.5 0.707107
[2,] 4 106.5 1.290994
[3,] 2 109.5 0.707107
> class(r[,2])
[1] "matrix"
> class(r[,3])
[1] "matrix"
那是因为 aggregate 不会把 data.frame 传递给它的 FUN= 参数,它传递的是观测值向量。此外,[, "name"] 索引在这里不适用:FUN 收到的 x 是没有维度的向量,而不是矩阵或 data.frame,所以才会报 "incorrect number of dimensions"。另外请确保传入的是 data.frame,而不是示例中的矩阵。也许你想要的是 by 函数,而不是 aggregate:
by(data.frame(var1 = 1:10, var2 = 101:110),
list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x) { c(obs=length(x[, "var2"]), avg=mean(x[, "var2"]), sd=sd(x[, "var2"])) })
请传入数据框,并注意 aggregate 只会把列向量传给 FUN,因此使用 x[ , "colname"] 注定失败,因为 "x" 不是数据框:
aggregate(data.frame(var1 = 1:10, var2 = 101:110),
by=list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x)
{
c(obs=length(x), avg=mean(x), sd=sd(x))
})
#------------
range var1.obs var1.avg var1.sd var2.obs var2.avg var2.sd
1 (2,4] 2.0000000 3.5000000 0.7071068 2.0000000 103.5000000 0.7071068
2 (4,8] 4.0000000 6.5000000 1.2909944 4.0000000 106.5000000 1.2909944
3 (8,10] 2.0000000 9.5000000 0.7071068 2.0000000 109.5000000 0.7071068
为什么 aggregate() 在这里不起作用?
> aggregate(cbind(var1 = 1:10, var2 = 101:110),
by=list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x)
{
c(obs=length(x[, "var2"]), avg=mean(x[, "var2"]), sd=dev(x[, "var2"]))
})
Error in x[, "var2"] (from #1) : incorrect number of dimensions
> cbind(var1 = 1:10, var2 = 101:110)[, "var2"]
[1] 101 102 103 104 105 106 107 108 109 110
更新
运行修正后的版本之后,aggregate() 返回的值如下:
> r = aggregate(data.frame(var1 = 1:10, var2 = 101:110), by=list(range=cut(1:10, breaks=c(2,4,8,10))), FUN = function(x) { c(obs=length(x), avg=mean(x), sd=sd(x)) })
> class(r)
[1] "data.frame"
> dim(r)
[1] 3 3
> r[,1]
[1] (2,4] (4,8] (8,10]
Levels: (2,4] (4,8] (8,10]
> r[,2]
obs avg sd
[1,] 2 3.5 0.707107
[2,] 4 6.5 1.290994
[3,] 2 9.5 0.707107
> r[,3]
obs avg sd
[1,] 2 103.5 0.707107
[2,] 4 106.5 1.290994
[3,] 2 109.5 0.707107
> class(r[,2])
[1] "matrix"
> class(r[,3])
[1] "matrix"
那是因为 aggregate 不会把 data.frame 传递给它的 FUN= 参数,它传递的是观测值向量。此外,[, "name"] 索引在这里不适用:FUN 收到的 x 是没有维度的向量,而不是矩阵或 data.frame,所以才会报 "incorrect number of dimensions"。另外请确保传入的是 data.frame,而不是示例中的矩阵。也许你想要的是 by 函数,而不是 aggregate:
by(data.frame(var1 = 1:10, var2 = 101:110),
list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x) { c(obs=length(x[, "var2"]), avg=mean(x[, "var2"]), sd=sd(x[, "var2"])) })
请传入数据框,并注意 aggregate 只会把列向量传给 FUN,因此使用 x[ , "colname"] 注定失败,因为 "x" 不是数据框:
aggregate(data.frame(var1 = 1:10, var2 = 101:110),
by=list(range=cut(1:10, breaks=c(2,4,8,10))),
FUN = function(x)
{
c(obs=length(x), avg=mean(x), sd=sd(x))
})
#------------
range var1.obs var1.avg var1.sd var2.obs var2.avg var2.sd
1 (2,4] 2.0000000 3.5000000 0.7071068 2.0000000 103.5000000 0.7071068
2 (4,8] 4.0000000 6.5000000 1.2909944 4.0000000 106.5000000 1.2909944
3 (8,10] 2.0000000 9.5000000 0.7071068 2.0000000 109.5000000 0.7071068