使用单独的类别向量聚合数据框中的列
Aggregate columns in a data frame using a separate category vector
我有一组观察结果,每个观察结果都包含对一组详细问题的 yes/no 回答。我想要做的是按一组数量更少的问题类别来汇总“是”的得分。我可以用 for 循环做到这一点,但我知道一定有更好的方法。下面是一些生成随机测试数据集的代码,应该能阐明我想要做的事情。
#
# Initial data. 10 observations with no/yes (0/1) answers to 15 questions. Questions are
# grouped into 3 categories, 5 questions each. The category is not included in the data set. I want to compute the sums of each category across all 10 observations.
#
# Set up random test data
#
myData<-data.frame(COL1=sample(rep(c(0,1),5),10),
COL2=sample(rep(c(0,1),5),10),
COL3=sample(rep(c(0,1),5),10),
COL4=sample(rep(c(0,1),5),10),
COL5=sample(rep(c(0,1),5),10),
COL6=sample(rep(c(0,1),5),10),
COL7=sample(rep(c(0,1),5),10),
COL8=sample(rep(c(0,1),5),10),
COL9=sample(rep(c(0,1),5),10),
COL10=sample(rep(c(0,1),5),10),
COL11=sample(rep(c(0,1),5),10),
COL12=sample(rep(c(0,1),5),10),
COL13=sample(rep(c(0,1),5),10),
COL14=sample(rep(c(0,1),5),10),
COL15=sample(rep(c(0,1),5),10))
print(myData)
#
# Allocate storage for category totals
#
catSums<-data.frame(
Cat01=rep(NA,10),
Cat02=rep(NA,10),
Cat03=rep(NA,10))
#
# For loop to aggregate sums in each category
#
for (i in 1:10) {
catSums$Cat01[i]=sum(myData[i,c(1:5)])
catSums$Cat02[i]=sum(myData[i,c(6:10)])
catSums$Cat03[i]=sum(myData[i,c(11:15)])
}
print(catSums)
你可以这样做:
setNames(as.data.frame(sapply(split(1:15, rep(1:3, each = 5)), function(x) {
rowSums(myData[x])})), c('Cat01', 'Cat02', 'Cat03'))
#> Cat01 Cat02 Cat03
#> 1 4 4 3
#> 2 0 1 1
#> 3 2 2 2
#> 4 3 3 2
#> 5 2 3 2
#> 6 3 2 3
#> 7 1 2 2
#> 8 2 3 3
#> 9 4 4 4
#> 10 4 1 3
您可以使用 dplyr:
library(dplyr)
myData %>%
summarise(Cat01 = rowSums(across(COL1:COL5)),
Cat02 = rowSums(across(COL6:COL10)),
Cat03 = rowSums(across(COL11:COL15)))
返回以下结果:
# Cat01 Cat02 Cat03
# 1 2 3 4
# 2 2 4 1
# 3 2 4 2
# 4 2 4 4
# 5 4 3 2
# 6 2 1 4
# 7 4 3 2
# 8 0 1 3
# 9 3 1 1
# 10 4 1 2
setNames(
as.data.frame(sapply(c(1,6,11),function(x) rowSums(myData[x:(x+4)]))),
c("Cat01","Cat02", "Cat03")
)
输出:
Cat01 Cat02 Cat03
1 2 0 4
2 1 3 2
3 1 2 3
4 3 2 2
5 2 4 2
6 1 2 1
7 4 3 3
8 2 3 2
9 4 5 2
10 5 1 4
我有一组观察结果,每个观察结果都包含对一组详细问题的 yes/no 回答。我想要做的是按一组数量更少的问题类别来汇总“是”的得分。我可以用 for 循环做到这一点,但我知道一定有更好的方法。下面是一些生成随机测试数据集的代码,应该能阐明我想要做的事情。
#
# Initial data. 10 observations with no/yes (0/1) answers to 15 questions. Questions are
# grouped into 3 categories, 5 questions each. The category is not included in the data set. I want to compute the sums of each category across all 10 observations.
#
# Set up random test data
#
myData<-data.frame(COL1=sample(rep(c(0,1),5),10),
COL2=sample(rep(c(0,1),5),10),
COL3=sample(rep(c(0,1),5),10),
COL4=sample(rep(c(0,1),5),10),
COL5=sample(rep(c(0,1),5),10),
COL6=sample(rep(c(0,1),5),10),
COL7=sample(rep(c(0,1),5),10),
COL8=sample(rep(c(0,1),5),10),
COL9=sample(rep(c(0,1),5),10),
COL10=sample(rep(c(0,1),5),10),
COL11=sample(rep(c(0,1),5),10),
COL12=sample(rep(c(0,1),5),10),
COL13=sample(rep(c(0,1),5),10),
COL14=sample(rep(c(0,1),5),10),
COL15=sample(rep(c(0,1),5),10))
print(myData)
#
# Allocate storage for category totals
#
catSums<-data.frame(
Cat01=rep(NA,10),
Cat02=rep(NA,10),
Cat03=rep(NA,10))
#
# For loop to aggregate sums in each category
#
for (i in 1:10) {
catSums$Cat01[i]=sum(myData[i,c(1:5)])
catSums$Cat02[i]=sum(myData[i,c(6:10)])
catSums$Cat03[i]=sum(myData[i,c(11:15)])
}
print(catSums)
你可以这样做:
setNames(as.data.frame(sapply(split(1:15, rep(1:3, each = 5)), function(x) {
rowSums(myData[x])})), c('Cat01', 'Cat02', 'Cat03'))
#> Cat01 Cat02 Cat03
#> 1 4 4 3
#> 2 0 1 1
#> 3 2 2 2
#> 4 3 3 2
#> 5 2 3 2
#> 6 3 2 3
#> 7 1 2 2
#> 8 2 3 3
#> 9 4 4 4
#> 10 4 1 3
您可以使用 dplyr:
library(dplyr)
myData %>%
summarise(Cat01 = rowSums(across(COL1:COL5)),
Cat02 = rowSums(across(COL6:COL10)),
Cat03 = rowSums(across(COL11:COL15)))
返回以下结果:
# Cat01 Cat02 Cat03
# 1 2 3 4
# 2 2 4 1
# 3 2 4 2
# 4 2 4 4
# 5 4 3 2
# 6 2 1 4
# 7 4 3 2
# 8 0 1 3
# 9 3 1 1
# 10 4 1 2
setNames(
as.data.frame(sapply(c(1,6,11),function(x) rowSums(myData[x:(x+4)]))),
c("Cat01","Cat02", "Cat03")
)
输出:
Cat01 Cat02 Cat03
1 2 0 4
2 1 3 2
3 1 2 3
4 3 2 2
5 2 4 2
6 1 2 1
7 4 3 3
8 2 3 2
9 4 5 2
10 5 1 4