使用单独的类别向量聚合数据框中的列

Aggregate columns in a data frame using a separate category vector

我有一组观测数据,每条观测都包含对一组详细问题的是/否(yes/no)回答。我想按一组数量更少的问题类别来汇总回答为“是”的得分。我可以用 for 循环做到这一点,但我知道一定有更好的方法。下面是一段生成随机数据集的代码,用来说明我想要做的事情。

#
# Initial data: 10 observations with no/yes (0/1) answers to 15 questions.
# Questions are grouped into 3 categories of 5 consecutive questions each.
# The category is not stored in the data set; the goal is to compute, for
# every observation, the sum of the answers within each category.
#
# Set up random test data. Each column is a random permutation of five 0s
# and five 1s. Columns are generated in order COL1..COL15, so the random
# number stream is consumed exactly as if 15 sample() calls were written out.
#
myData <- as.data.frame(
  vapply(
    seq_len(15),
    function(j) sample(rep(c(0, 1), 5), 10),
    numeric(10)
  )
)
names(myData) <- paste0("COL", seq_len(15))
print(myData)
#
# Allocate storage for category totals: one row per observation. The size is
# taken from the data rather than hard-coded so it cannot drift out of sync.
#
catSums <- data.frame(
  Cat01 = rep(NA_real_, nrow(myData)),
  Cat02 = rep(NA_real_, nrow(myData)),
  Cat03 = rep(NA_real_, nrow(myData))
)
#
# For loop to aggregate sums in each category. seq_len() is used instead of
# 1:n so the loop body is skipped (rather than run on rows 1 and 0) if the
# data frame is empty.
#
for (i in seq_len(nrow(myData))) {
  catSums$Cat01[i] <- sum(myData[i, 1:5])
  catSums$Cat02[i] <- sum(myData[i, 6:10])
  catSums$Cat03[i] <- sum(myData[i, 11:15])
}
print(catSums)

你可以这样做:

# Sum the answers per category for every observation in myData.
# split() maps each of the 3 categories to the 5 column positions it covers.
# lapply() is used instead of sapply() because it always returns a list with
# one element per category, so the result keeps one column per category even
# when myData has a single row (sapply() would collapse that to a vector).
setNames(
  as.data.frame(
    lapply(
      split(seq_len(15), rep(1:3, each = 5)),
      function(cols) rowSums(myData[cols])
    )
  ),
  c("Cat01", "Cat02", "Cat03")
)
#>    Cat01 Cat02 Cat03
#> 1      4     4     3
#> 2      0     1     1
#> 3      2     2     2
#> 4      3     3     2
#> 5      2     3     2
#> 6      3     2     3
#> 7      1     2     2
#> 8      2     3     3
#> 9      4     4     4
#> 10     4     1     3

您可以使用 dplyr:

library(dplyr)
# Row-wise category totals with dplyr. mutate(.keep = "none") is used rather
# than summarise() because the result has one row per observation;
# summarise() is meant for one row per group, and returning more rows from
# it is deprecated in dplyr 1.1.0. Only the three new columns are kept.
myData %>%
  mutate(
    Cat01 = rowSums(across(COL1:COL5)),
    Cat02 = rowSums(across(COL6:COL10)),
    Cat03 = rowSums(across(COL11:COL15)),
    .keep = "none"
  )

返回结果如下:

# Cat01 Cat02 Cat03
# 1      2     3     4
# 2      2     4     1
# 3      2     4     2
# 4      2     4     4
# 5      4     3     2
# 6      2     1     4
# 7      4     3     2
# 8      0     1     3
# 9      3     1     1
# 10     4     1     2
# Alternative: iterate over the first column position of each category and
# sum that column together with the next four. The start positions are named
# so lapply() yields a named list and the data frame gets its column names
# directly. lapply() (rather than sapply()) keeps one column per category
# even when myData has only one row.
as.data.frame(
  lapply(
    c(Cat01 = 1, Cat02 = 6, Cat03 = 11),
    function(first) rowSums(myData[first:(first + 4)])
  )
)

输出:

   Cat01 Cat02 Cat03
1      2     0     4
2      1     3     2
3      1     2     3
4      3     2     2
5      2     4     2
6      1     2     1
7      4     3     3
8      2     3     2
9      4     5     2
10     5     1     4