在 R 中根据其他列中的值对,对行中的值求和

Summing values in rows based on pairs of values in other columns in R

我想根据 Orig 和 Dest 的唯一组合对 Size 列中的值求和,其中 A-B 与 B-A 视为不同的组合。此外,我希望把第 1 个月和第 2 个月的值合并求和(记为“1”),把第 3 个月和第 4 个月的值合并求和(记为“2”),并把这个标签存入一个名为 Semester 的新列中。我的真实数据比这个例子复杂得多,但解决这个问题将帮助我为自己的数据编写代码。非常感谢。

初始 df 看起来如下

# Example data: ten shipments alternating between the routes A -> B and B -> A.
Orig <- rep(c("A", "B"), times = 5)
Dest <- rep(c("B", "A"), times = 5)
Month <- c(1, 2, 3, 4, 2, 3, 1, 2, 4, 4)
Size <- c(30, 20, 10, 10, 20, 20, 30, 50, 20, 60)

# data.frame() takes the column names from the variable names.
df <- data.frame(Orig, Dest, Month, Size)
df
   Orig Dest Month Size
1     A    B     1   30
2     B    A     2   20
3     A    B     3   10
4     B    A     4   10
5     A    B     2   20
6     B    A     3   20
7     A    B     1   30
8     B    A     2   50
9     A    B     4   20
10    B    A     4   60

期望的结果如下所示:

  Orig Dest Semester Size
1    A    B        1   80
2    B    A        1   70
3    A    B        2   30
4    B    A        2   90

使用dplyr

library(dplyr)
# Tag each row with its semester (months 1-2 -> 1, anything else -> 2),
# then total Size for every Semester/Orig/Dest combination.
df %>%
  mutate(Semester = ifelse(Month %in% c(1, 2), 1, 2)) %>%
  group_by(Semester, Orig, Dest) %>%
  summarise(Size = sum(Size))

  Semester Orig Dest Size
1        1    A    B   80
2        1    B    A   70
3        2    A    B   30
4        2    B    A   90

不过列的顺序与您期望的略有不同;可以在管道末尾加上 select() 来重新排列各列。

> # Attach with library(): it stops with an error if data.table is missing,
> # whereas require() only warns and returns FALSE, letting the script go on.
> library(data.table)
> dt1 <- data.table(df)
> setkey(dt1, Orig, Dest, Month)
> # First pass: total Size for every Orig/Dest/Month combination.
> df2 <- dt1[, .(Size = sum(Size)), by = .(Orig, Dest, Month)]
> df2
   Orig Dest Month Size
1:    A    B     1   60
2:    A    B     2   20
3:    A    B     3   10
4:    A    B     4   20
5:    B    A     2   70
6:    B    A     3   20
7:    B    A     4   70


> # Show the class of each column of df2.
> sapply(df2, class)
     Orig      Dest     Month      Size 
 "factor"  "factor" "numeric" "numeric" 

> library(plyr)
> # Recode Month as a factor whose levels "1"/"2" become "1" and "3"/"4"
> # become "2", i.e. the semester label.
> df2$Month <- revalue(
+   as.factor(df2$Month),
+   c("1" = "1", "2" = "1", "3" = "2", "4" = "2")
+ )
> df2
   Orig Dest Month Size
1:    A    B     1   60
2:    A    B     1   20
3:    A    B     2   10
4:    A    B     2   20
5:    B    A     1   70
6:    B    A     2   20
7:    B    A     2   70
> # Second pass: sum Size again now that Month holds the two-level label.
> df3 <- df2[, .(Size = sum(Size)), by = .(Orig, Dest, Month)]
> df3
   Orig Dest Month Size
1:    A    B     1   80
2:    A    B     2   30
3:    B    A     1   70
4:    B    A     2   90

这是另一个使用 dplyr 的解决方案

# Label months 1-2 as semester 1 and every other month as semester 2, by value.
# ntile(Month, n = 2) would instead split the rows by rank into two equal-sized
# halves, which matches the semesters only when exactly half of the rows fall
# in months 1-2 (as happens to be the case in the example data).
group_by(df, Orig, Dest, r = ifelse(Month %in% c(1, 2), 1L, 2L)) %>%
  summarise(sum(Size))
Source: local data frame [4 x 4]
Groups: Orig, Dest

  Orig Dest r sum(Size)
1    A    B 1        80
2    A    B 2        30
3    B    A 1        70
4    B    A 2        90

另一个选项是使用 data.table

library(data.table)
# Convert df to a data.table in place and add the semester label by reference:
# months 1-2 give FALSE + 1L = 1, every other month gives TRUE + 1L = 2.
setDT(df)
df[, Semester := (!Month %in% 1:2) + 1L]
# Total Size for every Semester/Orig/Dest combination.
df[, list(Size = sum(Size)), by = .(Semester, Orig, Dest)]
#    Semester Orig Dest Size
#1:        1    A    B   80
#2:        1    B    A   70
#3:        2    A    B   30
#4:        2    B    A   90

或使用 base R 中的 aggregate

# Sum Size per Orig/Dest/Semester; the semester label is built inside the
# formula (months 1-2 -> 1, every other month -> 2) and named via cbind().
aggregate(
  Size ~ Orig + Dest + cbind(Semester = (!Month %in% 1:2) + 1L),
  data = df,
  FUN = sum
)
#   Orig Dest Semester Size
#1    B    A        1   70
#2    A    B        1   80
#3    B    A        2   90
#4    A    B        2   30