如何在 R 中根据不同的值合并行？
How do you combine rows based on different values in R?
我想对某些行的值求和。例如，对于 ID 相同的行，只把字母为 a 或 b 的行的 value 相加。
# Example data: five IDs, each with one row per letter "a", "b" and "c".
ID <- rep(c(1, 8, 2, 5, 7), each = 3)
letter <- rep(c("a", "b", "c"), times = 5)
value <- c(
  111, 75, 94,
  37, 85, 86,
  76, 55, 72,
  62, 56, 35,
  85, 44, 65
)
dataframe <- data.frame(ID = ID, letter = letter, value = value)
输出如下
ID letter value
1 1 a 111
2 1 b 75
3 1 c 94
4 8 a 37
5 8 b 85
6 8 c 86
7 2 a 76
8 2 b 55
9 2 c 72
10 5 a 62
11 5 b 56
12 5 c 35
13 7 a 85
14 7 b 44
15 7 c 65
使用 data.table：
# data.table approach: keep the rows whose letter is "a" or "b",
# then sum `value` within each ID.
library(data.table)
setDT(dataframe)  # turns `dataframe` into a data.table in place
dataframe[
  letter %in% c("a", "b"),
  list(ab_sum = sum(value)),
  by = ID
]
或者使用 tidyverse/dplyr：
# dplyr approach: drop every row that is not "a" or "b",
# then total `value` for each ID.
library(magrittr)
library(dplyr)
dataframe %>%
  filter(letter %in% c("a", "b")) %>%
  group_by(ID) %>%
  summarise(ab_sum = sum(value))
使用 dplyr
可以这样实现：
# Add a per-ID column `sum` holding the total of `value` over the "a" and "b"
# rows. Every input row is kept; all rows of one ID carry that ID's total.
library(dplyr)
dataframe %>%
  group_by(ID) %>%
  # Subset `value` directly instead of padding the other rows with NA, and
  # spell out TRUE (`T` is an ordinary variable that can be reassigned).
  mutate(sum = sum(value[letter %in% c("a", "b")], na.rm = TRUE)) %>%
  ungroup()
# A tibble: 15 × 4
ID letter value sum
<dbl> <chr> <dbl> <dbl>
1 1 a 111 186
2 1 b 75 186
3 1 c 94 186
4 8 a 37 122
5 8 b 85 122
6 8 c 86 122
7 2 a 76 131
8 2 b 55 131
9 2 c 72 131
10 5 a 62 118
11 5 b 56 118
12 5 c 35 118
13 7 a 85 129
14 7 b 44 129
15 7 c 65 129
或者采用更紧凑的写法：
# One row per ID: total of `value` over the rows whose letter is "a" or "b".
dataframe %>%
  group_by(ID) %>%
  # Subset `value` directly instead of padding the other rows with NA, and
  # spell out TRUE (`T` is an ordinary variable that can be reassigned).
  summarise(sum = sum(value[letter %in% c("a", "b")], na.rm = TRUE))
# A tibble: 5 × 2
ID sum
<dbl> <dbl>
1 1 186
2 2 131
3 5 118
4 7 129
5 8 122
使用 dplyr
----------
# Per-ID total of `value`, counting only the rows whose letter is "a" or "b".
library(dplyr)
dataframe %>%
  group_by(ID) %>%
  summarise(total = sum(value[letter %in% c("a", "b")]))
# ID total
# <dbl> <dbl>
#1 1 186
#2 2 131
#3 5 118
#4 7 129
#5 8 122
我想对某些行的值求和。例如，对于 ID 相同的行，只把字母为 a 或 b 的行的 value 相加。
# Example data: five IDs, each with one row per letter "a", "b" and "c".
ID <- rep(c(1, 8, 2, 5, 7), each = 3)
letter <- rep(c("a", "b", "c"), times = 5)
value <- c(
  111, 75, 94,
  37, 85, 86,
  76, 55, 72,
  62, 56, 35,
  85, 44, 65
)
dataframe <- data.frame(ID = ID, letter = letter, value = value)
输出如下
ID letter value
1 1 a 111
2 1 b 75
3 1 c 94
4 8 a 37
5 8 b 85
6 8 c 86
7 2 a 76
8 2 b 55
9 2 c 72
10 5 a 62
11 5 b 56
12 5 c 35
13 7 a 85
14 7 b 44
15 7 c 65
使用 data.table：
# data.table approach: keep the rows whose letter is "a" or "b",
# then sum `value` within each ID.
library(data.table)
setDT(dataframe)  # turns `dataframe` into a data.table in place
dataframe[
  letter %in% c("a", "b"),
  list(ab_sum = sum(value)),
  by = ID
]
或者使用 tidyverse/dplyr：
# dplyr approach: drop every row that is not "a" or "b",
# then total `value` for each ID.
library(magrittr)
library(dplyr)
dataframe %>%
  filter(letter %in% c("a", "b")) %>%
  group_by(ID) %>%
  summarise(ab_sum = sum(value))
使用 dplyr
可以这样实现：
# Add a per-ID column `sum` holding the total of `value` over the "a" and "b"
# rows. Every input row is kept; all rows of one ID carry that ID's total.
library(dplyr)
dataframe %>%
  group_by(ID) %>%
  # Subset `value` directly instead of padding the other rows with NA, and
  # spell out TRUE (`T` is an ordinary variable that can be reassigned).
  mutate(sum = sum(value[letter %in% c("a", "b")], na.rm = TRUE)) %>%
  ungroup()
# A tibble: 15 × 4
ID letter value sum
<dbl> <chr> <dbl> <dbl>
1 1 a 111 186
2 1 b 75 186
3 1 c 94 186
4 8 a 37 122
5 8 b 85 122
6 8 c 86 122
7 2 a 76 131
8 2 b 55 131
9 2 c 72 131
10 5 a 62 118
11 5 b 56 118
12 5 c 35 118
13 7 a 85 129
14 7 b 44 129
15 7 c 65 129
或者采用更紧凑的写法：
# One row per ID: total of `value` over the rows whose letter is "a" or "b".
dataframe %>%
  group_by(ID) %>%
  # Subset `value` directly instead of padding the other rows with NA, and
  # spell out TRUE (`T` is an ordinary variable that can be reassigned).
  summarise(sum = sum(value[letter %in% c("a", "b")], na.rm = TRUE))
# A tibble: 5 × 2
ID sum
<dbl> <dbl>
1 1 186
2 2 131
3 5 118
4 7 129
5 8 122
使用 dplyr
----------
# Per-ID total of `value`, counting only the rows whose letter is "a" or "b".
library(dplyr)
dataframe %>%
  group_by(ID) %>%
  summarise(total = sum(value[letter %in% c("a", "b")]))
# ID total
# <dbl> <dbl>
#1 1 186
#2 2 131
#3 5 118
#4 7 129
#5 8 122