按变量分组后的相交元素
intersecting elements after grouping by a variable
我有如下数据:
# Build the toy data set: one row per (group, letter) observation.
# Group sizes are A_0 = 3, A_1 = 2, B_0 = 3, B_1 = 3.
toy.dat <- data.frame(
  group = rep(c("A_0", "A_1", "B_0", "B_1"), times = c(3, 2, 3, 3))
)
# Letters observed in each row, laid out one group per line.
toy.dat$letters <- c(
  "A", "B", "C", # A_0
  "A", "D",      # A_1
  "C", "E", "F", # B_0
  "A", "B", "F"  # B_1
)
# Collapse each group into a single row holding its letters (as a
# list-column) and its row count, then derive group_number by stripping
# everything up to and including the last "_" from the group label.
toy.dat %>%
  group_by(group) %>%
  summarise(
    letters = list(letters),
    num = n()
  ) %>%
  mutate(group_number = gsub(".*_", "", group))
group letters num group_number
A_0 c("A", "B", "C") 3 0
A_1 c("A", "D") 2 1
B_0 c("C", "E", "F") 3 0
B_1 c("A", "B", "F") 3 1
我想按 group_number 分组,找出每组内各行 letters 的交集,并将其作为新列添加到数据框中。
A_0 和 B_0 的输出应为“C”,A_1 和 B_1 的输出应为“A”。
我们可以用 purrr 包的 reduce 对每个分组内的字母向量依次求交集:
library(dplyr)
library(purrr)
# 1. One row per group, with its letters stored in a list-column.
# 2. Within each group_number, fold intersect() over the letter vectors so
#    every row of that set carries the letters common to all of its groups.
# 3. Drop the grouping and count the shared letters for each row.
toy.dat %>%
  group_by(group) %>%
  summarise(
    letters = list(letters),
    num = n()
  ) %>%
  mutate(group_number = gsub(".*_", "", group)) %>%
  group_by(group_number) %>%
  mutate(intersect = list(reduce(letters, intersect))) %>%
  ungroup() %>%
  mutate(nintersect = lengths(intersect))
输出:
# A tibble: 4 × 6
group letters num group_number intersect nintersect
<chr> <list> <int> <chr> <list> <int>
1 A_0 <chr [3]> 3 0 <chr [1]> 1
2 A_1 <chr [2]> 2 1 <chr [1]> 1
3 B_0 <chr [3]> 3 0 <chr [1]> 1
4 B_1 <chr [3]> 3 1 <chr [1]> 1
我有如下数据:
# Build the toy data set: one row per (group, letter) observation.
# Group sizes are A_0 = 3, A_1 = 2, B_0 = 3, B_1 = 3.
toy.dat <- data.frame(
  group = rep(c("A_0", "A_1", "B_0", "B_1"), times = c(3, 2, 3, 3))
)
# Letters observed in each row, laid out one group per line.
toy.dat$letters <- c(
  "A", "B", "C", # A_0
  "A", "D",      # A_1
  "C", "E", "F", # B_0
  "A", "B", "F"  # B_1
)
# Collapse each group into a single row holding its letters (as a
# list-column) and its row count, then derive group_number by stripping
# everything up to and including the last "_" from the group label.
toy.dat %>%
  group_by(group) %>%
  summarise(
    letters = list(letters),
    num = n()
  ) %>%
  mutate(group_number = gsub(".*_", "", group))
group letters num group_number
A_0 c("A", "B", "C") 3 0
A_1 c("A", "D") 2 1
B_0 c("C", "E", "F") 3 0
B_1 c("A", "B", "F") 3 1
我想按 group_number 分组,找出每组内各行 letters 的交集,并将其作为新列添加到数据框中。
A_0 和 B_0 的输出应为“C”,A_1 和 B_1 的输出应为“A”。
我们可以用 purrr 包的 reduce 对每个分组内的字母向量依次求交集:
library(dplyr)
library(purrr)
# 1. One row per group, with its letters stored in a list-column.
# 2. Within each group_number, fold intersect() over the letter vectors so
#    every row of that set carries the letters common to all of its groups.
# 3. Drop the grouping and count the shared letters for each row.
toy.dat %>%
  group_by(group) %>%
  summarise(
    letters = list(letters),
    num = n()
  ) %>%
  mutate(group_number = gsub(".*_", "", group)) %>%
  group_by(group_number) %>%
  mutate(intersect = list(reduce(letters, intersect))) %>%
  ungroup() %>%
  mutate(nintersect = lengths(intersect))
输出:
# A tibble: 4 × 6
group letters num group_number intersect nintersect
<chr> <list> <int> <chr> <list> <int>
1 A_0 <chr [3]> 3 0 <chr [1]> 1
2 A_1 <chr [2]> 2 1 <chr [1]> 1
3 B_0 <chr [3]> 3 0 <chr [1]> 1
4 B_1 <chr [3]> 3 1 <chr [1]> 1