累积 R 中每个可能组合的值
Accumulate values for every possible combination in R
假设我有数据 test(下面给出 dput 输出),其中有一个名为 items 的列表列:
# Example input: a list-column `items` plus a grouping key `ID`.
test <- structure(
  list(
    items = list("a", c("b", "c"), c("d", "e"), "f", c("g", "h")),
    ID = c(1, 1, 1, 2, 2)
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)
library(tidyverse)
# paste() matches its arguments element by element, so the running result
# (.x) is paired positionally with the next vector (.y) instead of crossed.
test %>%
  group_by(ID) %>%
  mutate(dummy = accumulate(items, ~ paste(.x, .y)))
我得到这样的 list-col 输出
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c e
4 f 2 f
5 g, h 2 f g, f h
我希望第 3 行中有四个项目,具有每种可能的组合,即 c("a b d", "a b e", "a c d", "a c e")
。然而,这些是否是列表中的单独项目并不重要。换句话说,dummy 的输出可能是多级列表类型,其中 row3 将包含列表中的四个项目。我尝试使用 expand.grid
,但我在某处做错了!
所以我想要的输出看起来像
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h
您可以使用外积来粘贴两个向量...
# outer() applies paste() to every pairing of the accumulated strings (.x)
# with the next items vector (.y), so nothing is matched up positionally.
test2 <- test %>%
  group_by(ID) %>%
  mutate(dummy = accumulate(items, ~ outer(.x, .y, FUN = paste)))

# Inspect the structure of the resulting list-column.
str(test2)
grouped_df[,3] [5 x 3] (S3: grouped_df/tbl_df/tbl/data.frame)
$ items:List of 5
..$ : chr "a"
..$ : chr [1:2] "b" "c"
..$ : chr [1:2] "d" "e"
..$ : chr "f"
..$ : chr [1:2] "g" "h"
$ ID : num [1:5] 1 1 1 2 2
$ dummy:List of 5
..$ : chr "a"
..$ : chr [1, 1:2] "a b" "a c"
..$ : chr [1, 1:2, 1:2] "a b d" "a c d" "a b e" "a c e"
..$ : chr "f"
..$ : chr [1, 1:2] "f g" "f h"
如果您想要所有可能的组合,请对 .x 的每个元素使用 sapply:
library(dplyr)
library(purrr)
# For each string accumulated so far (.x), paste it with every element of the
# next vector (.y); c() drops any matrix dimensions sapply() may return.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = accumulate(items, ~ c(sapply(.x, paste, .y)))
  ) %>%
  pull(dummy)
#[[1]]
#[1] "a"
#[[2]]
#[1] "a b" "a c"
#[[3]]
#[1] "a b d" "a b e" "a c d" "a c e"
#[[4]]
#[1] "f"
#[[5]]
#[1] "f g" "f h"
使用 expand.grid() 的另一种方法:
# expand.grid() lists every (.x, .y) pairing as the rows of a data frame;
# do.call() passes its columns to paste(), producing one string per row.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = accumulate(items, ~ do.call("paste", expand.grid(.x, .y)))
  ) %>%
  data.frame()
得到:
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h
data.table 方案,使用 Reduce + outer:
# setDT() and := are data.table functions; no other visible snippet attaches
# the package, so load it here to keep this example runnable on its own.
library(data.table)

# setDT() converts `test` to a data.table by reference. Within each ID, fold
# the items with outer(..., paste) and keep every intermediate result
# (accumulate = TRUE) so each row receives its running set of combinations.
setDT(test)[
  ,
  dummy := .(Reduce(
    function(x, y) outer(x, y, paste),
    items,
    accumulate = TRUE
  )),
  by = ID
]
得到:
> test
items ID dummy
1: a 1 a
2: b,c 1 a b,a c
3: d,e 1 a b d,a c d,a b e,a c e
4: f 2 f
5: g,h 2 f g,f h
purrr
包中还有 cross
和 cross2
:
library(tidyverse)
# cross2() builds every pairing of the accumulated combinations with the next
# items vector; unlist() at depth 2 turns each pairing into a character vector.
# NOTE(review): cross2() is deprecated as of purrr 1.0.0 — confirm the
# installed purrr version before relying on this snippet.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = items %>%
      accumulate(cross2) %>%
      map_depth(2, unlist)
  ) %>%
  pull(dummy) %>%
  str()
#> List of 5
#> $ :List of 1
#> ..$ : chr "a"
#> $ :List of 2
#> ..$ : chr [1:2] "a" "b"
#> ..$ : chr [1:2] "a" "c"
#> $ :List of 4
#> ..$ : chr [1:3] "a" "b" "d"
#> ..$ : chr [1:3] "a" "c" "d"
#> ..$ : chr [1:3] "a" "b" "e"
#> ..$ : chr [1:3] "a" "c" "e"
#> $ :List of 1
#> ..$ : chr "f"
#> $ :List of 2
#> ..$ : chr [1:2] "f" "g"
#> ..$ : chr [1:2] "f" "h"
由 reprex package (v1.0.0)
于 2021-05-18 创建
也可以使用此解决方案:
library(dplyr)
library(purrr)
# Seed the fold with the group's first items vector, then cross the running
# result with each remaining vector: expand.grid() lists the pairings and
# map2() pastes each pair together with a single space.
test %>%
  group_by(ID) %>%
  mutate(
    comb = accumulate(
      items[-1],
      .init = unlist(items[1]),
      ~ expand.grid(.x, .y) %>%
        {
          map2(.$Var1, .$Var2, ~ paste(.x, .y, sep = " "))
        } %>%
        unlist()
    )
  ) %>%
  as.data.frame()
items ID comb
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h
假设我有数据 test(下面给出 dput 输出),其中有一个名为 items 的列表列:
# Example input: a list-column `items` plus a grouping key `ID`.
test <- structure(
  list(
    items = list("a", c("b", "c"), c("d", "e"), "f", c("g", "h")),
    ID = c(1, 1, 1, 2, 2)
  ),
  row.names = c(NA, 5L),
  class = "data.frame"
)
library(tidyverse)
# paste() matches its arguments element by element, so the running result
# (.x) is paired positionally with the next vector (.y) instead of crossed.
test %>%
  group_by(ID) %>%
  mutate(dummy = accumulate(items, ~ paste(.x, .y)))
我得到这样的 list-col 输出
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c e
4 f 2 f
5 g, h 2 f g, f h
我希望第 3 行中有四个项目,具有每种可能的组合,即 c("a b d", "a b e", "a c d", "a c e")
。然而,这些是否是列表中的单独项目并不重要。换句话说,dummy 的输出可能是多级列表类型,其中 row3 将包含列表中的四个项目。我尝试使用 expand.grid
,但我在某处做错了!
所以我想要的输出看起来像
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h
您可以使用外积来粘贴两个向量...
# outer() applies paste() to every pairing of the accumulated strings (.x)
# with the next items vector (.y), so nothing is matched up positionally.
test2 <- test %>%
  group_by(ID) %>%
  mutate(dummy = accumulate(items, ~ outer(.x, .y, FUN = paste)))

# Inspect the structure of the resulting list-column.
str(test2)
grouped_df[,3] [5 x 3] (S3: grouped_df/tbl_df/tbl/data.frame)
$ items:List of 5
..$ : chr "a"
..$ : chr [1:2] "b" "c"
..$ : chr [1:2] "d" "e"
..$ : chr "f"
..$ : chr [1:2] "g" "h"
$ ID : num [1:5] 1 1 1 2 2
$ dummy:List of 5
..$ : chr "a"
..$ : chr [1, 1:2] "a b" "a c"
..$ : chr [1, 1:2, 1:2] "a b d" "a c d" "a b e" "a c e"
..$ : chr "f"
..$ : chr [1, 1:2] "f g" "f h"
如果您想要所有可能的组合,请对 .x 的每个元素使用 sapply:
library(dplyr)
library(purrr)
# For each string accumulated so far (.x), paste it with every element of the
# next vector (.y); c() drops any matrix dimensions sapply() may return.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = accumulate(items, ~ c(sapply(.x, paste, .y)))
  ) %>%
  pull(dummy)
#[[1]]
#[1] "a"
#[[2]]
#[1] "a b" "a c"
#[[3]]
#[1] "a b d" "a b e" "a c d" "a c e"
#[[4]]
#[1] "f"
#[[5]]
#[1] "f g" "f h"
使用 expand.grid() 的另一种方法:
# expand.grid() lists every (.x, .y) pairing as the rows of a data frame;
# do.call() passes its columns to paste(), producing one string per row.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = accumulate(items, ~ do.call("paste", expand.grid(.x, .y)))
  ) %>%
  data.frame()
得到:
items ID dummy
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h
data.table 方案,使用 Reduce + outer:
# setDT() and := are data.table functions; no other visible snippet attaches
# the package, so load it here to keep this example runnable on its own.
library(data.table)

# setDT() converts `test` to a data.table by reference. Within each ID, fold
# the items with outer(..., paste) and keep every intermediate result
# (accumulate = TRUE) so each row receives its running set of combinations.
setDT(test)[
  ,
  dummy := .(Reduce(
    function(x, y) outer(x, y, paste),
    items,
    accumulate = TRUE
  )),
  by = ID
]
得到:
> test
items ID dummy
1: a 1 a
2: b,c 1 a b,a c
3: d,e 1 a b d,a c d,a b e,a c e
4: f 2 f
5: g,h 2 f g,f h
purrr
包中还有 cross
和 cross2
:
library(tidyverse)
# cross2() builds every pairing of the accumulated combinations with the next
# items vector; unlist() at depth 2 turns each pairing into a character vector.
# NOTE(review): cross2() is deprecated as of purrr 1.0.0 — confirm the
# installed purrr version before relying on this snippet.
test %>%
  group_by(ID) %>%
  mutate(
    dummy = items %>%
      accumulate(cross2) %>%
      map_depth(2, unlist)
  ) %>%
  pull(dummy) %>%
  str()
#> List of 5
#> $ :List of 1
#> ..$ : chr "a"
#> $ :List of 2
#> ..$ : chr [1:2] "a" "b"
#> ..$ : chr [1:2] "a" "c"
#> $ :List of 4
#> ..$ : chr [1:3] "a" "b" "d"
#> ..$ : chr [1:3] "a" "c" "d"
#> ..$ : chr [1:3] "a" "b" "e"
#> ..$ : chr [1:3] "a" "c" "e"
#> $ :List of 1
#> ..$ : chr "f"
#> $ :List of 2
#> ..$ : chr [1:2] "f" "g"
#> ..$ : chr [1:2] "f" "h"
由 reprex package (v1.0.0)
于 2021-05-18 创建
也可以使用此解决方案:
library(dplyr)
library(purrr)
# Seed the fold with the group's first items vector, then cross the running
# result with each remaining vector: expand.grid() lists the pairings and
# map2() pastes each pair together with a single space.
test %>%
  group_by(ID) %>%
  mutate(
    comb = accumulate(
      items[-1],
      .init = unlist(items[1]),
      ~ expand.grid(.x, .y) %>%
        {
          map2(.$Var1, .$Var2, ~ paste(.x, .y, sep = " "))
        } %>%
        unlist()
    )
  ) %>%
  as.data.frame()
items ID comb
1 a 1 a
2 b, c 1 a b, a c
3 d, e 1 a b d, a c d, a b e, a c e
4 f 2 f
5 g, h 2 f g, f h