R data.table 将向量列分解为行

R data.table explode vector column to rows

我有一个 data.table 和一个向量,我想将其添加为 dt 的一列 - 具体来说,将向量的值“分解”为 dt 的行。

编辑:tidyr::expand_grid 和 tidyr::crossing 都能给出想要的结果

下面的代码给了我想要的,但感觉应该有更简单的方法来实现。

想要的结果

(将 example_data 转换为 modified_data)

library(data.table)

# Toy input: one row per id.
example_data <- data.table(id = c("a", "b", "c"), value = c(1, 2, 3))
print(example_data)
#>        id value
#>    <char> <num>
#> 1:      a     1
#> 2:      b     2
#> 3:      c     3

# Values to cross with every row of `example_data`.
explode_vector <- c("foo", "bar")

# Build one tagged copy of the table per value. lapply() returns the list in
# one go, so nothing is grown element-by-element inside a loop.
dt_list <- lapply(explode_vector, function(val) {
  # copy() first: `:=` modifies in place and must not touch `example_data`.
  copy(example_data)[, new_val := val]
})

# Stack the per-value copies; rows stay grouped in `explode_vector` order.
modified_data <- rbindlist(dt_list)
print(modified_data)
#>        id value new_val
#>    <char> <num>  <char>
#> 1:      a     1     foo
#> 2:      b     2     foo
#> 3:      c     3     foo
#> 4:      a     1     bar
#> 5:      b     2     bar
#> 6:      c     3     bar

例如,我感觉下面两种写法之一应该可行,但实际上都得不到想要的结果。有人对此有建议吗?

# Some examples that I feel should work
# Attempt 1: assign the whole vector per `id` group with `:=`.
# NOTE(review): `:=` adds a column to the existing rows rather than adding
# rows, so this presumably cannot yield the 6-row result — confirm by running.
example_data[, new_val := eval(explode_vector), by=id]
# Attempt 2: column-bind the 3-row table with a 2-row table.
# NOTE(review): cbind pairs rows by position, it does not cross them — confirm.
cbind(example_data, data.table(new_val=explode_vector))

编辑:使用重复的 id 值进行扩展

例如,可能有多个标识键(例如时间序列观察),因此主要 id 是重复的。

library(data.table)

# Toy input where `id` repeats (e.g. several dated observations per id).
example_data <- data.table(
  id = c("a", "a", "b", "c"),
  date = c(0, 1, 0, 1),
  value = c(1, 2, 3, 1)
)
print(example_data)
#>        id  date value
#>    <char> <num> <num>
#> 1:      a     0     1
#> 2:      a     1     2
#> 3:      b     0     3
#> 4:      c     1     1

# Values to cross with every row of `example_data`.
explode_vector <- c("foo", "bar")

# A solution using the CJ method.
# unique = TRUE de-duplicates `id` before crossing, so the join back onto
# `example_data` yields each original row exactly once per vector value.
example_data[, CJ(id, explode_vector, unique = TRUE)][example_data, on = .(id)]
#>        id explode_vector  date value
#>    <char>         <char> <num> <num>
#> 1:      a            bar     0     1
#> 2:      a            foo     0     1
#> 3:      a            bar     1     2
#> 4:      a            foo     1     2
#> 5:      b            bar     0     3
#> 6:      b            foo     0     3
#> 7:      c            bar     1     1
#> 8:      c            foo     1     1

# A solution using the expand_grid method (simpler in my opinion but requires
# converting back to `data.table` if trying to stick to `data.table`).
# I also prefer that it retains the original column order.
# `tidyr::` is spelled out because only data.table is attached in this snippet.
as.data.table(tidyr::expand_grid(example_data, explode_vector))
#>        id  date value explode_vector
#>    <char> <num> <num>         <char>
#> 1:      a     0     1            foo
#> 2:      a     0     1            bar
#> 3:      a     1     2            foo
#> 4:      a     1     2            bar
#> 5:      b     0     3            foo
#> 6:      b     0     3            bar
#> 7:      c     1     1            foo
#> 8:      c     1     1            bar

您可以使用 tidyr 包中的 expand_grid

library(tidyr)
library(data.table)

# Inputs: the vector whose values get crossed, and a three-row table.
explode_vector <- c("foo", "bar")
example_data <- data.table(id = c("a", "b", "c"), value = c(1, 2, 3))

# Pair every row of `example_data` with every element of `explode_vector`;
# the new column takes its name from the vector's variable name.
tidyr::expand_grid(example_data, explode_vector)

# A tibble: 6 x 3
#   id    value explode_vector
#   <chr> <dbl> <chr>
# 1 a         1 foo
# 2 a         1 bar
# 3 b         2 foo
# 4 b         2 bar
# 5 c         3 foo
# 6 c         3 bar

data.table中一个选项是CJ

library(data.table)
# Cross the distinct ids with the vector, then join the other columns back on.
# unique = TRUE de-duplicates `id` first, so repeated ids do not multiply rows.
# CJ sorts its result by default, hence "bar" comes before "foo" in the output.
example_data[, CJ(id, explode_vector, unique = TRUE)][example_data, on = .(id)]

-输出

       id explode_vector value
   <char>         <char> <num>
1:      a            bar     1
2:      a            foo     1
3:      b            bar     2
4:      b            foo     2
5:      c            bar     3
6:      c            foo     3

如果是多列,我们可能会用crossing

library(tidyr)
# NOTE(review): `id_cols` is not defined in this snippet; presumably a
# character vector of the identifying column names (e.g. c("id", "date")) —
# define it before running.
# The `..` prefix makes data.table look `id_cols` up outside the table's columns.
crossing(example_data[, ..id_cols], explode_vector)