R:将数据分成两列的组合

R: Separate data into combinations of two columns

我有一些数据,其中每个 id 由不同的 type 测量,可以有不同的值 type_val。测量值为val。一个小的虚拟数据是这样的:

# Dummy data: two ids, each measured under two types at three type_val levels.
df <- data.frame(
    id = rep(c("a", "b"), times = 6),
    type = rep(c("t1", "t2"), each = 6),
    type_val = rep(rep(c(1, 2, 3), each = 2), times = 2),
    val = seq_len(12)
)

则df为:

    id  type    type_val    val
1   a   t1  1   1
2   b   t1  1   2
3   a   t1  2   3
4   b   t1  2   4
5   a   t1  3   5
6   b   t1  3   6
7   a   t2  1   7
8   b   t2  1   8
9   a   t2  2   9
10  b   t2  2   10
11  a   t2  3   11
12  b   t2  3   12

我需要对数据做 spread/cast,使得对每个 id,type 与 type_val 的所有组合各占一行。我认为这应该是 reshape2 或 tidyr 包能完成的工作,但我的尝试除了报错之外没有得到任何结果。期望的数据结构(略有冗余)大致如下(希望我没写错!):其中由 type_val 的组合给出的 type 配对是列 type_t1 和 type_t2,它们对应的值(df 中的 val)是列 val_t1 和 val_t2;列名可以任意:

    id  type_t1 type_t2 val_t1  val_t2
1   a   1   1   1   7
2   a   1   2   1   9
3   a   1   3   1   11
4   a   2   1   3   7
5   a   2   2   3   9
6   a   2   3   3   11
7   a   3   1   5   7
8   a   3   2   5   9
9   a   3   3   5   11
10  b   1   1   2   8
11  b   1   2   2   10
12  b   1   3   2   12
13  b   2   1   4   8
14  b   2   2   4   10
15  b   2   3   4   12
16  b   3   1   6   8
17  b   3   2   6   10
18  b   3   3   6   12

更新

请注意 (@Sotos)

> spread(df, type, val)
  id type_val t1 t2
1  a        1  1  7
2  a        2  3  9
3  a        3  5 11
4  b        1  2  8
5  b        2  4 10
6  b        3  6 12

不是所需的输出 - 它无法提供由 df 中 type 和 type_val 的组合定义的宽格式。

这个怎么样:

# Split the long data into one data frame per measurement type.
df1 <- df[df$type == "t1", ]
df2 <- df[df$type == "t2", ]

# Merging on id alone pairs every t1 row with every t2 row of the same id.
DF <- merge(x = df1, y = df2, by = "id")

# Columns 2 and 5 are the two (now constant) type columns; drop them.
DF <- DF[-c(2, 5)]
names(DF) <- c("id", "type_t1", "val_t1", "type_t2", "val_t2")

这里有一些更通用的东西,可以处理任意数量的唯一 type:

library(dplyr)

# Fold a list of data frames (.data) into a single data frame by merging
# them pairwise, left to right, on the column(s) named in ID.
reduce_merge <- function(.data, ID) {
    merge_pair <- function(left, right) {
        merge(left, right, by = ID)
    }
    Reduce(merge_pair, .data)
}

# Rename the columns `cols` of `.data` by appending the value found in the
# first row of the `identifier` column.
#
# Arguments:
#   .data       data frame (or tibble) whose columns are renamed
#   cols        character vector of column names to rename
#   identifier  name of the column whose first value becomes the suffix
#   sep         separator placed between the old name and the suffix
#
# Returns `.data` with every column in `cols` renamed to
# paste(col, suffix, sep = sep); all other columns are left untouched.
# Entries of `cols` that are not columns of `.data` are reported with a
# message and otherwise ignored.
batch_rename <- function(.data, cols, identifier, sep = '_') {
    # Nothing to rename; also keeps paste() from turning character(0) into "".
    if (length(cols) == 0L) {
        return(.data)
    }

    # `[[` yields the column vector for data frames and tibbles alike, and
    # as.character() gives a factor's label rather than its integer code.
    suffix <- as.character(.data[[identifier]][1])
    new_names <- paste(cols, suffix, sep = sep)

    absent <- setdiff(cols, names(.data))
    if (length(absent) > 0L) {
        message("The following `cols` were not present in `.data`: ",
                paste(absent, collapse = ", "))
    }

    # Map every current column name that appears in `cols` to its new name.
    idx <- match(names(.data), cols)
    hit <- !is.na(idx)
    names(.data)[hit] <- new_names[idx[hit]]
    .data
}

# Spread a long data frame into every within-group combination of `vals`
# across the unique values of `key`.
#
# Arguments:
#   .data  long-format data frame
#   grp    name(s) of the column(s) to merge on (e.g. the id column)
#   key    name of the column whose unique values define the pieces
#   vals   names of the columns to carry over, suffixed with the key value
#
# Steps: split `.data` into one data frame per value of `key`, use
# batch_rename() to suffix the `vals` columns with that value, merge the
# pieces on `grp` with reduce_merge(), and return the `grp` column(s)
# followed by the suffixed columns in sorted order.
multi_spread <- function(.data, grp, key, vals) {
    # Remove the key column from a piece. Once the vals columns carry the key
    # value as a suffix the key itself is redundant, and leaving it in makes
    # merge() emit key.x / key.y columns that collide from the fourth piece on.
    drop_key <- function(piece) {
        piece[, setdiff(colnames(piece), key), drop = FALSE]
    }

    merged <- .data %>%
        plyr::dlply(key, subset) %>%
        lapply(batch_rename, vals, key) %>%
        lapply(drop_key) %>%
        reduce_merge(grp)

    # Order the output by `grp` (not a hard-coded `id`), then the new columns.
    spread_cols <- sort(setdiff(colnames(merged), c(grp, key, vals)))
    merged[, c(grp, spread_cols), drop = FALSE]
}

# Your example: two ids, two types, three type_val levels per type
df <- data.frame(
    id = rep(c("a", "b"), times = 6),
    type = rep(c("t1", "t2"), each = 6),
    type_val = rep(rep(c(1, 2, 3), each = 2), times = 2),
    val = seq_len(12)
)

multi_spread(df, 'id', 'type', c('type_val', 'val'))

   id type_val_t1 type_val_t2 val_t1 val_t2
1   a           1           1      1      7
2   a           1           2      1      9
3   a           1           3      1     11
4   a           2           1      3      7
5   a           2           2      3      9
6   a           2           3      3     11
7   a           3           1      5      7
8   a           3           2      5      9
9   a           3           3      5     11
10  b           1           1      2      8
11  b           1           2      2     10
12  b           1           3      2     12
13  b           2           1      4      8
14  b           2           2      4     10
15  b           2           3      4     12
16  b           3           1      6      8
17  b           3           2      6     10
18  b           3           3      6     12

# The same layout, this time with three unique values of 'type'
df <- data.frame(
    id = rep(c("a", "b"), times = 9),
    type = rep(c("t1", "t2", "t3"), each = 6),
    type_val = rep(rep(c(1, 2, 3), each = 2), times = 3),
    val = seq_len(18)
)

multi_spread(df, 'id', 'type', c('type_val', 'val'))

   id type_val_t1 type_val_t2 type_val_t3 val_t1 val_t2 val_t3
1   a           1           1           1      1      7     13
2   a           1           1           2      1      7     15
3   a           1           1           3      1      7     17
4   a           1           2           1      1      9     13
5   a           1           2           2      1      9     15
6   a           1           2           3      1      9     17
7   a           1           3           1      1     11     13
8   a           1           3           2      1     11     15
9   a           1           3           3      1     11     17
10  a           2           1           1      3      7     13
11  a           2           1           2      3      7     15
12  a           2           1           3      3      7     17
13  a           2           2           1      3      9     13
14  a           2           2           2      3      9     15
15  a           2           2           3      3      9     17
16  a           2           3           1      3     11     13
17  a           2           3           2      3     11     15
18  a           2           3           3      3     11     17
19  a           3           1           1      5      7     13
20  a           3           1           2      5      7     15
21  a           3           1           3      5      7     17
22  a           3           2           1      5      9     13
23  a           3           2           2      5      9     15
24  a           3           2           3      5      9     17
25  a           3           3           1      5     11     13
26  a           3           3           2      5     11     15
27  a           3           3           3      5     11     17
28  b           1           1           1      2      8     14
29  b           1           1           2      2      8     16
30  b           1           1           3      2      8     18
31  b           1           2           1      2     10     14
32  b           1           2           2      2     10     16
33  b           1           2           3      2     10     18
34  b           1           3           1      2     12     14
35  b           1           3           2      2     12     16
36  b           1           3           3      2     12     18
37  b           2           1           1      4      8     14
38  b           2           1           2      4      8     16
39  b           2           1           3      4      8     18
40  b           2           2           1      4     10     14
41  b           2           2           2      4     10     16
42  b           2           2           3      4     10     18
43  b           2           3           1      4     12     14
44  b           2           3           2      4     12     16
45  b           2           3           3      4     12     18
46  b           3           1           1      6      8     14
47  b           3           1           2      6      8     16
48  b           3           1           3      6      8     18
49  b           3           2           1      6     10     14
50  b           3           2           2      6     10     16
51  b           3           2           3      6     10     18
52  b           3           3           1      6     12     14
53  b           3           3           2      6     12     16
54  b           3           3           3      6     12     18