如何'reverse melt'一个data.frame?

How to 'reverse melt' a data.frame?

我有 data.frame df1(见下面的代码)。我想将它转换成 df2 的样子(见下面的代码)。

也许这可以用 reshape、cast 或者 'reverse melt' 来完成?但是我不太理解这些函数。有人可以帮忙吗?

 # Long-format input: one row per (sample, LETTER) pair that was observed.
 df1 <- data.frame(
   # Run-length form of the sample column: a x4, b x1, c x8, d x2, e x3, g x2.
   sample = rep(
     c("a", "b", "c", "d", "e", "g"),
     times = c(4L, 1L, 8L, 2L, 3L, 2L)
   ),
   LETTER = c(
     "P", "R", "V", "Y",                      # sample a
     "Q",                                     # sample b
     "Q", "R", "S", "T", "U", "W", "X", "Z",  # sample c
     "Q", "X",                                # sample d
     "Q", "V", "X",                           # sample e
     "Q", "T"                                 # sample g
   ),
   stringsAsFactors = FALSE
 )

 # Desired wide-format result: one row per sample and one 0/1 integer column
 # per LETTER. Sample "f" has no rows in df1, so its row is all zeros.
 df2 <- data.frame(
   sample = letters[1:7],  # "a" through "g"
   P = c(1L, 0L, 0L, 0L, 0L, 0L, 0L),
   Q = c(0L, 1L, 1L, 1L, 1L, 0L, 1L),
   R = c(1L, 0L, 1L, 0L, 0L, 0L, 0L),
   S = c(0L, 0L, 1L, 0L, 0L, 0L, 0L),
   T = c(0L, 0L, 1L, 0L, 0L, 0L, 1L),
   U = c(0L, 0L, 1L, 0L, 0L, 0L, 0L),
   V = c(1L, 0L, 0L, 0L, 1L, 0L, 0L),
   W = c(0L, 0L, 1L, 0L, 0L, 0L, 0L),
   X = c(0L, 0L, 1L, 1L, 1L, 0L, 0L),
   Y = c(1L, 0L, 0L, 0L, 0L, 0L, 0L),
   Z = c(0L, 0L, 1L, 0L, 0L, 0L, 0L),
   stringsAsFactors = FALSE
 )

编辑

有人建议我看这个post: How to reshape data from long to wide format。不幸的是,这并没有回答我的问题。等效代码如下并抛出以下错误。

 # NOTE(review): the data frame defined above is named `df1`; `df` is not
 # defined in this snippet, so R presumably resolves it to the built-in function
 # stats::df (a closure), which would explain the error shown below.
 df2 <- reshape(df, idvar = "sample", timevar = "LETTER", direction = "wide")
 Error in data[, timevar] : object of type 'closure' is not subsettable

首先使用 df1$value <- 1L 添加第三个变量也没有解决它。

请注意,与上述帖子不同,我的数据的长格式与宽格式之间并不完全匹配。如能提供任何帮助,不胜感激。

您可以使用 table() 创建频率 table,并将结果转换为 data.frame。

# Cross-tabulate sample (rows) against LETTER (columns) to get the counts.
x <- table(df1[["sample"]], df1[["LETTER"]])
# Turn the contingency table into a data.frame and prepend the row labels
# as an explicit `sample` column.
df2 <- cbind(
  data.frame(sample = rownames(x)),
  as.data.frame.matrix(x)
)

  sample P Q R S T U V W X Y Z
a      a 1 0 1 0 0 0 1 0 0 1 0
b      b 0 1 0 0 0 0 0 0 0 0 0
c      c 0 1 1 1 1 1 0 1 1 0 1
d      d 0 1 0 0 0 0 0 0 1 0 0
e      e 0 1 0 0 0 0 1 0 1 0 0
g      g 0 1 0 0 1 0 0 0 0 0 0

如果您想在输出中包含 sample = f(不存在于 df1 中),您可以在调用 table() 之前将缺失值作为因子水平添加到 df1$sample:

# Declare all seven samples as factor levels so that table() also emits a
# zero-count row for level "f", which has no observations in df1.
df1$sample <- factor(df1$sample, levels = letters[1:7])
# Cross-tabulate the factor column that was just created (`sample`); the
# column `sample2` does not exist in df1.
x <- table(df1$sample, df1$LETTER)
# Convert to a data.frame with the row labels as an explicit `sample` column.
cbind(data.frame(sample = rownames(x)), as.data.frame.matrix(x))

  sample P Q R S T U V W X Y Z
a      a 1 0 1 0 0 0 1 0 0 1 0
b      b 0 1 0 0 0 0 0 0 0 0 0
c      c 0 1 1 1 1 1 0 1 1 0 1
d      d 0 1 0 0 0 0 0 0 1 0 0
e      e 0 1 0 0 0 0 1 0 1 0 0
f      f 0 0 0 0 0 0 0 0 0 0 0
g      g 0 1 0 0 1 0 0 0 0 0 0

您可以创建一个虚拟列并以宽格式获取数据:

library(dplyr)

# Add a constant indicator column `n`, then spread LETTER into one column per
# value; sample/LETTER combinations that never occur are filled with 0.
tidyr::pivot_wider(
  mutate(df1, n = 1),
  names_from = LETTER,
  values_from = n,
  values_fill = 0
)

#  sample     P     R     V     Y     Q     S     T     U     W     X     Z
#  <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 a          1     1     1     1     0     0     0     0     0     0     0
#2 b          0     0     0     0     1     0     0     0     0     0     0
#3 c          0     1     0     0     1     1     1     1     1     1     1
#4 d          0     0     0     0     1     0     0     0     0     1     0
#5 e          0     0     1     0     1     0     0     0     0     1     0
#6 g          0     0     0     0     1     0     1     0     0     0     0

或在data.table中:

library(data.table)
# Convert df1 to a data.table by reference, then add a constant indicator
# column `n` in place.
setDT(df1)
df1[, n := 1]
# Cast to wide format: one row per sample, one column per LETTER, with 0 for
# combinations that do not occur.
dcast(df1, sample ~ LETTER, value.var = "n", fill = 0)