重塑数据框并添加列索引

Reshape dataframe and add column index

我有一个如下所示的数据框:

df = 
           b   e   t   w   e   e   n
    [1,] 219 125 125  94 172 109 172
    [2,]  78  78 250 156 172 141 140
    [3,] 250 204 296 829 265 125 203
    [4,] 406 110 172 187  63 156 109

当我使用 melt(df) 融化它时,我得到:

df.m = 
   X1 X2 value
1   1  b   219
2   2  b    78
3   3  b   250
4   4  b   406
5   1  e   125
6   2  e    78
7   3  e   204
8   4  e   110
9   1  t   125
10  2  t   250
11  3  t   296
12  4  t   172
13  1  w    94
14  2  w   156
15  3  w   829
16  4  w   187
17  1  e   172
18  2  e   172
19  3  e   265
20  4  e    63
21  1  e   109
22  2  e   141
23  3  e   125
24  4  e   156
25  1  n   172
26  2  n   140
27  3  n   203
28  4  n   109

问题是,当我想为每个字母绘制箱线图时,它只按字母本身分组。在上面的示例中有 3 个 "e",它们被合并到了同一组。因此,下面的代码生成了下面的箱线图:

# Box plot of `value` for each letter in X2 (outlier points hidden), with the
# x axis restricted to the letters of the original column header.
ggplot(data = df.m, mapping = aes(x = X2, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  xlim("b", "e", "t", "w", "e", "e", "n")

如果我能在融化后的数据框中添加一列来保留初始的列索引,就很容易画出正确的箱线图。有办法做到吗?

一种做法是根据 X1 中的 "1" 创建一个新列(假设数据集是有序的):先用 X1 == 1 得到一个逻辑向量,再求累积和并转换为字符(即 'i1'),然后沿用提问者的 ggplot 代码,最后用 scale_x_discrete 更改 x 轴的刻度标签。

library(dplyr)
library(ggplot2)
# Give every original column its own group key: X1 restarts at 1 whenever a
# new column begins (the data must still be in melted order), so the running
# count of X1 == 1 is the index of the originating column.
df.m %>%
  mutate(i1 = as.character(cumsum(X1 == 1))) %>%
  ggplot(aes(x = i1, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  scale_x_discrete(
    # Explicit limits keep the boxes in column order; the default ordering of
    # character keys is alphabetical ("1", "10", "2", ...), which breaks as
    # soon as there are 10 or more columns.
    limits = as.character(seq_len(sum(df.m$X1 == 1))),
    # Tick labels are the original letters, taken from the first row of each
    # column instead of being hard-coded for exactly seven columns.
    labels = df.m$X2[df.m$X1 == 1]
  ) +
  xlab(NULL)


或者,我们可以把原始矩阵(matrix)的列名替换为列的序号,用 melt 融化之后直接传给 ggplot:

library(reshape2)
# Replace the (duplicated) column names with the column positions so that every
# column stays a separate group after melting; the original letters are only
# restored as tick labels.
`colnames<-`(df, seq_len(ncol(df))) %>%
  melt() %>%
  # factor() keeps the positions in numeric order, whereas as.character()
  # would sort them alphabetically ("1", "10", "2", ...) once the matrix has
  # 10 or more columns.
  ggplot(aes(x = factor(Var2), y = value)) +
  geom_boxplot(outlier.shape = NA) +
  # Labels are matched to the factor levels by position, i.e. column order.
  scale_x_discrete(labels = colnames(df)) +
  xlab(NULL)

数据

# Melted (long) form of `df`: X1 is the row index within a column, X2 the
# column letter, value the cell content. Row names are the strings "1".."28".
df.m <- structure(
  list(
    X1 = rep(1:4, times = 7L),
    X2 = rep(c("b", "e", "t", "w", "e", "e", "n"), each = 4L),
    value = c(
      219L, 78L, 250L, 406L,
      125L, 78L, 204L, 110L,
      125L, 250L, 296L, 172L,
      94L, 156L, 829L, 187L,
      172L, 172L, 265L, 63L,
      109L, 141L, 125L, 156L,
      172L, 140L, 203L, 109L
    )
  ),
  class = "data.frame",
  row.names = as.character(1:28)
)

# Original 4 x 7 integer matrix; the column names spell "between", so the
# letter "e" occurs three times.
df <- matrix(
  c(
    219L, 78L, 250L, 406L,
    125L, 78L, 204L, 110L,
    125L, 250L, 296L, 172L,
    94L, 156L, 829L, 187L,
    172L, 172L, 265L, 63L,
    109L, 141L, 125L, 156L,
    172L, 140L, 203L, 109L
  ),
  nrow = 4L,
  ncol = 7L,
  dimnames = list(NULL, c("b", "e", "t", "w", "e", "e", "n"))
)

另一个使用基本 R 绘图的解决方案:

# Base graphics: boxplot() on a matrix draws one box per column, so the three
# "e" columns stay separate without any reshaping.
boxplot(df)