重塑数据框并添加列索引
Reshape dataframe and add column index
我有一个如下所示的数据框:
df =
b e t w e e n
[1,] 219 125 125 94 172 109 172
[2,] 78 78 250 156 172 141 140
[3,] 250 204 296 829 265 125 203
[4,] 406 110 172 187 63 156 109
当我使用 melt(df) 融化它时,我得到:
df.m =
X1 X2 value
1 1 b 219
2 2 b 78
3 3 b 250
4 4 b 406
5 1 e 125
6 2 e 78
7 3 e 204
8 4 e 110
9 1 t 125
10 2 t 250
11 3 t 296
12 4 t 172
13 1 w 94
14 2 w 156
15 3 w 829
16 4 w 187
17 1 e 172
18 2 e 172
19 3 e 265
20 4 e 63
21 1 e 109
22 2 e 141
23 3 e 125
24 4 e 156
25 1 n 172
26 2 n 140
27 3 n 203
28 4 n 109
问题是,当我想绘制每个字母的箱线图时,它只按字母分组。在上面的示例中,有 3 个 "e",它们聚集在一起。因此,下面的公式生成下面的箱线图:
# Reproduces the problem described above: x is mapped to the letter in X2,
# so the rows of all three "e" columns are pooled into a single box.
ggplot(df.m, aes(x=X2, y=value)) +
geom_boxplot(outlier.shape=NA) +
xlim('b','e','t','w','e','e','n')
如果我可以将一列添加到保留初始列索引的融化数据框中,则很容易制作正确的箱线图。有办法吗?
一个选项是根据 X1 是否等于 1 创建一个新列(假设数据集是有序的):先得到一个逻辑向量,再取累积和并转换为字符('i1'),然后沿用 OP 的 ggplot 代码,最后用 scale_x_discrete 更改刻度线标签。
library(dplyr)
library(ggplot2)
# Tag every row with the index of the matrix column it came from. X1 restarts
# at 1 at the top of each original column, so the running count of X1 == 1 is
# that column index (this relies on df.m still being in melt() order).
df.idx <- df.m %>%
  mutate(i1 = as.character(cumsum(X1 == 1)))

# Derive the axis from the data instead of hard-coding seven breaks/labels:
# one break per column index, labelled with that column's original letter.
col_breaks <- unique(df.idx$i1)
col_labels <- as.character(df.idx$X2[df.idx$X1 == 1])

ggplot(df.idx, aes(x = i1, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  # limits pins the boxes to column order; plain character sorting would put
  # "10" before "2" once there are ten or more columns.
  scale_x_discrete(limits = col_breaks, labels = col_labels) +
  xlab(NULL)
或者,我们可以把原始 matrix 的列名设置为列的序号,对其进行 melt,然后直接用于 ggplot:
library(reshape2)
# Assumes dplyr (for %>%) and ggplot2 are already attached; only reshape2 is
# loaded immediately above.
# Rename the columns to 1..n so the duplicated letters stay distinct through
# melt(); the melted column index is then available as Var2.
`colnames<-`(df, seq_len(ncol(df))) %>%
  melt() %>%
  # A factor with explicit levels keeps the boxes in column order; a character
  # x would sort "10" before "2" once there are ten or more columns.
  ggplot(aes(x = factor(Var2, levels = seq_len(ncol(df))), y = value)) +
  geom_boxplot(outlier.shape = NA) +
  # Put the original (duplicated) letters back on the ticks, one per level.
  scale_x_discrete(labels = colnames(df)) +
  xlab(NULL)
数据
# Melted (long-format) example data: X1 is the row index within each original
# column, X2 is that column's letter, value is the cell value.
df.m <- data.frame(
  X1 = rep(1:4, times = 7L),
  X2 = rep(c("b", "e", "t", "w", "e", "e", "n"), each = 4L),
  value = c(
    219L, 78L, 250L, 406L,
    125L, 78L, 204L, 110L,
    125L, 250L, 296L, 172L,
    94L, 156L, 829L, 187L,
    172L, 172L, 265L, 63L,
    109L, 141L, 125L, 156L,
    172L, 140L, 203L, 109L
  ),
  row.names = as.character(1:28),
  stringsAsFactors = FALSE
)
# Original 4 x 7 integer matrix; the column names spell "between", so the
# letter "e" appears three times. Values are filled column by column.
df <- matrix(
  c(
    219L, 78L, 250L, 406L,
    125L, 78L, 204L, 110L,
    125L, 250L, 296L, 172L,
    94L, 156L, 829L, 187L,
    172L, 172L, 265L, 63L,
    109L, 141L, 125L, 156L,
    172L, 140L, 203L, 109L
  ),
  nrow = 4L,
  ncol = 7L,
  dimnames = list(NULL, c("b", "e", "t", "w", "e", "e", "n"))
)
另一个使用基本 R 绘图的解决方案:
# Base-graphics alternative: called on the matrix df directly, with no melt()
# step. NOTE(review): boxplot's matrix method presumably draws one box per
# column, which would keep the duplicated "e" columns apart; confirm against
# ?boxplot.matrix.
boxplot(df)
我有一个如下所示的数据框:
df =
b e t w e e n
[1,] 219 125 125 94 172 109 172
[2,] 78 78 250 156 172 141 140
[3,] 250 204 296 829 265 125 203
[4,] 406 110 172 187 63 156 109
当我使用 melt(df) 融化它时,我得到:
df.m =
X1 X2 value
1 1 b 219
2 2 b 78
3 3 b 250
4 4 b 406
5 1 e 125
6 2 e 78
7 3 e 204
8 4 e 110
9 1 t 125
10 2 t 250
11 3 t 296
12 4 t 172
13 1 w 94
14 2 w 156
15 3 w 829
16 4 w 187
17 1 e 172
18 2 e 172
19 3 e 265
20 4 e 63
21 1 e 109
22 2 e 141
23 3 e 125
24 4 e 156
25 1 n 172
26 2 n 140
27 3 n 203
28 4 n 109
问题是,当我想绘制每个字母的箱线图时,它只按字母分组。在上面的示例中,有 3 个 "e",它们聚集在一起。因此,下面的公式生成下面的箱线图:
# Reproduces the problem described above: x is mapped to the letter in X2,
# so the rows of all three "e" columns are pooled into a single box.
ggplot(df.m, aes(x=X2, y=value)) +
geom_boxplot(outlier.shape=NA) +
xlim('b','e','t','w','e','e','n')
如果我可以将一列添加到保留初始列索引的融化数据框中,则很容易制作正确的箱线图。有办法吗?
一个选项是根据 X1 是否等于 1 创建一个新列(假设数据集是有序的):先得到一个逻辑向量,再取累积和并转换为字符('i1'),然后沿用 OP 的 ggplot 代码,最后用 scale_x_discrete 更改刻度线标签。
library(dplyr)
library(ggplot2)
# Tag every row with the index of the matrix column it came from. X1 restarts
# at 1 at the top of each original column, so the running count of X1 == 1 is
# that column index (this relies on df.m still being in melt() order).
df.idx <- df.m %>%
  mutate(i1 = as.character(cumsum(X1 == 1)))

# Derive the axis from the data instead of hard-coding seven breaks/labels:
# one break per column index, labelled with that column's original letter.
col_breaks <- unique(df.idx$i1)
col_labels <- as.character(df.idx$X2[df.idx$X1 == 1])

ggplot(df.idx, aes(x = i1, y = value)) +
  geom_boxplot(outlier.shape = NA) +
  # limits pins the boxes to column order; plain character sorting would put
  # "10" before "2" once there are ten or more columns.
  scale_x_discrete(limits = col_breaks, labels = col_labels) +
  xlab(NULL)
或者,我们可以把原始 matrix 的列名设置为列的序号,对其进行 melt,然后直接用于 ggplot:
library(reshape2)
# Assumes dplyr (for %>%) and ggplot2 are already attached; only reshape2 is
# loaded immediately above.
# Rename the columns to 1..n so the duplicated letters stay distinct through
# melt(); the melted column index is then available as Var2.
`colnames<-`(df, seq_len(ncol(df))) %>%
  melt() %>%
  # A factor with explicit levels keeps the boxes in column order; a character
  # x would sort "10" before "2" once there are ten or more columns.
  ggplot(aes(x = factor(Var2, levels = seq_len(ncol(df))), y = value)) +
  geom_boxplot(outlier.shape = NA) +
  # Put the original (duplicated) letters back on the ticks, one per level.
  scale_x_discrete(labels = colnames(df)) +
  xlab(NULL)
数据
# Melted (long-format) example data: X1 is the row index within each original
# column, X2 is that column's letter, value is the cell value.
df.m <- data.frame(
  X1 = rep(1:4, times = 7L),
  X2 = rep(c("b", "e", "t", "w", "e", "e", "n"), each = 4L),
  value = c(
    219L, 78L, 250L, 406L,
    125L, 78L, 204L, 110L,
    125L, 250L, 296L, 172L,
    94L, 156L, 829L, 187L,
    172L, 172L, 265L, 63L,
    109L, 141L, 125L, 156L,
    172L, 140L, 203L, 109L
  ),
  row.names = as.character(1:28),
  stringsAsFactors = FALSE
)
# Original 4 x 7 integer matrix; the column names spell "between", so the
# letter "e" appears three times. Values are filled column by column.
df <- matrix(
  c(
    219L, 78L, 250L, 406L,
    125L, 78L, 204L, 110L,
    125L, 250L, 296L, 172L,
    94L, 156L, 829L, 187L,
    172L, 172L, 265L, 63L,
    109L, 141L, 125L, 156L,
    172L, 140L, 203L, 109L
  ),
  nrow = 4L,
  ncol = 7L,
  dimnames = list(NULL, c("b", "e", "t", "w", "e", "e", "n"))
)
另一个使用基本 R 绘图的解决方案:
# Base-graphics alternative: called on the matrix df directly, with no melt()
# step. NOTE(review): boxplot's matrix method presumably draws one box per
# column, which would keep the duplicated "e" columns apart; confirm against
# ?boxplot.matrix.
boxplot(df)