如何从分层分组的数据框创建构面图
How to create facet plot from hierarchically grouped data frame
我有以下两个 tibble:s12.tbl 和 ref.tbl。
# Sample scores: 20 rows alternating between samples "s1" and "s2", all in
# category "real". some_score is stored as character strings here, so it has
# to be converted with as.numeric() before it can be plotted.
s12.tbl <- structure(
  list(
    Sample_name = rep(c("s1", "s2"), times = 10L),
    # One (s1, s2) pair per row.
    some_score = c(
      "0.04741", "0.09293",
      "0.06210", "0.09838",
      "0.09606", "0.09866",
      "0.08851", "0.10866",
      "0.05063", "0.09726",
      "0.39775", "0.10731",
      "0.05509", "0.09866",
      "0.10784", "0.09398",
      "0.04680", "0.10007",
      "0.04782", "0.10408"
    ),
    category = rep("real", times = 20L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -20L)
)
s12.tbl
#> Sample_name some_score category
#> 1 s1 0.04741 real
#> 2 s2 0.09293 real
#> 3 s1 0.06210 real
#> 4 s2 0.09838 real
#> 5 s1 0.09606 real
#> 6 s2 0.09866 real
#> 7 s1 0.08851 real
#> 8 s2 0.10866 real
#> 9 s1 0.05063 real
#> 10 s2 0.09726 real
#> 11 s1 0.39775 real
#> 12 s2 0.10731 real
#> 13 s1 0.05509 real
#> 14 s2 0.09866 real
#> 15 s1 0.10784 real
#> 16 s2 0.09398 real
#> 17 s1 0.04680 real
#> 18 s2 0.10007 real
#> 19 s1 0.04782 real
#> 20 s2 0.10408 real
# Reference scores: 30 rows cycling through samples "K1", "K2", "K3", all in
# category "fake". some_score is already numeric in this table.
ref.tbl <- structure(
  list(
    Sample_name = rep(c("K1", "K2", "K3"), times = 10L),
    # One (K1, K2, K3) triple per row.
    some_score = c(
      0.09651, 0.09787, 0.09526,
      0.09543, 0.09479, 0.09435,
      0.09122, 0.09123, 0.09024,
      0.09458, 0.09478, 0.09299,
      0.09382, 0.0945, 0.09285,
      0.10158, 0.10127, 0.09889,
      0.10339, 0.10476, 0.1037,
      0.13594, 0.13257, 0.14651,
      0.09458, 0.09473, 0.09325,
      0.09296, 0.0935, 0.09196
    ),
    category = rep("fake", times = 30L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -30L)
)
ref.tbl
#> Sample_name some_score category
#> 1 K1 0.09651 fake
#> 2 K2 0.09787 fake
#> 3 K3 0.09526 fake
#> 4 K1 0.09543 fake
#> 5 K2 0.09479 fake
#> 6 K3 0.09435 fake
#> 7 K1 0.09122 fake
#> 8 K2 0.09123 fake
#> 9 K3 0.09024 fake
#> 10 K1 0.09458 fake
#> 11 K2 0.09478 fake
#> 12 K3 0.09299 fake
#> 13 K1 0.09382 fake
#> 14 K2 0.09450 fake
#> 15 K3 0.09285 fake
#> 16 K1 0.10158 fake
#> 17 K2 0.10127 fake
#> 18 K3 0.09889 fake
#> 19 K1 0.10339 fake
#> 20 K2 0.10476 fake
#> 21 K3 0.10370 fake
#> 22 K1 0.13594 fake
#> 23 K2 0.13257 fake
#> 24 K3 0.14651 fake
#> 25 K1 0.09458 fake
#> 26 K2 0.09473 fake
#> 27 K3 0.09325 fake
#> 28 K1 0.09296 fake
#> 29 K2 0.09350 fake
#> 30 K3 0.09196 fake
我可以使用以下代码为 s12.tbl 中的 s1
创建一个 ECDF 图:
library(dplyr)
# The plotting package is called ggplot2; library(ggplot) fails because no
# package of that name exists.
library(ggplot2)

# Keep only sample s1. some_score is stored as character in s12.tbl, so
# convert it to numeric before it is used as the x aesthetic.
s1.tbl <- s12.tbl %>%
  filter(Sample_name == "s1") %>%
  mutate(some_score = as.numeric(some_score))

# For the ref.tbl shown in this post both steps leave the table unchanged
# (the column is already numeric and the names already match). They are kept
# so the two tables are guaranteed to line up for rbind() below.
ref.tbl <- ref.tbl %>%
  mutate(some_score = as.numeric(some_score)) %>%
  setNames(c("Sample_name", "some_score", "category"))

# Stack the s1 rows on top of the reference rows.
dat <- rbind(s1.tbl, ref.tbl)

# One ECDF curve per Sample_name; colour and transparency are driven by
# category so that the two categories can be told apart.
ggplot(dat, aes(x = some_score)) +
  stat_ecdf(aes(group = Sample_name, colour = category, alpha = category)) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  scale_color_manual(values = c("black", "red")) +
  theme_minimal(base_size = 15) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(hjust = 1, size = 10)
  ) +
  ylab("ECDF") +
  xlab("Score")
产生这个:
我的问题是:如何制作一个同时包含 s12.tbl 中 s1 和 s2 的分面(facet_wrap)版本?
最简单的办法也许是调用两次 stat_ecdf(ref.tbl 不含 facet 列,因此它的曲线会出现在每个分面中):
# Faceted ECDF plot: one panel per sample in s12.tbl.
# The aesthetics are declared once at the top level and shared by both layers;
# some_score is converted inside aes() because s12.tbl stores it as character.
ggplot(
  mapping = aes(
    x = as.numeric(some_score),
    group = Sample_name,
    colour = category,
    alpha = category
  )
) +
  # Sample layer: copy Sample_name into `facet` so each sample gets a panel.
  stat_ecdf(data = s12.tbl %>% mutate(facet = Sample_name)) +
  # Reference layer: ref.tbl has no `facet` column, so ggplot2 draws these
  # curves in every panel.
  stat_ecdf(data = ref.tbl) +
  facet_wrap(~facet) +
  scale_colour_manual(values = c("black", "red")) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  labs(x = "Score", y = "ECDF") +
  theme_minimal(base_size = 15) +
  theme(legend.title = element_blank()) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(hjust = 1, size = 10)
  )
我有以下两个 tibble:s12.tbl 和 ref.tbl。
# Sample scores: 20 rows alternating between samples "s1" and "s2", all in
# category "real". some_score is stored as character strings here, so it has
# to be converted with as.numeric() before it can be plotted.
s12.tbl <- structure(
  list(
    Sample_name = rep(c("s1", "s2"), times = 10L),
    # One (s1, s2) pair per row.
    some_score = c(
      "0.04741", "0.09293",
      "0.06210", "0.09838",
      "0.09606", "0.09866",
      "0.08851", "0.10866",
      "0.05063", "0.09726",
      "0.39775", "0.10731",
      "0.05509", "0.09866",
      "0.10784", "0.09398",
      "0.04680", "0.10007",
      "0.04782", "0.10408"
    ),
    category = rep("real", times = 20L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -20L)
)
s12.tbl
#> Sample_name some_score category
#> 1 s1 0.04741 real
#> 2 s2 0.09293 real
#> 3 s1 0.06210 real
#> 4 s2 0.09838 real
#> 5 s1 0.09606 real
#> 6 s2 0.09866 real
#> 7 s1 0.08851 real
#> 8 s2 0.10866 real
#> 9 s1 0.05063 real
#> 10 s2 0.09726 real
#> 11 s1 0.39775 real
#> 12 s2 0.10731 real
#> 13 s1 0.05509 real
#> 14 s2 0.09866 real
#> 15 s1 0.10784 real
#> 16 s2 0.09398 real
#> 17 s1 0.04680 real
#> 18 s2 0.10007 real
#> 19 s1 0.04782 real
#> 20 s2 0.10408 real
# Reference scores: 30 rows cycling through samples "K1", "K2", "K3", all in
# category "fake". some_score is already numeric in this table.
ref.tbl <- structure(
  list(
    Sample_name = rep(c("K1", "K2", "K3"), times = 10L),
    # One (K1, K2, K3) triple per row.
    some_score = c(
      0.09651, 0.09787, 0.09526,
      0.09543, 0.09479, 0.09435,
      0.09122, 0.09123, 0.09024,
      0.09458, 0.09478, 0.09299,
      0.09382, 0.0945, 0.09285,
      0.10158, 0.10127, 0.09889,
      0.10339, 0.10476, 0.1037,
      0.13594, 0.13257, 0.14651,
      0.09458, 0.09473, 0.09325,
      0.09296, 0.0935, 0.09196
    ),
    category = rep("fake", times = 30L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -30L)
)
ref.tbl
#> Sample_name some_score category
#> 1 K1 0.09651 fake
#> 2 K2 0.09787 fake
#> 3 K3 0.09526 fake
#> 4 K1 0.09543 fake
#> 5 K2 0.09479 fake
#> 6 K3 0.09435 fake
#> 7 K1 0.09122 fake
#> 8 K2 0.09123 fake
#> 9 K3 0.09024 fake
#> 10 K1 0.09458 fake
#> 11 K2 0.09478 fake
#> 12 K3 0.09299 fake
#> 13 K1 0.09382 fake
#> 14 K2 0.09450 fake
#> 15 K3 0.09285 fake
#> 16 K1 0.10158 fake
#> 17 K2 0.10127 fake
#> 18 K3 0.09889 fake
#> 19 K1 0.10339 fake
#> 20 K2 0.10476 fake
#> 21 K3 0.10370 fake
#> 22 K1 0.13594 fake
#> 23 K2 0.13257 fake
#> 24 K3 0.14651 fake
#> 25 K1 0.09458 fake
#> 26 K2 0.09473 fake
#> 27 K3 0.09325 fake
#> 28 K1 0.09296 fake
#> 29 K2 0.09350 fake
#> 30 K3 0.09196 fake
我可以使用以下代码为 s12.tbl 中的 s1
创建一个 ECDF 图:
library(dplyr)
# The plotting package is called ggplot2; library(ggplot) fails because no
# package of that name exists.
library(ggplot2)

# Keep only sample s1. some_score is stored as character in s12.tbl, so
# convert it to numeric before it is used as the x aesthetic.
s1.tbl <- s12.tbl %>%
  filter(Sample_name == "s1") %>%
  mutate(some_score = as.numeric(some_score))

# For the ref.tbl shown in this post both steps leave the table unchanged
# (the column is already numeric and the names already match). They are kept
# so the two tables are guaranteed to line up for rbind() below.
ref.tbl <- ref.tbl %>%
  mutate(some_score = as.numeric(some_score)) %>%
  setNames(c("Sample_name", "some_score", "category"))

# Stack the s1 rows on top of the reference rows.
dat <- rbind(s1.tbl, ref.tbl)

# One ECDF curve per Sample_name; colour and transparency are driven by
# category so that the two categories can be told apart.
ggplot(dat, aes(x = some_score)) +
  stat_ecdf(aes(group = Sample_name, colour = category, alpha = category)) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  scale_color_manual(values = c("black", "red")) +
  theme_minimal(base_size = 15) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(hjust = 1, size = 10)
  ) +
  ylab("ECDF") +
  xlab("Score")
产生这个:
我的问题是:如何制作一个同时包含 s12.tbl 中 s1 和 s2 的分面(facet_wrap)版本?
最简单的办法也许是调用两次 stat_ecdf(ref.tbl 不含 facet 列,因此它的曲线会出现在每个分面中):
# Faceted ECDF plot: one panel per sample in s12.tbl.
# The aesthetics are declared once at the top level and shared by both layers;
# some_score is converted inside aes() because s12.tbl stores it as character.
ggplot(
  mapping = aes(
    x = as.numeric(some_score),
    group = Sample_name,
    colour = category,
    alpha = category
  )
) +
  # Sample layer: copy Sample_name into `facet` so each sample gets a panel.
  stat_ecdf(data = s12.tbl %>% mutate(facet = Sample_name)) +
  # Reference layer: ref.tbl has no `facet` column, so ggplot2 draws these
  # curves in every panel.
  stat_ecdf(data = ref.tbl) +
  facet_wrap(~facet) +
  scale_colour_manual(values = c("black", "red")) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  labs(x = "Score", y = "ECDF") +
  theme_minimal(base_size = 15) +
  theme(legend.title = element_blank()) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(hjust = 1, size = 10)
  )