如何从分层分组的数据框创建构面图

How to create facet plot from hierarchically grouped data frame

我有以下两个 tibble:s12.tbl 和 ref.tbl


# Example "real" samples: 20 rows alternating between samples s1 and s2.
# some_score is stored as character strings here, so it has to be converted
# with as.numeric() before it can be plotted.
s12.tbl <- structure(
  list(
    Sample_name = rep(c("s1", "s2"), times = 10L),
    some_score = c(
      "0.04741", "0.09293", "0.06210", "0.09838", "0.09606",
      "0.09866", "0.08851", "0.10866", "0.05063", "0.09726",
      "0.39775", "0.10731", "0.05509", "0.09866", "0.10784",
      "0.09398", "0.04680", "0.10007", "0.04782", "0.10408"
    ),
    category = rep("real", times = 20L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -20L)
)
s12.tbl
#>    Sample_name some_score category
#> 1           s1    0.04741     real
#> 2           s2    0.09293     real
#> 3           s1    0.06210     real
#> 4           s2    0.09838     real
#> 5           s1    0.09606     real
#> 6           s2    0.09866     real
#> 7           s1    0.08851     real
#> 8           s2    0.10866     real
#> 9           s1    0.05063     real
#> 10          s2    0.09726     real
#> 11          s1    0.39775     real
#> 12          s2    0.10731     real
#> 13          s1    0.05509     real
#> 14          s2    0.09866     real
#> 15          s1    0.10784     real
#> 16          s2    0.09398     real
#> 17          s1    0.04680     real
#> 18          s2    0.10007     real
#> 19          s1    0.04782     real
#> 20          s2    0.10408     real

# Reference ("fake") samples: 30 rows cycling through K1, K2 and K3.
# Unlike s12.tbl, some_score is already numeric in this table.
ref.tbl <- structure(
  list(
    Sample_name = rep(c("K1", "K2", "K3"), times = 10L),
    some_score = c(
      0.09651, 0.09787, 0.09526,
      0.09543, 0.09479, 0.09435,
      0.09122, 0.09123, 0.09024,
      0.09458, 0.09478, 0.09299,
      0.09382, 0.0945, 0.09285,
      0.10158, 0.10127, 0.09889,
      0.10339, 0.10476, 0.1037,
      0.13594, 0.13257, 0.14651,
      0.09458, 0.09473, 0.09325,
      0.09296, 0.0935, 0.09196
    ),
    category = rep("fake", times = 30L)
  ),
  class = c("tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -30L)
)
ref.tbl
#>    Sample_name some_score category
#> 1           K1    0.09651     fake
#> 2           K2    0.09787     fake
#> 3           K3    0.09526     fake
#> 4           K1    0.09543     fake
#> 5           K2    0.09479     fake
#> 6           K3    0.09435     fake
#> 7           K1    0.09122     fake
#> 8           K2    0.09123     fake
#> 9           K3    0.09024     fake
#> 10          K1    0.09458     fake
#> 11          K2    0.09478     fake
#> 12          K3    0.09299     fake
#> 13          K1    0.09382     fake
#> 14          K2    0.09450     fake
#> 15          K3    0.09285     fake
#> 16          K1    0.10158     fake
#> 17          K2    0.10127     fake
#> 18          K3    0.09889     fake
#> 19          K1    0.10339     fake
#> 20          K2    0.10476     fake
#> 21          K3    0.10370     fake
#> 22          K1    0.13594     fake
#> 23          K2    0.13257     fake
#> 24          K3    0.14651     fake
#> 25          K1    0.09458     fake
#> 26          K2    0.09473     fake
#> 27          K3    0.09325     fake
#> 28          K1    0.09296     fake
#> 29          K2    0.09350     fake
#> 30          K3    0.09196     fake

我可以使用以下代码为 s12.tbl 中的 s1 创建一个 ECDF 图:

# Draw ECDF curves for sample "s1" from s12.tbl together with the reference
# samples from ref.tbl in a single panel.
library(dplyr)
# The plotting functions used below (ggplot, aes, stat_ecdf, ...) are provided
# by the ggplot2 package, so that is the package that has to be attached.
library(ggplot2)

# Keep only sample s1 and convert its scores from character to numeric.
s1.tbl <- s12.tbl %>%
  filter(Sample_name == "s1") %>%
  mutate(some_score = as.numeric(some_score))

# Apply the same numeric coercion and column names to the reference table so
# both tables have matching columns for rbind().
ref.tbl <- ref.tbl %>%
  mutate(some_score = as.numeric(some_score)) %>%
  setNames(c("Sample_name", "some_score", "category"))

dat <- rbind(s1.tbl, ref.tbl)

# One ECDF curve per Sample_name; colour and transparency follow category.
ggplot(dat, aes(x = some_score)) +
  stat_ecdf(aes(group = Sample_name, colour = category, alpha = category)) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  scale_color_manual(values = c("black", "red")) +
  theme_minimal(base_size = 15) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 15),
    axis.text.y = element_text(hjust = 1, size = 10)
  ) +
  ylab("ECDF") +
  xlab("Score")

产生这个:

我的问题是:如何制作同时包含 s12.tbl 中 s1 和 s2 的 facet_wrap 版本?

也许最简单的办法是调用两次 stat_ecdf:

# Faceted ECDF plot: one panel per sample in s12.tbl, with the reference
# curves from ref.tbl overlaid.
ggplot(
  mapping = aes(
    x = as.numeric(some_score),
    group = Sample_name,
    colour = category,
    alpha = category
  )
) +
  # Layer 1: the real samples, tagged with a `facet` column so each sample
  # lands in its own panel.
  stat_ecdf(data = s12.tbl %>% mutate(facet = Sample_name)) +
  # Layer 2: the reference samples. This data has no `facet` column; per the
  # ggplot2 faceting documentation, a layer missing the faceting variable is
  # drawn in every panel.
  stat_ecdf(data = ref.tbl) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  scale_color_manual(values = c("black", "red")) +
  facet_wrap(~facet) +
  theme_minimal(base_size = 15) +
  theme(legend.title = element_blank()) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 15)) +
  theme(axis.text.y = element_text(hjust = 1, size = 10)) +
  labs(x = "Score", y = "ECDF")