ggplot2 多层分面

ggplot2 facetting with multiple layers

在单个图上叠加两个 geom 时,我无法获得正确的分面(facet)行为。

我的输入数据由两个数据框组成。 df 包含按 variable 分组并按 short_taxa 堆叠,然后按 SampleType 分面的分类单元丰度数据。

# Example abundance data in long format: 36 rows, two taxa (short_taxa) for
# each of 18 subjects (variable).
#   variable   - subject label, used as the x axis in the plots
#   value      - abundance value, used as the bar height
#   short_taxa - taxon label, used as the fill of the stacked bars
#   SampleType - group label ("Group1".."Group3"), used for faceting
# Row names are the original (non-contiguous) row indices of the dput() output.
df <- structure(list(variable = c("Subject1", "Subject1", "Subject2", 
"Subject2", "Subject5", "Subject5", "Subject7", "Subject7", "Subject8", 
"Subject8", "Subject11", "Subject11", "Subject12", "Subject12", 
"Subject14", "Subject14", "Subject15", "Subject15", "Subject18", 
"Subject18", "Subject20", "Subject20", "Subject22", "Subject22", 
"Subject24", "Subject24", "Subject25", "Subject25", "Subject28", 
"Subject28", "Subject30", "Subject30", "Subject31", "Subject31", 
"Subject32", "Subject32"), value = c(32.4137931034483, 0, 13.6363636363636, 
19.435736677116, 16.3304514889529, 27.4735830931796, 38.4180790960452, 
0.564971751412429, 33.9024390243902, 1.46341463414634, 37.2093023255814, 
5.42635658914729, 23.3175355450237, 32.7962085308057, 35.8024691358025, 
0, 14.1210374639769, 48.4149855907781, 1.86516853932584, 1.50561797752809, 
43.9490445859873, 0.955414012738854, 3.76932989690722, 88.8208762886598, 
29.6511627906977, 4.36046511627907, 32.4742268041237, 19.5876288659794, 
2.03570310053241, 91.1055433761353, 40.9356725146199, 0, 31.2335958005249, 
6.2992125984252, 35.1084812623274, 7.88954635108481), short_taxa = c("f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae", "f__Retroviridae", 
"f__Siphoviridae", "f__Retroviridae", "f__Siphoviridae"), SampleType = c("Group2", 
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2", 
"Group3", "Group3", "Group2", "Group2", "Group1", "Group1", "Group2", 
"Group2", "Group3", "Group3", "Group1", "Group1", "Group2", "Group2", 
"Group3", "Group3", "Group3", "Group3", "Group1", "Group1", "Group1", 
"Group1", "Group2", "Group2", "Group3", "Group3", "Group1", "Group1"
)), .Names = c("variable", "value", "short_taxa", "SampleType"
), row.names = c(17L, 21L, 43L, 47L, 121L, 125L, 173L, 177L, 
199L, 203L, 277L, 281L, 303L, 307L, 355L, 359L, 381L, 385L, 459L, 
463L, 511L, 515L, 563L, 567L, 615L, 619L, 641L, 645L, 719L, 723L, 
771L, 775L, 797L, 801L, 823L, 827L), class = "data.frame")

我可以像这样很好地绘制它:

# Stacked bar chart of `value` per subject, filled by taxon, with one facet
# per SampleType.
# Every `+` sits at the END of a line: R treats a line that begins with `+`
# as the start of a new expression, so layers written that way are never
# added to the plot.
ggplot(df, aes(x = variable, y = value, fill = short_taxa,
               group = short_taxa)) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )

现在我想要做的是在每个堆叠条形图(对应于每个 Subject,即受试者)下方添加一个 geom_rect,代表该受试者的密度测量值。

# Per-subject annotation data: 18 rows, one per subject label (variable).
#   xmin / xmax - numeric rectangle edges on a single 0.5..32.5 scale
#   ymin / ymax - constant -6 / -4 for every row
#   SampleType  - group label ("Group1".."Group3"), used for faceting
#   density     - numeric measurement mapped to the outline colour
# NOTE(review): the subject labels here are not the same set as in `df`
# (e.g. "Subject29", "Subject13" appear only here) - confirm this is intended.
df.coloring <- structure(list(variable = c("Subject24", "Subject25", "Subject7", 
"Subject28", "Subject29", "Subject13", "Subject9", "Subject32", 
"Subject33", "Subject11", "Subject20", "Subject14", "Subject21", 
"Subject5", "Subject1", "Subject17", "Subject18", "Subject3"), 
    xmin = c(0.5, 1.5, 4.5, 6.5, 7.5, 10.5, 11.5, 13.5, 14.5, 
    17.5, 19.5, 21.5, 23.5, 24.5, 27.5, 29.5, 30.5, 31.5), xmax = c(1.5, 
    2.5, 5.5, 7.5, 8.5, 11.5, 12.5, 14.5, 15.5, 18.5, 20.5, 22.5, 
    24.5, 25.5, 28.5, 30.5, 31.5, 32.5), ymin = c(-6, -6, -6, 
    -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6, -6
    ), ymax = c(-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 
    -4, -4, -4, -4, -4, -4), SampleType = c("Group2", "Group3", 
    "Group1", "Group2", "Group3", "Group2", "Group1", "Group2", 
    "Group3", "Group1", "Group2", "Group3", "Group3", "Group1", 
    "Group1", "Group2", "Group3", "Group1"), density = c(0.640242130728438, 
    0.116821877425537, 0.0310043091885746, 0.0189890721812844, 
    0.974712340626866, 0.421599371824414, 0.169613848207518, 
    0.76187791978009, 0.69058098597452, 0.600862825522199, 0.671995443990454, 
    0.225653737317771, 0.911656582495198, 0.342635749839246, 
    0.138989825500175, 0.987418259494007, 0.739982327679172, 
    0.241753033129498)), .Names = c("variable", "xmin", "xmax", 
"ymin", "ymax", "SampleType", "density"), row.names = c(1L, 2L, 
5L, 7L, 8L, 11L, 12L, 14L, 15L, 18L, 20L, 22L, 24L, 25L, 28L, 
30L, 31L, 32L), class = "data.frame")

我已经尝试了各种迭代:

# Attempt that overlays one outlined rectangle per row of df.coloring on the
# stacked bars, coloured by `density`, then facets by SampleType.
# The rectangles are placed with the numeric xmin/xmax columns of df.coloring
# (inherit.aes = FALSE), not with the `variable` labels used by the bars.
# Every `+` sits at the END of a line so the whole chain parses as a single
# expression; `FALSE` is spelled out because `F` can be reassigned.
ggplot(df, aes(x = variable, y = value, fill = short_taxa,
               order = short_taxa)) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  geom_rect(
    data = df.coloring,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, color = density),
    fill = NA,
    inherit.aes = FALSE
  ) +
  scale_color_distiller() +
  facet_wrap(~SampleType, scales = "free") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )

但最终发生的是每个 SampleType 分面都绘制了全部 18 个受试者,而不仅仅是属于该分面的 6 个。

我做错了什么?谢谢!

我认为在这种情况下 geom_tile 比 geom_rect 更有效。

来自帮助页面:

geom_rect uses the locations of the four corners (xmin, xmax, ymin and ymax).

geom_tile uses the center of the tile and its size (x, y, width, height).

通过将图块(tile)以 variable 为中心放置,您的分面效果应该会更好,因为 geom_bar 和 geom_tile 的 x 轴将基于相同的变量。要使用 geom_tile,您需要定义 y、width 和 height。您可以将它们放入 df.coloring,以代替 ymin 和 ymax。

# Constant tile geometry for every row: centred at y = -5, 2 units tall and
# 1 unit wide.
df.coloring[["y"]] <- -5
df.coloring[["height"]] <- 2
df.coloring[["width"]] <- 1

附带说明一下,df.coloring 中 variable 与 SampleType 的对应关系似乎与 df 中的不一致,这使事情变得更加复杂。为了让示例代码中的顺序正确,我做了如下处理:

# Overwrite the subject labels positionally with the distinct subjects of df,
# in their order of first appearance.
df.coloring[["variable"]] <- unique(df[["variable"]])

现在使用 geom_tile 代替 geom_rect,即可得到您想要的图:

# Stacked bars plus one outlined tile per row of df.coloring, coloured by
# `density`. The tiles use `variable` for x, the same variable as the bars.
# Facets are per SampleType with a free x axis only.
ggplot(
  df,
  aes(x = variable, y = value, fill = short_taxa, order = short_taxa)
) +
  geom_bar(stat = "identity", position = "stack") +
  ylim(c(-10, 100)) +
  geom_tile(
    data = df.coloring,
    aes(x = variable, y = y, width = width, height = height, color = density),
    fill = NA,
    inherit.aes = FALSE
  ) +
  scale_color_distiller() +
  facet_wrap(~SampleType, scales = "free_x") +
  theme_classic() +
  theme(
    legend.position = "right",
    legend.key.size = unit(1, "lines"),
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.margin = unit(c(2, 1, 0.5, 0.5), "lines")
  )

因为您希望所有图块的大小相同,所以您可以直接为 y、width 等提供固定值,而不是将它们添加到数据集中。

# Tile layer with the constant geometry (y, width, height) written inline
# instead of being read from columns of df.coloring.
geom_tile(
  data = df.coloring,
  aes(x = variable, y = -5, width = 1, height = 2, color = density),
  fill = NA,
  inherit.aes = FALSE
)