使用 ggplot2(或等价物)在 R 中的同一 x 轴上绘制两个数据集

Plotting two datasets across same x-axis in R using ggplot2 (or equivalent)

编辑:这个问题已经解决了,我已在问题底部贴出了我学到的要点以及最终使用的代码

我想在同一条 x 轴上绘制两个数据集,并将第二个数据集镜像显示在 x 轴下方。数据集附在下文。

到目前为止我已经尝试过:

# Baseline: one bar chart per dataset, with Exon on the x axis and the bars
# filled by Variant_Classification. These two give simple enough graphs.
ggplot(plot_case, aes(x = Exon, fill = Variant_Classification)) +
  geom_bar()
ggplot(plot_control, aes(x = Exon, fill = Variant_Classification)) +
  geom_bar()

# I then tried to create a negative set in the control group
# Flip the sign of every control Exon value. Exon is the column mapped to x
# in the plot below, so this moves the control rows to negative x positions.
plot_control$Exon <- as.numeric(plot_control$Exon) * -1
# Stack the case rows and the (now negated) control rows into one table.
plot_all <- rbind(plot_case, plot_control)
# Plotting it gives me this
# NOTE(review): stat_count() is a ggplot2 layer function rather than a
# per-row value, so passing it as the y aesthetic is presumably what makes
# this attempt fail -- confirm against the ggplot2 documentation.
ggplot(data = plot_all) + geom_col(mapping = aes(x= Exon, y=stat_count(Exon), fill = Variant_Classification))

而我真正想要的是让第二个数据集显示在第一个数据集的下方,就像这张图:

非常感谢您的帮助,下面是完整的数据集

head(plot_case)

   Variant_Classification Exon
1:      Nonsense_Mutation   22
2:        Frame_Shift_Del   28
3:      Nonsense_Mutation    7
4:      Missense_Mutation   27
5:      Missense_Mutation   28
6:      Missense_Mutation   18

full: 
dput(plot_case)
    structure(list(Variant_Classification = structure(c(5L, 1L, 5L, 
    4L, 4L, 4L, 5L, 2L, 5L, 2L, 5L, 4L, 1L, 2L, 1L, 4L, 5L, 5L, 5L, 
    5L, 6L, 5L, 3L, 2L, 3L, 4L, 4L), .Label = c("Frame_Shift_Del", 
    "Frame_Shift_Ins", "In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", 
    "Splice_Site"), class = "factor"), Exon = structure(c(22L, 28L, 
    7L, 27L, 28L, 18L, 12L, 18L, 20L, 26L, 21L, 11L, 12L, 7L, 14L, 
    13L, 22L, 20L, 15L, 20L, 20L, 21L, 19L, 7L, 3L, 11L, 4L), .Label = c("1", 
    "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
    "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
    "25", "26", "27", "28", "29", "30"), class = "factor")), row.names = c(NA, 
    -27L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)


   head(plot_control)
   Variant_Classification Exon
1:      Missense_Mutation   1
2:      Missense_Mutation   1
3:      Missense_Mutation   1
4:      Missense_Mutation   1
5:      Missense_Mutation   1
6:      Missense_Mutation   1

dput(plot_control)
structure(list(Variant_Classification = structure(c(4L, 4L, 4L, 
4L, 4L, 4L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 7L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
2L, 2L, 4L, 5L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 7L, 7L, 
5L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 
4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L), .Label = c("Frame_Shift_Del", "Frame_Shift_Ins", 
"In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", 
"Splice_Site"), class = "factor"), Exon = c(1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 
11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 
12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 17, 17, 17, 17, 17, 17, 
17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 
20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 
21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 
23, 23, 23, 23, 23, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 
25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29, 
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29)), row.names = c(NA, 
-456L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x56115dc87e30>)

感谢大家的进一步帮助,我运行了下面建议的代码,得到了下图:

我现在需要让坐标轴包含更多信息,以免丢失所有数据

在@__S 的进一步帮助下,我使用了:

# Mirrored bar chart: case counts are drawn above the axis, control counts
# (as -log(count)) below it.
bind_rows(
  mutate(plot_control, type = 'control'),
  mutate(plot_case, type = 'case')
) %>%
  # One row per (variant class, exon, dataset) holding its number of records.
  group_by(Variant_Classification, Exon, type) %>%
  summarise(freq = n()) %>%
  # Control rows become -log(count); case rows keep their raw count.
  mutate(freq = ifelse(type == 'control', -log(freq), freq)) %>%
  ggplot(mapping = aes(x = Exon, y = freq, fill = Variant_Classification)) +
  geom_col() +
  # Horizontal line at zero separating the two halves.
  geom_hline(yintercept = 0)

看起来棒极了! 参考已接受的答案,问题在于我的数据集在传给 ggplot 之前需要先整理:我需要把它转换成基于频率的表格,这一步是借助 dplyr 的 group_by 和 summarise 函数完成的。

试试下面这样的写法怎么样:

library(dplyr)
# ggplot(), aes(), geom_col(), geom_hline() and xlab() come from ggplot2, so
# it has to be attached as well for this snippet to run in a fresh session.
library(ggplot2)

# Mirrored bar chart of record counts per exon: case counts above the axis,
# control counts (as -log(count)) below it. Expects `plot_case` and
# `plot_control` to have the columns Variant_Classification and Exon.
plot_control %>%
  mutate(type = 'control') %>%
  bind_rows(plot_case %>% mutate(type = 'case')) %>%
  group_by(Variant_Classification, Exon, type) %>%
  summarise(freq = n()) %>%
  # summarise() leaves the result grouped; drop the grouping so the join and
  # the mutate below operate on the table as a whole.
  ungroup() %>%
  # Edit2: here you join with a data.frame with all the Exon, so exons with
  # no records still get a slot on the x axis. The key is spelled out
  # instead of relying on the implicit "common columns" join.
  right_join(
    data.frame(Exon = min(.$Exon):max(.$Exon)),
    by = "Exon"
  ) %>%
  # Edit1: here you can add the log().
  # Note that log(1) is 0, so a control combination seen exactly once ends up
  # with a zero-height bar.
  mutate(freq = ifelse(type == 'control', -log(freq), freq)) %>%
  ggplot(aes(x = as.factor(Exon), y = freq, fill = Variant_Classification)) +
  geom_col() +
  geom_hline(yintercept = 0) +
  xlab("Exon")

使用的示例数据:

# Small sample of the case data, parsed from inline text. The header line has
# one field fewer than the data lines, so read.table() uses the first field
# ("1:", "2:", ...) as row names. `header = TRUE` is spelled out because `T`
# is an ordinary variable that can be reassigned.
plot_case <- read.table(text = '   Variant_Classification Exon
1:      Nonsense_Mutation   22
2:        Frame_Shift_Del   28
3:      Nonsense_Mutation    7
4:      Missense_Mutation   27
5:      Missense_Mutation   28
6:      Missense_Mutation   18
7:      Missense_Mutation   1', header = TRUE)


# Small sample of the control data, parsed from inline text. As with the case
# sample, the leading "1:", "2:", ... field becomes the row names because the
# header line has one field fewer than the data lines. `header = TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
plot_control <- read.table(text = '     Variant_Classification Exon
1:      Missense_Mutation   1
2:      Missense_Mutation   1
3:      Missense_Mutation   1
', header = TRUE)