如何添加一个循环来制作多个 "two-group" 散点图，然后自动为每个图中的两组提供相同的 y 轴限制？

Question

我这里有 2 个相互关联的问题：

(1) 我想为数百个基因各制作 2 个散点图（左侧为 Group1，右侧为 Group2）。每个基因的这两个图将保存在一个单独的 png 文件中。我已经可以针对单个基因做到这一点。但是，我有数百个基因，因此想添加一个循环来遍历每个基因。我在下面给出了包含 2 个基因（Gene1 和 Gene2）的示例数据集。

(2) 对我来说事情有点复杂，因为每个基因的 y 轴范围都不同。我知道可以给 Group1 和 Group2 都加上 "scale_y_continuous(limits = c(0,0.07))"，使同一个 png 文件中两幅图的 y 轴具有可比性。但是，在对数百个基因运行循环时，各基因的取值范围并不相同，如何让每个 png 文件中两组的 y 轴保持一致？

这是我的示例数据框 'data'。

Biomarkers  TimePoint   Groups  Scale   Readings
Gene1   52.5    Group1  10  0.021066
Gene1   52.5    Group2  10  0.019844
Gene1   57.5    Group1  10  0.024328
Gene1   57.5    Group2  10  0.016862
Gene1   62.5    Group1  10  0.023408
Gene1   62.5    Group2  10  0.018715
Gene1   52.5    Group1  25  0.0271
Gene1   52.5    Group2  25  0.0240525
Gene1   57.5    Group1  25  0.0289975
Gene1   57.5    Group2  25  0.023015
Gene1   62.5    Group1  25  0.029795
Gene1   62.5    Group2  25  0.0235975
Gene1   52.5    Group1  50  0.03457
Gene1   52.5    Group2  50  0.02917
Gene1   57.5    Group1  50  0.039255
Gene1   57.5    Group2  50  0.03051
Gene1   62.5    Group1  50  0.037835
Gene1   62.5    Group2  50  0.029515
Gene1   52.5    Group1  75  0.0446
Gene1   52.5    Group2  75  0.0382675
Gene1   57.5    Group1  75  0.047695
Gene1   57.5    Group2  75  0.0411
Gene1   62.5    Group1  75  0.04727
Gene1   62.5    Group2  75  0.04001
Gene1   52.5    Group1  90  0.056192
Gene1   52.5    Group2  90  0.046091
Gene1   57.5    Group1  90  0.062609
Gene1   57.5    Group2  90  0.04831
Gene1   62.5    Group1  90  0.059525
Gene1   62.5    Group2  90  0.051676
Gene2   52.5    Group1  10  0.019224
Gene2   52.5    Group2  10  0.012512
Gene2   57.5    Group1  10  0.019476
Gene2   57.5    Group2  10  0.017973
Gene2   62.5    Group1  10  0.022266
Gene2   62.5    Group2  10  0.018438
Gene2   52.5    Group1  25  0.02633
Gene2   52.5    Group2  25  0.02284
Gene2   57.5    Group1  25  0.02686
Gene2   57.5    Group2  25  0.023725
Gene2   62.5    Group1  25  0.030415
Gene2   62.5    Group2  25  0.0247175
Gene2   52.5    Group1  50  0.03813
Gene2   52.5    Group2  50  0.03047
Gene2   57.5    Group1  50  0.0384
Gene2   57.5    Group2  50  0.031035
Gene2   62.5    Group1  50  0.04037
Gene2   62.5    Group2  50  0.03219
Gene2   52.5    Group1  75  0.05681
Gene2   52.5    Group2  75  0.04059
Gene2   57.5    Group1  75  0.05618
Gene2   57.5    Group2  75  0.039695
Gene2   62.5    Group1  75  0.05748
Gene2   62.5    Group2  75  0.0429125
Gene2   52.5    Group1  90  0.090266
Gene2   52.5    Group2  90  0.059884
Gene2   57.5    Group1  90  0.092606
Gene2   57.5    Group2  90  0.053052
Gene2   62.5    Group1  90  0.088748
Gene2   62.5    Group2  90  0.0571

我的数据输入是：

> dput(data)
structure(list(Biomarkers = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Gene1", "Gene2"), class = "factor"), 
    TimePoint = c(52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 
    57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 
    52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 57.5, 57.5, 
    62.5, 62.5, 52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 
    57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 
    52.5, 52.5, 57.5, 57.5, 62.5, 62.5, 52.5, 52.5, 57.5, 57.5, 
    62.5, 62.5), Groups = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), .Label = c("Group1", 
    "Group2"), class = "factor"), Scale = c(10L, 10L, 10L, 10L, 
    10L, 10L, 25L, 25L, 25L, 25L, 25L, 25L, 50L, 50L, 50L, 50L, 
    50L, 50L, 75L, 75L, 75L, 75L, 75L, 75L, 90L, 90L, 90L, 90L, 
    90L, 90L, 10L, 10L, 10L, 10L, 10L, 10L, 25L, 25L, 25L, 25L, 
    25L, 25L, 50L, 50L, 50L, 50L, 50L, 50L, 75L, 75L, 75L, 75L, 
    75L, 75L, 90L, 90L, 90L, 90L, 90L, 90L), Readings = c(0.021066, 
    0.019844, 0.024328, 0.016862, 0.023408, 0.018715, 0.0271, 
    0.0240525, 0.0289975, 0.023015, 0.029795, 0.0235975, 0.03457, 
    0.02917, 0.039255, 0.03051, 0.037835, 0.029515, 0.0446, 0.0382675, 
    0.047695, 0.0411, 0.04727, 0.04001, 0.056192, 0.046091, 0.062609, 
    0.04831, 0.059525, 0.051676, 0.019224, 0.012512, 0.019476, 
    0.017973, 0.022266, 0.018438, 0.02633, 0.02284, 0.02686, 
    0.023725, 0.030415, 0.0247175, 0.03813, 0.03047, 0.0384, 
    0.031035, 0.04037, 0.03219, 0.05681, 0.04059, 0.05618, 0.039695, 
    0.05748, 0.0429125, 0.090266, 0.059884, 0.092606, 0.053052, 
    0.088748, 0.0571)), class = "data.frame", row.names = c(NA, 
-60L))

这是我目前为单个基因（Group1 和 Group2）生成一个 png 文件所用的代码。

# Load libraries
library(ggplot2)
library(magrittr)
library(dplyr)
library(gridExtra)
library(grid)

# Data select: keep the rows for one gene, then split them by group.
data_select <- data[data$Biomarkers %in% "Gene1", ]
data_Group1 <- data_select[data_select$Groups %in% "Group1", ]
data_Group2 <- data_select[data_select$Groups %in% "Group2", ]

png("ScatterPlot_Gene1.png", height = 600, width = 1111)

# Group1
# Columns are named bare inside aes() so ggplot2 looks them up in the data
# passed to ggplot(). Every layer, including the final y scale, must be joined
# with `+`; a scale written on its own line after a finished expression is a
# separate statement and is never added to the plot.
graph_Group1 <- data_Group1 %>%
    ggplot(aes(x = TimePoint, y = Readings, group = Scale)) +
    labs(title = "Group1", x = "Time point", y = "Readings") +
    scale_x_continuous(breaks = c(42.5, 47.5, 52.5, 57.5, 62.5, 67.5, 72.5),
        labels = c("1", "2", "3", "4", "5", "6", "7")) +
    geom_line(aes(color = Scale), na.rm = TRUE) +
    geom_point(aes(color = Scale),
                size = 1.5,
                na.rm = TRUE) +
    scale_color_continuous(name = "Scale",
        breaks = c(5, 10, 25, 50, 75, 90)) +
    theme(legend.key.height = unit(3.5, "cm")) +
    scale_y_continuous(limits = c(0, 0.07))

# Group2: same layers and the same y limits, so both panels are comparable.
graph_Group2 <- data_Group2 %>%
    ggplot(aes(x = TimePoint, y = Readings, group = Scale)) +
    labs(title = "Group2", x = "Time point", y = "Readings") +
    scale_x_continuous(breaks = c(42.5, 47.5, 52.5, 57.5, 62.5, 67.5, 72.5),
        labels = c("1", "2", "3", "4", "5", "6", "7")) +
    geom_line(aes(color = Scale), na.rm = TRUE) +
    geom_point(aes(color = Scale),
                size = 1.5,
                na.rm = TRUE) +
    scale_color_continuous(name = "Scale",
        breaks = c(5, 10, 25, 50, 75, 90)) +
    theme(legend.key.height = unit(3.5, "cm")) +
    scale_y_continuous(limits = c(0, 0.07))

# Draw both panels side by side into the open png device, then close it.
grid.arrange(graph_Group1, graph_Group2, nrow = 1,
    top = textGrob("Gene1", gp = gpar(fontsize = 20)))
dev.off()

这是我目前得到的图。

感谢任何帮助。谢谢。

Answer 1

考虑把绘图过程封装成一个函数，然后调用 by（tapply 的面向对象包装器）对每个不同的基因依次运行该函数。对于 y 轴范围，预先计算两组读数合在一起的最小值和最大值：

用户自定义函数

# Draw the Group1 and Group2 panels for one gene side by side with a shared
# y axis, save them to "ScatterPlot_<gene>.png" in the working directory, and
# return the arranged object so it can be redrawn later with grid.draw().
#
# sub: the rows for a single gene; must contain the columns Biomarkers,
#      TimePoint, Groups, Scale and Readings.
proc_plot <- function(sub) {

  # The gene name is taken from the first row; as.character() keeps the label
  # (not the integer code) when Biomarkers is a factor.
  gene <- as.character(sub$Biomarkers[[1]])

  # One y range for both panels, computed over both groups together. Missing
  # or non-finite readings are ignored so that a single NA does not turn the
  # limits into NA, which would let each panel pick its own range again.
  readings <- sub$Readings[sub$Groups %in% c("Group1", "Group2")]
  y_limits <- if (any(is.finite(readings))) {
    range(readings, finite = TRUE)
  } else {
    c(NA_real_, NA_real_)  # nothing usable: leave the limits to ggplot2
  }

  # Build the panel for one group; both panels share every layer and y_limits.
  plot_group <- function(group) {
    ggplot(sub[sub$Groups %in% group, ],
           aes(x = TimePoint, y = Readings, group = Scale)) +
      labs(title = group, x = "Time point", y = "Readings") +
      scale_x_continuous(breaks = c(42.5, 47.5, 52.5, 57.5, 62.5, 67.5, 72.5),
                         labels = c("1", "2", "3", "4", "5", "6", "7")) +
      geom_line(aes(color = Scale), na.rm = TRUE) +
      geom_point(aes(color = Scale), size = 1.5, na.rm = TRUE) +
      scale_color_continuous(name = "Scale", breaks = c(5, 10, 25, 50, 75, 90)) +
      scale_y_continuous(limits = y_limits) +
      theme(legend.key.height = unit(3.5, "cm"))
  }

  png(paste0("ScatterPlot_", gene, ".png"), height = 600, width = 1111)
  # Close the png device even when drawing fails, so one bad gene does not
  # leave a device open for the rest of the run.
  on.exit(dev.off(), add = TRUE)

  output <- grid.arrange(plot_group("Group1"), plot_group("Group2"), nrow = 1,
                         top = textGrob(gene, gp = gpar(fontsize = 20)))

  output
}

调用绘图函数

# Run proc_plot once per gene: this writes the png files and collects the
# returned plot objects, which can then be looked up by gene name.
plot_list <- by(data, INDICES = data$Biomarkers, FUN = proc_plot)

显示保存的绘图

# Start each redraw on a clean page. grid.newpage() works whether or not a
# graphics device is already open, whereas dev.off() raises an error when
# there is no open device to close.
grid.newpage()
grid.draw(plot_list$Gene1)

grid.newpage()
grid.draw(plot_list$Gene2)

Answer 2

您可以按基因对数据分组，使用 facet_wrap() 为两个（或更多）Groups 绘制并排的分面图（这也会根据该基因内 Readings 值的范围统一调整 y 轴限制），并用一条简洁的管道保存这些图：

# Scratch directory that receives one png per gene.
temp <- tempfile()
dir.create(temp)

# NOTE(review): `dat` is assumed to be the question's data frame (named `data`
# there) -- confirm before running.
dat %>%
  group_by(Biomarkers) %>%
  group_walk(function(.x, .y) {
    # .y is a one-row tibble holding the group key. Pull the gene name out
    # explicitly: pasting the tibble itself yields the integer code instead of
    # the label when Biomarkers is a factor.
    gene <- as.character(.y$Biomarkers)

    # facet_wrap() puts the groups in side-by-side panels that share one
    # y axis (its default is fixed scales).
    gene_plot <- ggplot(
      .x,
      aes(
        x      = TimePoint,
        y      = Readings,
        colour = Scale,
        group  = Scale
      )
    ) +
      geom_line() +
      geom_point(size = 1.5) +
      facet_wrap(~ Groups, nrow = 1) +
      ggthemes::theme_few() +
      scale_x_continuous(
        breaks = c(42.5, 47.5, 52.5, 57.5, 62.5, 67.5, 72.5),
        labels = c("1", "2", "3", "4", "5", "6", "7")
      ) +
      scale_color_viridis_c(breaks = c(5, 10, 25, 50, 75, 90)) +
      theme(legend.key.height = unit(0.155, "npc")) +
      ggtitle(gene)

    ggsave(paste0(gene, ".png"), gene_plot, device = "png", path = temp)
  }) %>%
  invisible()

# Saving 6.96 x 6.02 in image
# Saving 6.96 x 6.02 in image  

list.files(path = temp)

# [1] "Gene1.png" "Gene2.png"

读取保存的图像：

# Read every file in `temp` with png::readPNG, one list element per file,
# in the order list.files() returns them.
imgs <- lapply(list.files(temp, full.names = TRUE), png::readPNG)

查看 Gene1 的图表：

grid::grid.raster(image = imgs[[1]])

查看 Gene2 的图表：

grid::grid.raster(image = imgs[[2]])

如何添加一个循环来制作多个 "two-group" 散点图，然后自动为每个图中的两组提供相同的 y 轴限制？

How to add a loop to make multiple "two-group" scatter plots, and then automatically give same y-axis limits to both groups in each plot?

loops

for-loop

r

yaxis