将 R for 循环的结果存储到对象中并使用 apply 函数绘制图形的问题

Problem with storing results of R for loop into an object and using apply function to plot graphs

问题是将 for 循环中的图形存储为 R 中的向量。

我编写了一个函数,可以通过 table 绘制图形 table(见下文)。

# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)

d1 <- data.frame(options = c("Strongly Agree", "Agree", "Disagree", "N/A",NA), foo2016 =
                   c(1, 4, 5, 6, NA), foo2017 = c(10, 7, 8, 9, NA), foo2018 = c(10, 7, 15, 14, NA))
d2 <- data.frame(options = c("options","Strongly Agree", "Agree", "Disagree", "N/A",NA),
                 foo2016 = c(11, 4, 3, 2, 1, NA), foo2017 = c(12, 6, 5, 4, 5, NA), foo2018 = c(10, 7, 6, 15, 14, NA))

mytables_in_a_list <- list(d1, d2)


x <- mytables_in_a_list

# where x = my tables in a list, n = table index in the list 
foo_graph <- function(x, n){
  tbl1 <- x[[n]]

  if(tbl1[1,1] != "Strongly Agree"){
    tbl1 <- tbl1[-1,]
  }

  #rename column
  names(tbl1) <- c("Options", "2016", "2017", "2018")

  # remove rofoo with NAs
  tbl1 <- tbl1 %>% drop_na()

  cols.num <- c("2016","2017", "2018")

  tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)

  sapply(tbl1, class)

  # alternative to removing rofoo with NAs
  # na.omit(tbl)
  mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")

  foo_graph <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
    geom_line() +
    geom_point( size=4, shape=21, fill="white")

foo_graph
}

上面的代码工作正常。然而,由于我有很多tables(大约40个),我想我可以通过使用for循环来迭代绘图来节省很多时间,这样图表(大约40) 可以存储在单个 R 对象中。我试过 for 循环(见下面的代码)但我得到的 R 对象是空的,没有错误消息。

# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)

x <- mytables_in_a_list
foo_graph <- list()
for (i in length(x)){
tbl1 <- x[[i]]

# delete table 1st row if the 1st element in the 1st row is not "Strongly Agree" 
if(tbl1[1,1] != "Strongly Agree"){
  tbl1 <- tbl1[-1,]
}

#rename column
names(tbl1) <- c("Options", "2016", "2017", "2018")

# remove rows with NAs
tbl1 <- tbl1 %>% drop_na()

# change "2016","2017", "2018" columns to numeric
cols.num <- c("2016","2017", "2018")
tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)

# melt the table
mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")

# plot the graph with ggplot
foo_graph[[i]] <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
  geom_line() +
  geom_point( size=4, shape=21, fill="white")
}

foo_graph

我也知道可以使用 lapply 函数来做同样的事情。我尝试了 mapply 因为我的函数有两个参数,但不幸的是,我得到了以下错误。

> mapply(x, foo_graph, n)
Error in get(as.character(FUN), mode = "function", envir = envir) : 
  object 'alistoftables' of mode 'function' was not found

我希望输出是存储在 R 对象中的图形,这样如果我查询对象中的第 3 个元素,例如 foo_graph[3],将显示对象中的第 3 个图形。但是,下面显示的不是预期结果。

> foo_graph[3]
[[1]]
NULL

没有您的数据,我们无法重现该行为。但这里有一些示例代码可以执行此操作。我使用 purrr::map 而不是循环或 apply 系列,但您可以将 map 替换为 lapply 并获得相同的结果。

library(tidyverse)
library(cowplot) # to plot a list of plots

# create some fake data
# make a vector for table size
sz <- 21:60

# function to make data frames
make_tbl <- function(size) {
  a <- sample(x = 1:50, size = size, replace = TRUE)
  b <- sample(x = LETTERS[1:3], size = size, replace = TRUE)
  return(tibble(a,b))
}

# a list of tables
list_of_tbls <- map(sz, make_tbl )

# function to plot
make_plot <- function(tbl) {
  ggplot(data=tbl) + geom_boxplot(aes(x=b, y=a, fill=b))
}

# make plot for all tables
list_of_plots <- map(list_of_tbls, make_plot)

# plot (all 40 if on a big screen)
cowplot::plot_grid(plotlist = list_of_plots[1:8], nrow=2)

另一种方法是按行绑定 table 并使用分面绘图。当然 tables 应该有相同的列。这里的参数 .id 将创建一个列 tbl 来跟踪 table,因此按 40 table 分面很简单。

# alternative to bind the tables if they have the same columns
bound_tbls <- bind_rows(list_of_tbls, .id = "tbl")

# then plot with facet
ggplot(bound_tbls) + geom_boxplot(aes(x=b, y=a, fill=b)) + facet_wrap("tbl", ncol=8)

编辑:使用 OP 的代码和数据。稍微修改了绘图功能以匹配虚拟数据。注意最后使用 cowplot::plot_grid 。但是如果你只是 运行 all_graphs[[graph_number]],你应该能够让图表一张一张地显示。

# library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)

d1 <-
  data.frame(
    options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
    foo2016 =
      c(1, 4, 5, 6, NA),
    foo2017 = c(10, 7, 8, 9, NA)
  )
d2 <-
  data.frame(
    options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
    foo2016 = c(11, 4, 3, 2, 1, NA),
    foo2017 = c(12, 6, 5, 4, 5, NA)
  )

mytables_in_a_list <- list(d1, d2)

# where x = my tables in a list, n = table index in the list
foo_graph <- function(x, n) {
  tbl1 <- x[[n]]

  if (tbl1[1, 1] != "Strongly Agree") {
    tbl1 <- tbl1[-1, ]
  }

  #rename column
  # edited to match input data that doesn't have 2018
  names(tbl1) <- c("Options", "2016", "2017")

  # remove rofoo with NAs
  tbl1 <- tbl1 %>% drop_na()

  # edited to match input data that doesn't have 2018
  cols.num <- c("2016", "2017")

  tbl1[cols.num] <- sapply(tbl1[cols.num], as.numeric)

  sapply(tbl1, class)

  # alternative to removing rofoo with NAs
  # na.omit(tbl)
  mdf <-
    melt(
      tbl1,
      value.name = "value",
      variable.name = "year",
      id.vars = "Options"
    )

  foo_graph <-
    ggplot(data = mdf, aes(
      x = year,
      y = value,
      group = Options,
      colour = Options
    )) +
    geom_line() +
    geom_point(size = 4,
               shape = 21,
               fill = "white")

  foo_graph
}

all_graphs <-
  lapply(1:length(x), function(i)
    foo_graph(x = mytables_in_a_list, n = i))

# plot all of them
library(cowplot)
pp <- plot_grid(plotlist = all_graphs,
          align = "hv",
          axis = "ltbr")

# to save:
# ggsave(pp, filename = "all_plots.pdf", width=10, height=5)

要查看绘制在一起的图,只需调用 pp 对象:

pp

获得 all_graphs 之后,您应该可以通过调用以下命令查看各个图:

all_graphs[[1]]

如果您只调用 all_graphs,您只会在显示中看到最后一个图 window,因为每个图都会显示并替换为以下图。在 Rstudio 中,您可以在显示窗格中向后浏览以查看列表中的先前绘图。

> all_graphs
[[1]]

[[2]]

**编辑 2:使用分面代替 cowplot。使用 40 tables 这应该会更好。尽管如此,问题是是否有办法从 40 个 table 中的每一个中 summarize/extract 有趣的信息并制作一个摘要图。而不是绘制 40 次调查的原始结果。

library(tidyverse)

d1 <-
  data.frame(
    options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
    foo2016 =
      c(1, 4, 5, 6, NA),
    foo2017 = c(10, 7, 8, 9, NA)
  )
d2 <-
  data.frame(
    options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
    foo2016 = c(11, 4, 3, 2, 1, NA),
    foo2017 = c(12, 6, 5, 4, 5, NA)
  )

mytables_in_a_list <- list(d1, d2)

# combine into a single table
mytables_df <- bind_rows(mytables_in_a_list, .id="table")

# a single chain instead of function. 
# You could make this a function, but not necessary

mytables_df %>%
  drop_na() %>%
  rename("Options" = options,
         "2016" = foo2016,
         "2017" = foo2017) %>%
  filter(Options %in% c("Strongly Agree", "Agree", "Disagree", "N/A")) %>%
# make sure the options are ordered appropriatelly
  mutate(Options = factor(Options, levels = c(
    "Strongly Agree", "Agree", "Disagree", "N/A"
  ))) %>%
# using `gather` instead of `melt`, but its the same operation
  gather("Year", "Value", -table, -Options) %>% 
  ggplot(data = ., aes(x=Year, y=Value, group=Options, color=Options)) +
  geom_line() +
  geom_point() +
  facet_wrap("table", ncol=2) +
  theme(legend.position = "top")

制作这个情节: