将 R for 循环的结果存储到对象中并使用 apply 函数绘制图形的问题
Problem with storing results of R for loop into an object and using apply function to plot graphs
问题是将 for 循环中的图形存储为 R 中的向量。
我编写了一个函数,可以通过 table 绘制图形 table(见下文)。
# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
d1 <- data.frame(options = c("Strongly Agree", "Agree", "Disagree", "N/A",NA), foo2016 =
c(1, 4, 5, 6, NA), foo2017 = c(10, 7, 8, 9, NA), foo2018 = c(10, 7, 15, 14, NA))
d2 <- data.frame(options = c("options","Strongly Agree", "Agree", "Disagree", "N/A",NA),
foo2016 = c(11, 4, 3, 2, 1, NA), foo2017 = c(12, 6, 5, 4, 5, NA), foo2018 = c(10, 7, 6, 15, 14, NA))
mytables_in_a_list <- list(d1, d2)
x <- mytables_in_a_list
# where x = my tables in a list, n = table index in the list
foo_graph <- function(x, n){
tbl1 <- x[[n]]
if(tbl1[1,1] != "Strongly Agree"){
tbl1 <- tbl1[-1,]
}
#rename column
names(tbl1) <- c("Options", "2016", "2017", "2018")
# remove rofoo with NAs
tbl1 <- tbl1 %>% drop_na()
cols.num <- c("2016","2017", "2018")
tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)
sapply(tbl1, class)
# alternative to removing rofoo with NAs
# na.omit(tbl)
mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")
foo_graph <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
geom_line() +
geom_point( size=4, shape=21, fill="white")
foo_graph
}
上面的代码工作正常。然而,由于我有很多tables(大约40个),我想我可以通过使用for
循环来迭代绘图来节省很多时间,这样图表(大约40) 可以存储在单个 R 对象中。我试过 for
循环(见下面的代码)但我得到的 R 对象是空的,没有错误消息。
# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
x <- mytables_in_a_list
foo_graph <- list()
for (i in length(x)){
tbl1 <- x[[i]]
# delete table 1st row if the 1st element in the 1st row is not "Strongly Agree"
if(tbl1[1,1] != "Strongly Agree"){
tbl1 <- tbl1[-1,]
}
#rename column
names(tbl1) <- c("Options", "2016", "2017", "2018")
# remove rows with NAs
tbl1 <- tbl1 %>% drop_na()
# change "2016","2017", "2018" columns to numeric
cols.num <- c("2016","2017", "2018")
tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)
# melt the table
mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")
# plot the graph with ggplot
foo_graph[[i]] <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
geom_line() +
geom_point( size=4, shape=21, fill="white")
}
foo_graph
我也知道可以使用 lapply 函数来做同样的事情。我尝试了 mapply 因为我的函数有两个参数,但不幸的是,我得到了以下错误。
> mapply(x, foo_graph, n)
Error in get(as.character(FUN), mode = "function", envir = envir) :
object 'alistoftables' of mode 'function' was not found
我希望输出是存储在 R 对象中的图形,这样如果我查询对象中的第 3 个元素,例如 foo_graph[3]
,将显示对象中的第 3 个图形。但是,下面显示的不是预期结果。
> foo_graph[3]
[[1]]
NULL
没有您的数据,我们无法重现该行为。但这里有一些示例代码可以执行此操作。我使用 purrr::map
而不是循环或 apply
系列,但您可以将 map
替换为 lapply
并获得相同的结果。
library(tidyverse)
library(cowplot) # to plot a list of plots
# create some fake data
# make a vector for table size
sz <- 21:60
# function to make data frames
make_tbl <- function(size) {
a <- sample(x = 1:50, size = size, replace = TRUE)
b <- sample(x = LETTERS[1:3], size = size, replace = TRUE)
return(tibble(a,b))
}
# a list of tables
list_of_tbls <- map(sz, make_tbl )
# function to plot
make_plot <- function(tbl) {
ggplot(data=tbl) + geom_boxplot(aes(x=b, y=a, fill=b))
}
# make plot for all tables
list_of_plots <- map(list_of_tbls, make_plot)
# plot (all 40 if on a big screen)
cowplot::plot_grid(plotlist = list_of_plots[1:8], nrow=2)
另一种方法是按行绑定 table 并使用分面绘图。当然 tables 应该有相同的列。这里的参数 .id
将创建一个列 tbl
来跟踪 table,因此按 40 table 分面很简单。
# alternative to bind the tables if they have the same columns
bound_tbls <- bind_rows(list_of_tbls, .id = "tbl")
# then plot with facet
ggplot(bound_tbls) + geom_boxplot(aes(x=b, y=a, fill=b)) + facet_wrap("tbl", ncol=8)
编辑:使用 OP 的代码和数据。稍微修改了绘图功能以匹配虚拟数据。注意最后使用 cowplot::plot_grid
。但是如果你只是 运行 all_graphs[[graph_number]]
,你应该能够让图表一张一张地显示。
# library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
d1 <-
data.frame(
options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 =
c(1, 4, 5, 6, NA),
foo2017 = c(10, 7, 8, 9, NA)
)
d2 <-
data.frame(
options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 = c(11, 4, 3, 2, 1, NA),
foo2017 = c(12, 6, 5, 4, 5, NA)
)
mytables_in_a_list <- list(d1, d2)
# where x = my tables in a list, n = table index in the list
foo_graph <- function(x, n) {
tbl1 <- x[[n]]
if (tbl1[1, 1] != "Strongly Agree") {
tbl1 <- tbl1[-1, ]
}
#rename column
# edited to match input data that doesn't have 2018
names(tbl1) <- c("Options", "2016", "2017")
# remove rofoo with NAs
tbl1 <- tbl1 %>% drop_na()
# edited to match input data that doesn't have 2018
cols.num <- c("2016", "2017")
tbl1[cols.num] <- sapply(tbl1[cols.num], as.numeric)
sapply(tbl1, class)
# alternative to removing rofoo with NAs
# na.omit(tbl)
mdf <-
melt(
tbl1,
value.name = "value",
variable.name = "year",
id.vars = "Options"
)
foo_graph <-
ggplot(data = mdf, aes(
x = year,
y = value,
group = Options,
colour = Options
)) +
geom_line() +
geom_point(size = 4,
shape = 21,
fill = "white")
foo_graph
}
all_graphs <-
lapply(1:length(x), function(i)
foo_graph(x = mytables_in_a_list, n = i))
# plot all of them
library(cowplot)
pp <- plot_grid(plotlist = all_graphs,
align = "hv",
axis = "ltbr")
# to save:
# ggsave(pp, filename = "all_plots.pdf", width=10, height=5)
要查看绘制在一起的图,只需调用 pp
对象:
pp
获得 all_graphs
之后,您应该可以通过调用以下命令查看各个图:
all_graphs[[1]]
如果您只调用 all_graphs
,您只会在显示中看到最后一个图 window,因为每个图都会显示并替换为以下图。在 Rstudio 中,您可以在显示窗格中向后浏览以查看列表中的先前绘图。
> all_graphs
[[1]]
[[2]]
**编辑 2:使用分面代替 cowplot
。使用 40 tables 这应该会更好。尽管如此,问题是是否有办法从 40 个 table 中的每一个中 summarize/extract 有趣的信息并制作一个摘要图。而不是绘制 40 次调查的原始结果。
library(tidyverse)
d1 <-
data.frame(
options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 =
c(1, 4, 5, 6, NA),
foo2017 = c(10, 7, 8, 9, NA)
)
d2 <-
data.frame(
options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 = c(11, 4, 3, 2, 1, NA),
foo2017 = c(12, 6, 5, 4, 5, NA)
)
mytables_in_a_list <- list(d1, d2)
# combine into a single table
mytables_df <- bind_rows(mytables_in_a_list, .id="table")
# a single chain instead of function.
# You could make this a function, but not necessary
mytables_df %>%
drop_na() %>%
rename("Options" = options,
"2016" = foo2016,
"2017" = foo2017) %>%
filter(Options %in% c("Strongly Agree", "Agree", "Disagree", "N/A")) %>%
# make sure the options are ordered appropriatelly
mutate(Options = factor(Options, levels = c(
"Strongly Agree", "Agree", "Disagree", "N/A"
))) %>%
# using `gather` instead of `melt`, but its the same operation
gather("Year", "Value", -table, -Options) %>%
ggplot(data = ., aes(x=Year, y=Value, group=Options, color=Options)) +
geom_line() +
geom_point() +
facet_wrap("table", ncol=2) +
theme(legend.position = "top")
制作这个情节:
问题是将 for 循环中的图形存储为 R 中的向量。
我编写了一个函数,可以通过 table 绘制图形 table(见下文)。
# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
d1 <- data.frame(options = c("Strongly Agree", "Agree", "Disagree", "N/A",NA), foo2016 =
c(1, 4, 5, 6, NA), foo2017 = c(10, 7, 8, 9, NA), foo2018 = c(10, 7, 15, 14, NA))
d2 <- data.frame(options = c("options","Strongly Agree", "Agree", "Disagree", "N/A",NA),
foo2016 = c(11, 4, 3, 2, 1, NA), foo2017 = c(12, 6, 5, 4, 5, NA), foo2018 = c(10, 7, 6, 15, 14, NA))
mytables_in_a_list <- list(d1, d2)
x <- mytables_in_a_list
# where x = my tables in a list, n = table index in the list
foo_graph <- function(x, n){
tbl1 <- x[[n]]
if(tbl1[1,1] != "Strongly Agree"){
tbl1 <- tbl1[-1,]
}
#rename column
names(tbl1) <- c("Options", "2016", "2017", "2018")
# remove rofoo with NAs
tbl1 <- tbl1 %>% drop_na()
cols.num <- c("2016","2017", "2018")
tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)
sapply(tbl1, class)
# alternative to removing rofoo with NAs
# na.omit(tbl)
mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")
foo_graph <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
geom_line() +
geom_point( size=4, shape=21, fill="white")
foo_graph
}
上面的代码工作正常。然而,由于我有很多tables(大约40个),我想我可以通过使用for
循环来迭代绘图来节省很多时间,这样图表(大约40) 可以存储在单个 R 对象中。我试过 for
循环(见下面的代码)但我得到的 R 对象是空的,没有错误消息。
# packages used
library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
x <- mytables_in_a_list
foo_graph <- list()
for (i in length(x)){
tbl1 <- x[[i]]
# delete table 1st row if the 1st element in the 1st row is not "Strongly Agree"
if(tbl1[1,1] != "Strongly Agree"){
tbl1 <- tbl1[-1,]
}
#rename column
names(tbl1) <- c("Options", "2016", "2017", "2018")
# remove rows with NAs
tbl1 <- tbl1 %>% drop_na()
# change "2016","2017", "2018" columns to numeric
cols.num <- c("2016","2017", "2018")
tbl1[cols.num] <- sapply(tbl1[cols.num],as.numeric)
# melt the table
mdf <- melt(tbl1, value.name="value", variable.name="year", id.vars="Options")
# plot the graph with ggplot
foo_graph[[i]] <- ggplot(data=mdf, aes(x=year, y=value, group = Options, colour = Options)) +
geom_line() +
geom_point( size=4, shape=21, fill="white")
}
foo_graph
我也知道可以使用 lapply 函数来做同样的事情。我尝试了 mapply 因为我的函数有两个参数,但不幸的是,我得到了以下错误。
> mapply(x, foo_graph, n)
Error in get(as.character(FUN), mode = "function", envir = envir) :
object 'alistoftables' of mode 'function' was not found
我希望输出是存储在 R 对象中的图形,这样如果我查询对象中的第 3 个元素,例如 foo_graph[3]
,将显示对象中的第 3 个图形。但是,下面显示的不是预期结果。
> foo_graph[3]
[[1]]
NULL
没有您的数据,我们无法重现该行为。但这里有一些示例代码可以执行此操作。我使用 purrr::map
而不是循环或 apply
系列,但您可以将 map
替换为 lapply
并获得相同的结果。
library(tidyverse)
library(cowplot) # to plot a list of plots
# create some fake data
# make a vector for table size
sz <- 21:60
# function to make data frames
make_tbl <- function(size) {
a <- sample(x = 1:50, size = size, replace = TRUE)
b <- sample(x = LETTERS[1:3], size = size, replace = TRUE)
return(tibble(a,b))
}
# a list of tables
list_of_tbls <- map(sz, make_tbl )
# function to plot
make_plot <- function(tbl) {
ggplot(data=tbl) + geom_boxplot(aes(x=b, y=a, fill=b))
}
# make plot for all tables
list_of_plots <- map(list_of_tbls, make_plot)
# plot (all 40 if on a big screen)
cowplot::plot_grid(plotlist = list_of_plots[1:8], nrow=2)
另一种方法是按行绑定 table 并使用分面绘图。当然 tables 应该有相同的列。这里的参数 .id
将创建一个列 tbl
来跟踪 table,因此按 40 table 分面很简单。
# alternative to bind the tables if they have the same columns
bound_tbls <- bind_rows(list_of_tbls, .id = "tbl")
# then plot with facet
ggplot(bound_tbls) + geom_boxplot(aes(x=b, y=a, fill=b)) + facet_wrap("tbl", ncol=8)
编辑:使用 OP 的代码和数据。稍微修改了绘图功能以匹配虚拟数据。注意最后使用 cowplot::plot_grid
。但是如果你只是 运行 all_graphs[[graph_number]]
,你应该能够让图表一张一张地显示。
# library(xlsx)
library(ggplot2)
library(tidyverse)
library(readxl)
library(ggplot2)
library(reshape2)
d1 <-
data.frame(
options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 =
c(1, 4, 5, 6, NA),
foo2017 = c(10, 7, 8, 9, NA)
)
d2 <-
data.frame(
options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 = c(11, 4, 3, 2, 1, NA),
foo2017 = c(12, 6, 5, 4, 5, NA)
)
mytables_in_a_list <- list(d1, d2)
# where x = my tables in a list, n = table index in the list
foo_graph <- function(x, n) {
tbl1 <- x[[n]]
if (tbl1[1, 1] != "Strongly Agree") {
tbl1 <- tbl1[-1, ]
}
#rename column
# edited to match input data that doesn't have 2018
names(tbl1) <- c("Options", "2016", "2017")
# remove rofoo with NAs
tbl1 <- tbl1 %>% drop_na()
# edited to match input data that doesn't have 2018
cols.num <- c("2016", "2017")
tbl1[cols.num] <- sapply(tbl1[cols.num], as.numeric)
sapply(tbl1, class)
# alternative to removing rofoo with NAs
# na.omit(tbl)
mdf <-
melt(
tbl1,
value.name = "value",
variable.name = "year",
id.vars = "Options"
)
foo_graph <-
ggplot(data = mdf, aes(
x = year,
y = value,
group = Options,
colour = Options
)) +
geom_line() +
geom_point(size = 4,
shape = 21,
fill = "white")
foo_graph
}
all_graphs <-
lapply(1:length(x), function(i)
foo_graph(x = mytables_in_a_list, n = i))
# plot all of them
library(cowplot)
pp <- plot_grid(plotlist = all_graphs,
align = "hv",
axis = "ltbr")
# to save:
# ggsave(pp, filename = "all_plots.pdf", width=10, height=5)
要查看绘制在一起的图,只需调用 pp
对象:
pp
获得 all_graphs
之后,您应该可以通过调用以下命令查看各个图:
all_graphs[[1]]
如果您只调用 all_graphs
,您只会在显示中看到最后一个图 window,因为每个图都会显示并替换为以下图。在 Rstudio 中,您可以在显示窗格中向后浏览以查看列表中的先前绘图。
> all_graphs
[[1]]
[[2]]
**编辑 2:使用分面代替 cowplot
。使用 40 tables 这应该会更好。尽管如此,问题是是否有办法从 40 个 table 中的每一个中 summarize/extract 有趣的信息并制作一个摘要图。而不是绘制 40 次调查的原始结果。
library(tidyverse)
d1 <-
data.frame(
options = c("Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 =
c(1, 4, 5, 6, NA),
foo2017 = c(10, 7, 8, 9, NA)
)
d2 <-
data.frame(
options = c("options", "Strongly Agree", "Agree", "Disagree", "N/A", NA),
foo2016 = c(11, 4, 3, 2, 1, NA),
foo2017 = c(12, 6, 5, 4, 5, NA)
)
mytables_in_a_list <- list(d1, d2)
# combine into a single table
mytables_df <- bind_rows(mytables_in_a_list, .id="table")
# a single chain instead of function.
# You could make this a function, but not necessary
mytables_df %>%
drop_na() %>%
rename("Options" = options,
"2016" = foo2016,
"2017" = foo2017) %>%
filter(Options %in% c("Strongly Agree", "Agree", "Disagree", "N/A")) %>%
# make sure the options are ordered appropriatelly
mutate(Options = factor(Options, levels = c(
"Strongly Agree", "Agree", "Disagree", "N/A"
))) %>%
# using `gather` instead of `melt`, but its the same operation
gather("Year", "Value", -table, -Options) %>%
ggplot(data = ., aes(x=Year, y=Value, group=Options, color=Options)) +
geom_line() +
geom_point() +
facet_wrap("table", ncol=2) +
theme(legend.position = "top")
制作这个情节: