根据列名创建多个图表
Creating multiple graphs based upon the column names
这是我在 stackoverlow 上的第一个问题,如果我没有遵循正确的问题协议,请纠正我。
我正在尝试为在三个时间点(时间 1、时间 2、时间 3)收集的数据创建一些图表,这些时间点等于 X1...、X2... 和 X3...列名的开头。这些图也由数据框的 $Group 列分隔。
我创建图表没问题,我只有很多变量 (~170),我想比较时间 1 与时间 2、时间 2 与时间 3 等。所以我想找一个捷径运行 这种代码,而不必逐个输入。
如上所述,我创建了像 X1...X2... 这样的变量名,它表示变量被记录的时间,即 X1BCSTCAT = time 1; X2BCSTCAT = 时间 2; X3BCSTCAT = 时间 3。这是我的数据的一小部分示例:
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
这里是一些工作代码,使用 ggplot 为一个变量的时间 1 与时间 2 数据创建一个图表:
library(ggplot2)
p <- ggplot(df, aes(x=df$X1BCSTCAT, y=df$X2BCSTCAT, shape = df$Group, color = df$Group)) +
geom_point() + geom_smooth(method=lm, aes(fill=df$Group), fullrange = TRUE) +
labs(title="BCSTCAT", x="Time 1", y = "Time 2") +
scale_color_manual(name = "Group",labels = c("C8","TC"),values = c("blue", "red")) +
scale_shape_manual(name = "Group",labels = c("C8","TC"),values = c(16, 17)) +
scale_fill_manual(name = "Group",labels = c("C8", "TC"),values = c("light blue", "pink"))
所以我真的在尝试创建某种快捷方式,R 将在其中循环并匹配变量名称 X1... 与 X2... 等等并创建图形。我假设必须有某种方法可以根据匹配的列号进行绘制,例如df[7] vs df[10] 并迭代此过程或通过实际匹配名称进行绘图(其中变量名称的唯一区别是表示时间的数字)。
我以前循环使用 lapply
函数创建单独的图表,但不知道从哪里开始尝试做这个。
使用tidyeval
方法的解决方案。我们需要 ggplot2 v3.0.0
(记得重启你的 R 会话)
install.packages("ggplot2", dependencies = TRUE)
首先我们构建一个将列名和组名作为输入的函数。注意 rlang::sym
、rlang::quo_name
& !!
.
的使用
然后为 x-
和 y-
值创建 2 个名称向量,以便我们可以使用 purrr::map2
同时循环遍历它们。
library(rlang)
library(tidyverse)
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
# define a function that accept strings as input
pair_plot <- function(x_var, y_var, group_var) {
# convert strings to symbols
x_var <- rlang::sym(x_var)
y_var <- rlang::sym(y_var)
group_var <- rlang::sym(group_var)
# unquote symbols using !!
ggplot(df, aes(x = !! x_var, y = !! y_var, shape = !! group_var, color = !! group_var)) +
geom_point() + geom_smooth(method = lm, aes(fill = !! group_var), fullrange = TRUE) +
labs(title = "BCSTCAT", x = rlang::quo_name(x_var), y = rlang::quo_name(y_var)) +
scale_color_manual(name = "Group", labels = c("C8", "TC"), values = c("blue", "red")) +
scale_shape_manual(name = "Group", labels = c("C8", "TC"), values = c(16, 17)) +
scale_fill_manual(name = "Group", labels = c("C8", "TC"), values = c("light blue", "pink")) +
theme_bw()
}
# Test if the new function works
pair_plot("X1BCSTCAT", "X2BCSTCAT", "Group")
# Create 2 parallel lists
list_x <- colnames(df)[-c(1:6, (ncol(df)-2):(ncol(df)))]
list_x
#> [1] "X1BCSTCAT" "X1BCSTCR" "X1BCSTPR" "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR"
list_y <- lead(colnames(df)[-(1:6)], 3)[1:length(list_x)]
list_y
#> [1] "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR" "X3BCSTCAT" "X3BCSTCR" "X3BCSTPR"
# Loop through 2 lists simultaneously
# Supply inputs to pair_plot function using purrr::map2
map2(list_x, list_y, ~ pair_plot(.x, .y, "Group"))
示例输出:
#> [[1]]
#>
#> [[2]]
由 reprex package (v0.2.0) 创建于 2018-05-24。
这是我在 stackoverlow 上的第一个问题,如果我没有遵循正确的问题协议,请纠正我。
我正在尝试为在三个时间点(时间 1、时间 2、时间 3)收集的数据创建一些图表,这些时间点等于 X1...、X2... 和 X3...列名的开头。这些图也由数据框的 $Group 列分隔。
我创建图表没问题,我只有很多变量 (~170),我想比较时间 1 与时间 2、时间 2 与时间 3 等。所以我想找一个捷径运行 这种代码,而不必逐个输入。
如上所述,我创建了像 X1...X2... 这样的变量名,它表示变量被记录的时间,即 X1BCSTCAT = time 1; X2BCSTCAT = 时间 2; X3BCSTCAT = 时间 3。这是我的数据的一小部分示例:
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
这里是一些工作代码,使用 ggplot 为一个变量的时间 1 与时间 2 数据创建一个图表:
library(ggplot2)
p <- ggplot(df, aes(x=df$X1BCSTCAT, y=df$X2BCSTCAT, shape = df$Group, color = df$Group)) +
geom_point() + geom_smooth(method=lm, aes(fill=df$Group), fullrange = TRUE) +
labs(title="BCSTCAT", x="Time 1", y = "Time 2") +
scale_color_manual(name = "Group",labels = c("C8","TC"),values = c("blue", "red")) +
scale_shape_manual(name = "Group",labels = c("C8","TC"),values = c(16, 17)) +
scale_fill_manual(name = "Group",labels = c("C8", "TC"),values = c("light blue", "pink"))
所以我真的在尝试创建某种快捷方式,R 将在其中循环并匹配变量名称 X1... 与 X2... 等等并创建图形。我假设必须有某种方法可以根据匹配的列号进行绘制,例如df[7] vs df[10] 并迭代此过程或通过实际匹配名称进行绘图(其中变量名称的唯一区别是表示时间的数字)。
我以前循环使用 lapply
函数创建单独的图表,但不知道从哪里开始尝试做这个。
使用tidyeval
方法的解决方案。我们需要 ggplot2 v3.0.0
(记得重启你的 R 会话)
install.packages("ggplot2", dependencies = TRUE)
首先我们构建一个将列名和组名作为输入的函数。注意
rlang::sym
、rlang::quo_name
&!!
. 的使用
然后为
x-
和y-
值创建 2 个名称向量,以便我们可以使用purrr::map2
同时循环遍历它们。
library(rlang)
library(tidyverse)
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
# define a function that accept strings as input
pair_plot <- function(x_var, y_var, group_var) {
# convert strings to symbols
x_var <- rlang::sym(x_var)
y_var <- rlang::sym(y_var)
group_var <- rlang::sym(group_var)
# unquote symbols using !!
ggplot(df, aes(x = !! x_var, y = !! y_var, shape = !! group_var, color = !! group_var)) +
geom_point() + geom_smooth(method = lm, aes(fill = !! group_var), fullrange = TRUE) +
labs(title = "BCSTCAT", x = rlang::quo_name(x_var), y = rlang::quo_name(y_var)) +
scale_color_manual(name = "Group", labels = c("C8", "TC"), values = c("blue", "red")) +
scale_shape_manual(name = "Group", labels = c("C8", "TC"), values = c(16, 17)) +
scale_fill_manual(name = "Group", labels = c("C8", "TC"), values = c("light blue", "pink")) +
theme_bw()
}
# Test if the new function works
pair_plot("X1BCSTCAT", "X2BCSTCAT", "Group")
# Create 2 parallel lists
list_x <- colnames(df)[-c(1:6, (ncol(df)-2):(ncol(df)))]
list_x
#> [1] "X1BCSTCAT" "X1BCSTCR" "X1BCSTPR" "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR"
list_y <- lead(colnames(df)[-(1:6)], 3)[1:length(list_x)]
list_y
#> [1] "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR" "X3BCSTCAT" "X3BCSTCR" "X3BCSTPR"
# Loop through 2 lists simultaneously
# Supply inputs to pair_plot function using purrr::map2
map2(list_x, list_y, ~ pair_plot(.x, .y, "Group"))
示例输出:
#> [[1]]
#>
#> [[2]]
由 reprex package (v0.2.0) 创建于 2018-05-24。