使用 rowwise 和 rlang 创建数据集所有可能的 geom_col 图表
Create all possible geom_col charts of the dataset using rowwise and rlang
我想创建并保存数据集的所有可能 geom_col 图表,其中 Y 轴为分类变量,x 轴为数值变量的平均值(分组变量为 y)。
我借鉴了 this and link 的想法并创建了以下代码,但努力让它工作。请指教
library(tidyverse)
library(skimr)
# Pick the plotting variables from the skimr summary of `mpg`:
# character columns with at most 16 distinct values, and all numeric columns.
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable)
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)
# One row per (y, x) pairing of a categorical and a numeric variable name.
vars <- cross_df(list(y= cat_vars, x = num_vars))
# Attempt: stack one copy of `mpg` per pairing, nest each copy by `grp`,
# bind the pairings alongside, then build a plot and a file name per row.
# NOTE(review): this is the non-working attempt being asked about; the code
# is left exactly as posted and only annotated.
plots <- bind_rows(rep(list(mpg), nrow(vars)), .id = "grp") %>%
nest_by(grp) %>%
bind_cols(vars) %>%
rowwise() %>%
# NOTE(review): `~( ... )` makes the element stored in `plot` a formula,
# not an evaluated ggplot object.
mutate(plot = list(~(data %>%
# NOTE(review): `group_by(y)` names a column called `y`; presumably the
# intent was to group by the column whose name is stored in `y` - confirm.
group_by(y) %>%
# NOTE(review): `"{x}" =` uses `=` rather than `:=`, so the summary column
# is presumably named `{x}` literally instead of after the value of `x`.
summarise("{x}" = mean(.data[[x]], na.rm = T)) %>%
ungroup()) %>%
ggplot() +
geom_col(aes(x = .data[[x]],
y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
fill = .data[[y]]), width = 0.8) +
xlab(paste0("Avg. ", x)) +
ylab(y) +
# NOTE(review): the closing parentheses on the next line appear to outnumber
# the ones still open from `mutate(` and `list(` - verify before running.
theme_classic()))),
filename = paste0(x, "_by_", y, ".pdf")) %>%
select(filename, plot)
# Save every row: the `filename` and `plot` columns are passed to `ggsave()`
# as named arguments, writing into the current working directory.
pwalk(plots, ggsave, path = getwd())
不需要bind_row
s,nest
,...而是:
将您的绘图代码放在辅助函数中,而不是将所有内容都包装在管道中。这使得调试更容易,代码也更清晰。
使用 map2 遍历 df 的列 vars
您的代码存在一个问题 summarise("{x}" = mean(.data[[x]], na.rm = T))
,它将创建一个名为 {x}
的变量。而是使用例如!!sym(x) := ...
如果 x
是一个字符串,则将值分配回 x
。
library(tidyverse)
library(skimr)
# Summarise `mpg` once with skimr and reuse the result for both selections.
mpg_summary <- skim(mpg) %>%
  as.data.frame()

# Categorical candidates: character columns with at most 16 distinct values.
cat_vars <- mpg_summary %>%
  filter(skim_type == "character", character.n_unique <= 16) %>%
  pull(skim_variable)

# Numeric candidates: every numeric column.
num_vars <- mpg_summary %>%
  filter(skim_type == "numeric") %>%
  pull(skim_variable)

# All (y, x) pairings. `cross_df()` is deprecated as of purrr 1.0.0, so use
# `tidyr::expand_grid()` instead. Listing `x` first makes `y` vary fastest,
# and `select()` restores the (y, x) column order, so rows and columns match
# what `cross_df(list(y = cat_vars, x = num_vars))` produced.
vars <- expand_grid(x = num_vars, y = cat_vars) %>%
  select(y, x)
#' Bar chart of the mean of a numeric column per level of a categorical column
#'
#' @param data A data frame containing both columns.
#' @param x Name (string) of the numeric column to average; drawn on the
#'   x axis.
#' @param y Name (string) of the categorical column to group by; drawn on the
#'   y axis (ordered by the averaged value) and used for the fill colour.
#' @return A ggplot object.
make_plot <- function(data, x, y) {
  # `!!sym(x) :=` writes the mean back to the column named by the string `x`.
  # `.groups = "drop"` already returns an ungrouped result, so no separate
  # `ungroup()` call is needed afterwards.
  averages <- data %>%
    group_by(across(all_of(y))) %>%
    summarise(!!sym(x) := mean(.data[[x]], na.rm = TRUE), .groups = "drop")

  ggplot(averages) +
    geom_col(
      aes(
        x = .data[[x]],
        y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = TRUE),
        fill = .data[[y]]
      ),
      width = 0.8
    ) +
    xlab(paste0("Avg. ", x)) +
    ylab(y) +
    theme_classic()
}
# Build one chart for every (x, y) pairing listed in `vars`.
plots <- map2(
  vars$x,
  vars$y,
  function(x_name, y_name) make_plot(data = mpg, x = x_name, y = y_name)
)
length(plots)
#> [1] 25
plots[[1]]

# Export: write each chart to "<x>_by_<y>.pdf" in the working directory.
pdf_names <- paste0(vars$x, "_by_", vars$y, ".pdf")
walk2(
  pdf_names,
  plots,
  function(pdf_name, chart) {
    ggsave(filename = pdf_name, plot = chart, path = getwd())
  }
)
我想创建并保存数据集的所有可能 geom_col 图表,其中 Y 轴为分类变量,x 轴为数值变量的平均值(分组变量为 y)。
我借鉴了 this and
library(tidyverse)
library(skimr)
# Pick the plotting variables from the skimr summary of `mpg`:
# character columns with at most 16 distinct values, and all numeric columns.
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable)
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)
# One row per (y, x) pairing of a categorical and a numeric variable name.
vars <- cross_df(list(y= cat_vars, x = num_vars))
# Attempt: stack one copy of `mpg` per pairing, nest each copy by `grp`,
# bind the pairings alongside, then build a plot and a file name per row.
# NOTE(review): this is the non-working attempt being asked about; the code
# is left exactly as posted and only annotated.
plots <- bind_rows(rep(list(mpg), nrow(vars)), .id = "grp") %>%
nest_by(grp) %>%
bind_cols(vars) %>%
rowwise() %>%
# NOTE(review): `~( ... )` makes the element stored in `plot` a formula,
# not an evaluated ggplot object.
mutate(plot = list(~(data %>%
# NOTE(review): `group_by(y)` names a column called `y`; presumably the
# intent was to group by the column whose name is stored in `y` - confirm.
group_by(y) %>%
# NOTE(review): `"{x}" =` uses `=` rather than `:=`, so the summary column
# is presumably named `{x}` literally instead of after the value of `x`.
summarise("{x}" = mean(.data[[x]], na.rm = T)) %>%
ungroup()) %>%
ggplot() +
geom_col(aes(x = .data[[x]],
y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
fill = .data[[y]]), width = 0.8) +
xlab(paste0("Avg. ", x)) +
ylab(y) +
# NOTE(review): the closing parentheses on the next line appear to outnumber
# the ones still open from `mutate(` and `list(` - verify before running.
theme_classic()))),
filename = paste0(x, "_by_", y, ".pdf")) %>%
select(filename, plot)
# Save every row: the `filename` and `plot` columns are passed to `ggsave()`
# as named arguments, writing into the current working directory.
pwalk(plots, ggsave, path = getwd())
不需要bind_row
s,nest
,...而是:
将您的绘图代码放在辅助函数中,而不是将所有内容都包装在管道中。这使得调试更容易,代码也更清晰。
使用 map2 遍历 df 的列
vars
您的代码存在一个问题
summarise("{x}" = mean(.data[[x]], na.rm = T))
,它将创建一个名为{x}
的变量。而是使用例如!!sym(x) := ...
如果x
是一个字符串,则将值分配回x
。
library(tidyverse)
library(skimr)
# Summarise `mpg` once with skimr and reuse the result for both selections.
mpg_summary <- skim(mpg) %>%
  as.data.frame()

# Categorical candidates: character columns with at most 16 distinct values.
cat_vars <- mpg_summary %>%
  filter(skim_type == "character", character.n_unique <= 16) %>%
  pull(skim_variable)

# Numeric candidates: every numeric column.
num_vars <- mpg_summary %>%
  filter(skim_type == "numeric") %>%
  pull(skim_variable)

# All (y, x) pairings. `cross_df()` is deprecated as of purrr 1.0.0, so use
# `tidyr::expand_grid()` instead. Listing `x` first makes `y` vary fastest,
# and `select()` restores the (y, x) column order, so rows and columns match
# what `cross_df(list(y = cat_vars, x = num_vars))` produced.
vars <- expand_grid(x = num_vars, y = cat_vars) %>%
  select(y, x)
#' Bar chart of the mean of a numeric column per level of a categorical column
#'
#' @param data A data frame containing both columns.
#' @param x Name (string) of the numeric column to average; drawn on the
#'   x axis.
#' @param y Name (string) of the categorical column to group by; drawn on the
#'   y axis (ordered by the averaged value) and used for the fill colour.
#' @return A ggplot object.
make_plot <- function(data, x, y) {
  # `!!sym(x) :=` writes the mean back to the column named by the string `x`.
  # `.groups = "drop"` already returns an ungrouped result, so no separate
  # `ungroup()` call is needed afterwards.
  averages <- data %>%
    group_by(across(all_of(y))) %>%
    summarise(!!sym(x) := mean(.data[[x]], na.rm = TRUE), .groups = "drop")

  ggplot(averages) +
    geom_col(
      aes(
        x = .data[[x]],
        y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = TRUE),
        fill = .data[[y]]
      ),
      width = 0.8
    ) +
    xlab(paste0("Avg. ", x)) +
    ylab(y) +
    theme_classic()
}
# Build one chart for every (x, y) pairing listed in `vars`.
plots <- map2(
  vars$x,
  vars$y,
  function(x_name, y_name) make_plot(data = mpg, x = x_name, y = y_name)
)
length(plots)
#> [1] 25
plots[[1]]

# Export: write each chart to "<x>_by_<y>.pdf" in the working directory.
pdf_names <- paste0(vars$x, "_by_", vars$y, ".pdf")
walk2(
  pdf_names,
  plots,
  function(pdf_name, chart) {
    ggsave(filename = pdf_name, plot = chart, path = getwd())
  }
)