使用 rowwise 和 rlang 创建数据集所有可能的 geom_col 图表

Create all possible geom_col charts of the dataset using rowwise and rlang

我想为数据集创建并保存所有可能的 geom_col 图表:y 轴为分类变量,x 轴为数值变量按该分类变量分组后的平均值。

我借鉴了这个回答和这个链接中的想法,编写了以下代码,但始终无法让它正常运行。请指教。

library(tidyverse)
library(skimr)

# Categorical candidates: character columns of `mpg` with at most 16 distinct
# values, as reported by skimr.
cat_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "character", character.n_unique <= 16) %>% pull(skim_variable) 

# Numeric candidates: every column skimr classifies as "numeric".
num_vars <- skim(mpg) %>% as.data.frame() %>% filter(skim_type == "numeric") %>% pull(skim_variable)

# One row per (y = categorical name, x = numeric name) combination.
vars <- cross_df(list(y= cat_vars, x = num_vars))

# Attempt: stack one copy of `mpg` per combination, nest each copy by `grp`,
# attach the matching (y, x) names, then build a plot and a file name per row.
plots <- bind_rows(rep(list(mpg), nrow(vars)), .id = "grp") %>%
  nest_by(grp) %>%
  bind_cols(vars) %>%
  rowwise() %>% 
  # NOTE(review): the leading `~` makes the expression below a formula, so the
  # `plot` column would hold unevaluated code rather than a ggplot object.
  mutate(plot = list(~(data %>%
                         # NOTE(review): `group_by(y)` refers to a column literally
                         # named `y`; presumably the column whose name is stored in
                         # `y` was intended (compare `.data[[y]]` below) - confirm.
                         group_by(y) %>%
                         # NOTE(review): with `=` the output column is literally
                         # named "{x}"; glue-style names need `:=`.
                         summarise("{x}" = mean(.data[[x]], na.rm = T)) %>%
                         ungroup()) %>%
                       ggplot() +
                       geom_col(aes(x = .data[[x]],
                                    y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = T),
                                    fill = .data[[y]]), width = 0.8) +
                       xlab(paste0("Avg. ", x)) +
                       ylab(y) +
                       # NOTE(review): the closing parentheses on the next two lines
                       # look unbalanced against the openers on the `mutate(` line -
                       # confirm; as written this is unlikely to parse.
                       theme_classic()))),
filename = paste0(x, "_by_", y, ".pdf")) %>%
  select(filename, plot)

# Calls ggsave() once per row, passing the `filename` and `plot` columns as
# named arguments and saving into the current working directory.
pwalk(plots, ggsave, path =  getwd())

您不需要 bind_rows、nest_by 等操作,而是可以:

  1. 将您的绘图代码放在辅助函数中,而不是将所有内容都包装在管道中。这使得调试更容易,代码也更清晰。

  2. 使用 map2 遍历数据框 vars 的各列

  3. 您的代码中有一个问题:summarise("{x}" = mean(.data[[x]], na.rm = T)) 会创建一个字面上名为 {x} 的变量。应改用例如 !!sym(x) := ...,这样当 x 是字符串时,结果会被赋给以 x 的值命名的列

library(tidyverse)
library(skimr)

# Profile `mpg` once and reuse the summary for both variable lists.
mpg_summary <- skim(mpg) %>%
  as.data.frame()

# Categorical axis candidates: character columns with at most 16 distinct
# values, so the resulting bar charts stay readable.
cat_vars <- mpg_summary %>%
  filter(skim_type == "character", character.n_unique <= 16) %>%
  pull(skim_variable)

# Numeric candidates whose per-category mean will be plotted.
num_vars <- mpg_summary %>%
  filter(skim_type == "numeric") %>%
  pull(skim_variable)

# Every (y = categorical, x = numeric) combination, one per row.
# cross_df() is deprecated since purrr 1.0.0; tidyr::expand_grid() is the
# replacement. Listing `x` first makes `y` vary fastest, which reproduces the
# row order cross_df() gave; select() restores the original column order.
vars <- expand_grid(x = num_vars, y = cat_vars) %>%
  select(y, x)

#' Bar chart of the per-category mean of a numeric column
#'
#' Averages `x` within each level of `y` (ignoring missing values) and draws
#' one horizontal bar per level, ordered by that average.
#'
#' @param data A data frame containing the columns named by `x` and `y`.
#' @param x Name (length-one character) of the numeric column to average.
#' @param y Name (length-one character) of the categorical grouping column.
#' @return A ggplot object.
make_plot <- function(data, x, y) {
  # Fail early with a clear message instead of deep inside dplyr/ggplot2.
  stopifnot(
    is.character(x), length(x) == 1L,
    is.character(y), length(y) == 1L,
    all(c(x, y) %in% names(data))
  )

  # `!!sym(x) :=` names the summary column after the *value* of `x`.
  # `.groups = "drop"` already returns an ungrouped tibble, so no extra
  # ungroup() is needed.
  means <- data %>%
    group_by(across(all_of(y))) %>%
    summarise(!!sym(x) := mean(.data[[x]], na.rm = TRUE), .groups = "drop")

  ggplot(means) +
    geom_col(
      aes(
        x = .data[[x]],
        # Order the categories by their average so the bars are sorted.
        y = fct_reorder(.data[[y]], .data[[x]], .fun = sum, na.rm = TRUE),
        fill = .data[[y]]
      ),
      width = 0.8
    ) +
    xlab(paste0("Avg. ", x)) +
    ylab(y) +
    theme_classic()
}

# Build one chart per (x, y) pair listed in `vars`.
plots <- map2(
  vars$x,
  vars$y,
  function(x_var, y_var) make_plot(mpg, x_var, y_var)
)

length(plots)
#> [1] 25

plots[[1]]

# Export: one PDF per chart, named "<x>_by_<y>.pdf", written to the current
# working directory.
walk2(
  paste0(vars$x, "_by_", vars$y, ".pdf"),
  plots,
  function(pdf_name, chart) {
    ggsave(filename = pdf_name, plot = chart, path = getwd())
  }
)