如何制作一个函数来制作复杂的欧拉图?

How to make a function to make a complex euler diagram?

我目前正在制作欧拉图。我设法使用这些代码制作了一个包含 6 个变量的欧拉图,尽管我认为这些代码效率不高:

dataset <- data.frame(
        A = rep(c(1, 2, NA), length.out = 100),
        B = rep(c(2, NA, 1), length.out = 100),
        C = rep(c(NA, 1, 2), length.out = 100),
        D = rep(c(NA, 2, 1), length.out = 100),
        E = rep(c(1, NA, 2), length.out = 100),
        F = rep(c(1, 2, NA), length.out = 100)) 

euler_primary <- c("A" = sum(dataset$A == 1, na.rm = TRUE),
                    "B" = sum(dataset$B == 1, na.rm = TRUE),
                    "C" = sum(dataset$C == 1, na.rm = TRUE),
                    "D" = sum(dataset$D == 1, na.rm = TRUE),
                    "E" = sum(dataset$E == 1, na.rm = TRUE),
                    "F" = sum(dataset$F == 1, na.rm = TRUE),
                    "A&B" = sum(dataset$B == 1 & dataset$A == 1, na.rm=TRUE),
                    "A&C" = sum(dataset$C == 1 & dataset$A == 1, na.rm=TRUE),
                    "A&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm = TRUE),
                    "A&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm = TRUE),
                    "A&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm = TRUE),
                    "B&C" = sum(dataset$B == 1 & dataset$C == 1, na.rm=TRUE),
                    "B&D" = sum(dataset$B == 1 & dataset$D == 1, na.rm=TRUE),
                    "B&E" = sum(dataset$B == 1 & dataset$E == 1, na.rm=TRUE),
                    "B&F" = sum(dataset$B == 1 & dataset$F == 1, na.rm=TRUE),
                    "C&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
                    "C&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
                    "C&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
                    "D&E" = sum(dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "D&F" = sum(dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "E&F" = sum(dataset$F == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&B&C" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1, na.rm=TRUE),
                    "A&B&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1, na.rm=TRUE),
                    "A&B&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&B&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&C&D" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
                    "A&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&C&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&D&E" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&D&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&E&F" = sum(dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "B&C&D" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
                    "B&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
                    "B&C&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
                    "B&D&E" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "B&D&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "B&E&F" = sum(dataset$B == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "C&D&E" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "C&D&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "C&E&F" = sum(dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "D&E&F" = sum(dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&C&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
                    "A&B&C&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&B&C&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&B&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&C&D&E" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&C&D&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&C&E&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&D&E&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "B&C&D&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E, na.rm=TRUE),
                    "B&C&D&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F, na.rm=TRUE),
                    "B&C&E&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
                    "B&D&E&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
                    "C&D&E&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
                    "A&B&C&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
                    "A&B&C&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&C&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&C&D&E&F" = sum(dataset$C == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "B&C&D&E&F" = sum(dataset$C == 1 & dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
                    "A&B&C&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE)
                )

    venn_primary <- euler(euler_primary)
    plot(venn_primary6,
            quantities = list(cex = .75),
            fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
            lty = 1,
            cex = 0.5,
            labels = NULL,
            legend = list(labels = letters[1:6]))

以上代码产生以下情节:

但是,现在我需要制作一个包含 11 个变量的欧拉图。用 11 个变量来制作这样的图表似乎是不可能的,因为变量的组合将达到数百甚至数千个。我认为创建一个函数来分配字母并创建列表可能是解决方案。然而,由于我是 R 中数据清理和条件的新手,我无法想出这样的功能。 任何人都可以帮我创建一个函数,我可以只输入数据集名称和将包含在图表中的列,然后该函数将完成其余的清理工作吗?

#p.s.: 我注意到 eulerr 包要求我们在变量之间添加 & 来表示重叠的情况。例如,如果我们想要查看变量 AB 之间的交集,我们将需要创建一个 A&B.

的精确变量

非常感谢您

我尝试使用 tidyverse 在没有任何手动输入的情况下创建您的绘图,但最终的绘图存在某种缺陷。也许您可以基于此代码创建包含 11 个变量的图,并以某种方式解决图中缺少标签的问题。

dataset <- data.frame(
  A = rep(c(1, 2, NA), length.out = 100),
  B = rep(c(2, NA, 1), length.out = 100),
  C = rep(c(NA, 1, 2), length.out = 100),
  D = rep(c(NA, 2, 1), length.out = 100),
  E = rep(c(1, NA, 2), length.out = 100),
  F = rep(c(1, 2, NA), length.out = 100)) 

library(tidyverse)
library(eulerr)

map(1:6, ~combn(names(dataset), .x)) %>% 
  map_df(~.x %>% 
           split(rep(1:ncol(.x), each = nrow(.x))) %>% 
           `names<-`(., paste0(split(.x, rep(1:ncol(.x), each = nrow(.x))))) %>% 
           map_df(~dataset %>% 
                    mutate(rn = row_number()) %>% 
                    pivot_longer(-rn) %>% 
                    filter(name %in% .x, value == 1) %>% 
                    group_by(rn) %>% 
                    filter(n() == length(.x)) %>% 
                    ungroup() %>% 
                    summarise(n = n_distinct(rn)),
                  .id = "cols")) %>% 
  mutate(cols = str_remove(str_replace_all(str_remove_all(cols, "[^[A-Z]]"), "(\w)", "\1&"),
                           "&$")) %>% 
  deframe() %>% 
  euler() %>% 
  plot(fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
       lty = 1,
       cex = 0.5,
       labels = NULL,
       legend = list(labels = letters[1:6]))