如何制作一个函数来制作复杂的欧拉图?
How to make a function to make a complex euler diagram?
我目前正在制作欧拉图。我设法使用这些代码制作了一个包含 6 个变量的欧拉图,尽管我认为这些代码效率不高:
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
euler_primary <- c("A" = sum(dataset$A == 1, na.rm = TRUE),
"B" = sum(dataset$B == 1, na.rm = TRUE),
"C" = sum(dataset$C == 1, na.rm = TRUE),
"D" = sum(dataset$D == 1, na.rm = TRUE),
"E" = sum(dataset$E == 1, na.rm = TRUE),
"F" = sum(dataset$F == 1, na.rm = TRUE),
"A&B" = sum(dataset$B == 1 & dataset$A == 1, na.rm=TRUE),
"A&C" = sum(dataset$C == 1 & dataset$A == 1, na.rm=TRUE),
"A&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm = TRUE),
"A&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm = TRUE),
"A&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm = TRUE),
"B&C" = sum(dataset$B == 1 & dataset$C == 1, na.rm=TRUE),
"B&D" = sum(dataset$B == 1 & dataset$D == 1, na.rm=TRUE),
"B&E" = sum(dataset$B == 1 & dataset$E == 1, na.rm=TRUE),
"B&F" = sum(dataset$B == 1 & dataset$F == 1, na.rm=TRUE),
"C&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"C&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"C&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"D&E" = sum(dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"D&F" = sum(dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"E&F" = sum(dataset$F == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1, na.rm=TRUE),
"A&B&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&D&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&E&F" = sum(dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"B&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"B&C&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"B&D&E" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"B&D&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"B&E&F" = sum(dataset$B == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"C&D&E" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"C&D&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"C&E&F" = sum(dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"D&E&F" = sum(dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&C&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&D&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&E&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E, na.rm=TRUE),
"B&C&D&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F, na.rm=TRUE),
"B&C&E&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"B&D&E&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"C&D&E&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"A&B&C&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E&F" = sum(dataset$C == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E&F" = sum(dataset$C == 1 & dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE)
)
venn_primary <- euler(euler_primary)
plot(venn_primary6,
quantities = list(cex = .75),
fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))
以上代码产生以下情节:
但是,现在我需要制作一个包含 11 个变量的欧拉图。用 11 个变量来制作这样的图表似乎是不可能的,因为变量的组合将达到数百甚至数千个。我认为创建一个函数来分配字母并创建列表可能是解决方案。然而,由于我是 R 中数据清理和条件的新手,我无法想出这样的功能。 任何人都可以帮我创建一个函数,我可以只输入数据集名称和将包含在图表中的列,然后该函数将完成其余的清理工作吗?
#p.s.: 我注意到 eulerr
包要求我们在变量之间添加 &
来表示重叠的情况。例如,如果我们想要查看变量 A
和 B
之间的交集,我们将需要创建一个 A&B
.
的精确变量
非常感谢您
我尝试使用 tidyverse
在没有任何手动输入的情况下创建您的绘图,但最终的绘图存在某种缺陷。也许您可以基于此代码创建包含 11 个变量的图,并以某种方式解决图中缺少标签的问题。
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
library(tidyverse)
library(eulerr)
map(1:6, ~combn(names(dataset), .x)) %>%
map_df(~.x %>%
split(rep(1:ncol(.x), each = nrow(.x))) %>%
`names<-`(., paste0(split(.x, rep(1:ncol(.x), each = nrow(.x))))) %>%
map_df(~dataset %>%
mutate(rn = row_number()) %>%
pivot_longer(-rn) %>%
filter(name %in% .x, value == 1) %>%
group_by(rn) %>%
filter(n() == length(.x)) %>%
ungroup() %>%
summarise(n = n_distinct(rn)),
.id = "cols")) %>%
mutate(cols = str_remove(str_replace_all(str_remove_all(cols, "[^[A-Z]]"), "(\w)", "\1&"),
"&$")) %>%
deframe() %>%
euler() %>%
plot(fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))
我目前正在制作欧拉图。我设法使用这些代码制作了一个包含 6 个变量的欧拉图,尽管我认为这些代码效率不高:
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
euler_primary <- c("A" = sum(dataset$A == 1, na.rm = TRUE),
"B" = sum(dataset$B == 1, na.rm = TRUE),
"C" = sum(dataset$C == 1, na.rm = TRUE),
"D" = sum(dataset$D == 1, na.rm = TRUE),
"E" = sum(dataset$E == 1, na.rm = TRUE),
"F" = sum(dataset$F == 1, na.rm = TRUE),
"A&B" = sum(dataset$B == 1 & dataset$A == 1, na.rm=TRUE),
"A&C" = sum(dataset$C == 1 & dataset$A == 1, na.rm=TRUE),
"A&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm = TRUE),
"A&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm = TRUE),
"A&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm = TRUE),
"B&C" = sum(dataset$B == 1 & dataset$C == 1, na.rm=TRUE),
"B&D" = sum(dataset$B == 1 & dataset$D == 1, na.rm=TRUE),
"B&E" = sum(dataset$B == 1 & dataset$E == 1, na.rm=TRUE),
"B&F" = sum(dataset$B == 1 & dataset$F == 1, na.rm=TRUE),
"C&D" = sum(dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"C&E" = sum(dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"C&F" = sum(dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"D&E" = sum(dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"D&F" = sum(dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"E&F" = sum(dataset$F == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1, na.rm=TRUE),
"A&B&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&D&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&E&F" = sum(dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"B&C&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"B&C&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"B&D&E" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"B&D&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"B&E&F" = sum(dataset$B == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"C&D&E" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"C&D&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"C&E&F" = sum(dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"D&E&F" = sum(dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1, na.rm=TRUE),
"A&B&C&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&C&D&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&E&F" = sum(dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&D&E&F" = sum(dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E, na.rm=TRUE),
"B&C&D&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F, na.rm=TRUE),
"B&C&E&F" = sum(dataset$B == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"B&D&E&F" = sum(dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"C&D&E&F" = sum(dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F, na.rm=TRUE),
"A&B&C&D&E" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1, na.rm=TRUE),
"A&B&C&D&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&C&D&E&F" = sum(dataset$C == 1 & dataset$A == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"B&C&D&E&F" = sum(dataset$C == 1 & dataset$B == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE),
"A&B&C&D&E&F" = sum(dataset$B == 1 & dataset$A == 1 & dataset$C == 1 & dataset$D == 1 & dataset$E == 1 & dataset$F == 1, na.rm=TRUE)
)
venn_primary <- euler(euler_primary)
plot(venn_primary6,
quantities = list(cex = .75),
fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))
以上代码产生以下情节:
但是,现在我需要制作一个包含 11 个变量的欧拉图。用 11 个变量来制作这样的图表似乎是不可能的,因为变量的组合将达到数百甚至数千个。我认为创建一个函数来分配字母并创建列表可能是解决方案。然而,由于我是 R 中数据清理和条件的新手,我无法想出这样的功能。 任何人都可以帮我创建一个函数,我可以只输入数据集名称和将包含在图表中的列,然后该函数将完成其余的清理工作吗?
#p.s.: 我注意到 eulerr
包要求我们在变量之间添加 &
来表示重叠的情况。例如,如果我们想要查看变量 A
和 B
之间的交集,我们将需要创建一个 A&B
.
非常感谢您
我尝试使用 tidyverse
在没有任何手动输入的情况下创建您的绘图,但最终的绘图存在某种缺陷。也许您可以基于此代码创建包含 11 个变量的图,并以某种方式解决图中缺少标签的问题。
dataset <- data.frame(
A = rep(c(1, 2, NA), length.out = 100),
B = rep(c(2, NA, 1), length.out = 100),
C = rep(c(NA, 1, 2), length.out = 100),
D = rep(c(NA, 2, 1), length.out = 100),
E = rep(c(1, NA, 2), length.out = 100),
F = rep(c(1, 2, NA), length.out = 100))
library(tidyverse)
library(eulerr)
map(1:6, ~combn(names(dataset), .x)) %>%
map_df(~.x %>%
split(rep(1:ncol(.x), each = nrow(.x))) %>%
`names<-`(., paste0(split(.x, rep(1:ncol(.x), each = nrow(.x))))) %>%
map_df(~dataset %>%
mutate(rn = row_number()) %>%
pivot_longer(-rn) %>%
filter(name %in% .x, value == 1) %>%
group_by(rn) %>%
filter(n() == length(.x)) %>%
ungroup() %>%
summarise(n = n_distinct(rn)),
.id = "cols")) %>%
mutate(cols = str_remove(str_replace_all(str_remove_all(cols, "[^[A-Z]]"), "(\w)", "\1&"),
"&$")) %>%
deframe() %>%
euler() %>%
plot(fill = list(c("red", "blue", "green", "violet", "orange", "brown")),
lty = 1,
cex = 0.5,
labels = NULL,
legend = list(labels = letters[1:6]))