如何在 R 中使用 ggpubr 和 facet_wrap 为数据框的每个分组(分面)分别标注 p 值
How to apply p-value for each group of dataframe in R using facet_wrap in ggpubr
我有一个如下所示的数据:
# Example data in long format (dput-style literal): an 84-row data.frame with
#   Time    - factor with levels "24", "36", "48", "72"
#   id      - integer ids 1..21, repeated once per Samples level
#   Samples - factor with levels "WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200",
#             "WT_Ago4_800"
#   Size    - numeric measurement
melted.df <- structure(list(Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("24",
"36", "48", "72"), class = "factor"), id = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), Samples = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200",
"WT_Ago4_800"), class = "factor"), Size = c(0, 0, 0, 0, 0, 0,
0.3, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.8, 0.5, 0, 0,
0, 0, 0, 0, 0.1, 0.65, 0.2, 0.85, 0.725, 0.575, 0.1, 1.1, 0.9,
1.325, 1, 0.8, 0.5, 2.2, 1.65, 0, 0, 0, 0, 0, 0, 0.825, 1.175,
0.1, 0.55, 0.85, 0.85, 1.1, 1.4, 0.6, 0.95, 1.15, 0.975, 2.35,
1.15, 2.1, 0, 0, 0, 0, 0, 0, 0.65, 1.4, 0.55, 0.1, 0.7, 1.1,
0.95, 1.85, 0.85, 0.1, 1.5, 1.25, 1.8, 1.75, 2.15)), row.names = c(NA,
-84L), class = "data.frame")
此数据包含 4 个时间点(24、36、48 和 72 小时)。我想用下面的代码,把针对每个 time.levels
计算得到的 stat.test
中的 p 值标注到 facet_wrap
的对应分面上。当 i=1 时没有 p 值,因此不需要在图上标注;当 i=2 时有 p 值,需要标注到图上。问题是我无法把 p 值标注到各自对应的分面上,所有分面显示的都是同一组 p 值。我该如何解决这个问题?
代码:
library(devtools)
# install_github("https://github.com/kassambara/rstatix")
library(rstatix) # https://github.com/kassambara/rstatix
library(stringi)
library(ggpubr)
# Question code: for each Time level, run a Tukey HSD test and try to annotate
# the faceted boxplot with the resulting adjusted p-values.
time.levels <- levels(melted.df$Time)
stat.test <- NULL
for (i in 1:length(time.levels)){
# Tukey HSD on the rows of the i-th time level only. This assignment replaces
# the previous iteration's result; nothing is accumulated across iterations.
stat.test <- aov(Size ~ Samples, data = melted.df[melted.df$Time == time.levels[i],]) %>%
tukey_hsd()
# stat.test <- rbind(stat.test, tmp.stat)
# NOTE(review): the plot is built from the full melted.df and faceted by Time,
# while stat.test comes from a single time level and presumably carries no
# Time column -- this looks like why every facet shows the same p-values.
bp <- ggboxplot(melted.df, x = "Samples", y = "Size") +
facet_wrap(vars(Time))+
stat_pvalue_manual(
stat.test, label = "p.adj",
y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
)
# NOTE(review): a bare `bp` inside a for loop is not auto-printed in R; use
# print(bp) if the plot should be displayed on each iteration.
bp
}
注意:Time == 24L
时 Size 列的所有值均为零:
> filter(melted.df, Time == 24L) %>% select(Size) %>% summary
Size
Min. :0
1st Qu.:0
Median :0
Mean :0
3rd Qu.:0
Max. :0
如果您仍想继续,应为每个时间点单独绘图,然后使用 gridExtra::grid.arrange
将这些图拼接在一起:
library(gridExtra)
# Build one annotated boxplot per time level, then arrange them in one grid.
bp <- lapply(time.levels, function(lvl) {
  # Subset first so the Tukey test and the plot are based on the same rows.
  sdf <- melted.df[melted.df$Time == lvl, ]
  stat.test <- tukey_hsd(aov(Size ~ Samples, data = sdf))
  ggboxplot(sdf, x = "Samples", y = "Size") +
    facet_wrap(vars(Time)) +
    stat_pvalue_manual(
      stat.test,
      label = "p.adj",
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
})
# Pass the list of plots as individual arguments to grid.arrange.
do.call(grid.arrange, bp)
注意:必须使用取子集后的 data.frame
sdf
作为 ggboxplot
的输入。
您不需要使用 gridExtra::grid.arrange
。
这是一个干净的解决方案。
library(rstatix) # latest version
library(ggpubr) # latest version
# Tukey HSD of Size by Samples, computed on melted.df grouped by Time.
stat.test <- tukey_hsd(group_by(melted.df, Time), Size ~ Samples)

# Boxplot faceted by Time, annotated with the adjusted p-values in stat.test.
ggboxplot(
  melted.df,
  x = "Samples",
  y = "Size",
  facet.by = "Time"
) +
  stat_pvalue_manual(
    stat.test,
    label = "p.adj",
    y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
  )
我有一个如下所示的数据:
# Example data in long format (dput-style literal): an 84-row data.frame with
#   Time    - factor with levels "24", "36", "48", "72"
#   id      - integer ids 1..21, repeated once per Samples level
#   Samples - factor with levels "WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200",
#             "WT_Ago4_800"
#   Size    - numeric measurement
melted.df <- structure(list(Time = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("24",
"36", "48", "72"), class = "factor"), id = c(1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L), Samples = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L), .Label = c("WT_Ago2_800", "WT_Ago2_400", "WT_Ago2_200",
"WT_Ago4_800"), class = "factor"), Size = c(0, 0, 0, 0, 0, 0,
0.3, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0.8, 0.5, 0, 0,
0, 0, 0, 0, 0.1, 0.65, 0.2, 0.85, 0.725, 0.575, 0.1, 1.1, 0.9,
1.325, 1, 0.8, 0.5, 2.2, 1.65, 0, 0, 0, 0, 0, 0, 0.825, 1.175,
0.1, 0.55, 0.85, 0.85, 1.1, 1.4, 0.6, 0.95, 1.15, 0.975, 2.35,
1.15, 2.1, 0, 0, 0, 0, 0, 0, 0.65, 1.4, 0.55, 0.1, 0.7, 1.1,
0.95, 1.85, 0.85, 0.1, 1.5, 1.25, 1.8, 1.75, 2.15)), row.names = c(NA,
-84L), class = "data.frame")
此数据包含 4 个时间点(24、36、48 和 72 小时)。我想用下面的代码,把针对每个 time.levels
计算得到的 stat.test
中的 p 值标注到 facet_wrap
的对应分面上。当 i=1 时没有 p 值,因此不需要在图上标注;当 i=2 时有 p 值,需要标注到图上。问题是我无法把 p 值标注到各自对应的分面上,所有分面显示的都是同一组 p 值。我该如何解决这个问题?
代码:
library(devtools)
# install_github("https://github.com/kassambara/rstatix")
library(rstatix) # https://github.com/kassambara/rstatix
library(stringi)
library(ggpubr)
# Question code: for each Time level, run a Tukey HSD test and try to annotate
# the faceted boxplot with the resulting adjusted p-values.
time.levels <- levels(melted.df$Time)
stat.test <- NULL
for (i in 1:length(time.levels)){
# Tukey HSD on the rows of the i-th time level only. This assignment replaces
# the previous iteration's result; nothing is accumulated across iterations.
stat.test <- aov(Size ~ Samples, data = melted.df[melted.df$Time == time.levels[i],]) %>%
tukey_hsd()
# stat.test <- rbind(stat.test, tmp.stat)
# NOTE(review): the plot is built from the full melted.df and faceted by Time,
# while stat.test comes from a single time level and presumably carries no
# Time column -- this looks like why every facet shows the same p-values.
bp <- ggboxplot(melted.df, x = "Samples", y = "Size") +
facet_wrap(vars(Time))+
stat_pvalue_manual(
stat.test, label = "p.adj",
y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
)
# NOTE(review): a bare `bp` inside a for loop is not auto-printed in R; use
# print(bp) if the plot should be displayed on each iteration.
bp
}
注意:Time == 24L
时 Size 列的所有值均为零:
> filter(melted.df, Time == 24L) %>% select(Size) %>% summary
Size
Min. :0
1st Qu.:0
Median :0
Mean :0
3rd Qu.:0
Max. :0
如果您仍想继续,应为每个时间点单独绘图,然后使用 gridExtra::grid.arrange
将这些图拼接在一起:
library(gridExtra)
# Build one annotated boxplot per time level, then arrange them in one grid.
bp <- lapply(time.levels, function(lvl) {
  # Subset first so the Tukey test and the plot are based on the same rows.
  sdf <- melted.df[melted.df$Time == lvl, ]
  stat.test <- tukey_hsd(aov(Size ~ Samples, data = sdf))
  ggboxplot(sdf, x = "Samples", y = "Size") +
    facet_wrap(vars(Time)) +
    stat_pvalue_manual(
      stat.test,
      label = "p.adj",
      y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
    )
})
# Pass the list of plots as individual arguments to grid.arrange.
do.call(grid.arrange, bp)
注意:必须使用取子集后的 data.frame
sdf
作为 ggboxplot
的输入。
您不需要使用 gridExtra::grid.arrange
。
这是一个干净的解决方案。
library(rstatix) # latest version
library(ggpubr) # latest version
# Tukey HSD of Size by Samples, computed on melted.df grouped by Time.
stat.test <- tukey_hsd(group_by(melted.df, Time), Size ~ Samples)

# Boxplot faceted by Time, annotated with the adjusted p-values in stat.test.
ggboxplot(
  melted.df,
  x = "Samples",
  y = "Size",
  facet.by = "Time"
) +
  stat_pvalue_manual(
    stat.test,
    label = "p.adj",
    y.position = c(2, 2.5, 3, 3.5, 3.8, 4)
  )