如何将数据框中的 Tukey HSD p 值括号添加到列表列表中的 ggplots?
How can I add Tukey HSD p-value brackets from a data frame to ggplots from a list of lists?
我有一个循环,可以为任何给定数量的分析物(在本例中为 3)生成一个 ggplots 列表。执行方差分析并为每个 ggplot 生成和映射 p 值。
但是,我还想用 Tukey HSD p 值括号对这些图进行注释。如果可能的话,我只想可视化低于调整后 p < 0.05 的括号。
我写了两个分别执行这些操作的脚本;我的循环的输出是一个列表列表(ggplot 元素),而我的 Tukey HSD 脚本的输出是一个数据帧。
我指的括号在这个例子中看起来像 ggplot:
以下是我的代码。一、数据:
set.seed(10)
Label<-as.data.frame(c("Baseline","Baseline","Baseline","Baseline","A","B","B","C","D"))
Label<-do.call("rbind", replicate(10, Label, simplify = FALSE))
colnames(Label)<-"Label"
Analyte1<- rnorm(90, mean = 1, sd = 1)
Analyte2<- rnorm(90, mean = 1, sd = 1)
Analyte3<- rnorm(90, mean = .2, sd = 1)
df<-cbind(Label,Analyte1,Analyte2,Analyte3)
以下是我的方差分析循环:
library(dplyr)
library(ggplot2)
library(ggpubr)
library(rstatix)
library(rlist)
# Select numeric columns to obtain df length.
sample_list <-
colnames(select_if(df, is.numeric))
sample_list
# Create a list where the plots will be saved.
ANOVA.plots <- list()
# The for loop.
for (i in 1:length(sample_list)) {
ANOVA.ggplot <-
ggplot(
df,
aes_string(
x = "Label",
y = sample_list[i],
fill = "Label",
title = sample_list[i],
outlier.shape = NA
)
) +
border(color = "black", size = 2.5) +
geom_jitter(
aes(fill = Label),
shape = 21,
size = 4,
color = "black",
stroke = 1,
position = position_jitter()
) +
geom_boxplot(alpha = 0.7,
linetype = 1,
size = 1.1) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set2") +
stat_boxplot(geom = "errorbar",
size = 1.1,
width = 0.4) +
theme (axis.title.y = element_blank()) +
theme (axis.title.x = element_blank()) +
stat_compare_means(
inherit.aes = TRUE,
data = df,
method = "anova",
paired = FALSE ,
method.args = list(var.equal = TRUE),
fontface = "bold.italic",
size = 5,
vjust = 0,
hjust = 0
)
ANOVA.plots[[i]] <- ANOVA.ggplot
}
# Print each ggplot element from the list of lists.
ANOVA.plots
目前,这个块生成的图看起来类似于 Analyte 2 中的这个图。
最后,此块用于获取Tukey HSD数据帧。
TUKEY.length<-length(sample_list)
TUKEY.list <-
vector(mode = "list", length = TUKEY.length) # Empty list for looping, where Tukey HSD results are stored.
for (i in 1:TUKEY.length + 1) {
TUKEY.list[[i]] <-
aov(df[[i]] ~ df[[1]]) %>% tukey_hsd() # Obtain Tukey HSD p-values from comparing groups.
}
TUKEY.list <-
TUKEY.list[lengths(TUKEY.list) > 0L] # Remove empty lists, artifacts from piping.
p.value.threshold <- 0.05
TUKEY.df<-list.rbind(TUKEY.list)
Analytes <-
colnames(df[, sapply(df, class) %in% c('integer', 'numeric')])
# Stores the analyte names for Tukey. Only pulls numeric colnames.
# The following was done to properly bind the analyte IDs to the Tukey data frame.
Analytes.df<-as.data.frame(Analytes)
Analytes.df$Value<-c(1:nrow(Analytes.df))
Analytes.df<-do.call("rbind", replicate(10, Analytes.df, simplify = FALSE))
Analytes.df <-
Analytes.df[order(Analytes.df$Value), ]
# Orders the dataframe so that the rownames can be assigned to the TUKEY HSD.
Analytes.df<-as.data.frame(Analytes.df[,-2])
toDrop <- c("^term", "^null") # Columns to drop, left over from Tukey loop.
TUKEY.df<-TUKEY.df[,!grepl(paste(toDrop, collapse = "|"),names(TUKEY.df))]
TUKEY.df<-cbind(Analytes.df, TUKEY.df)
TUKEY.df<-filter(TUKEY.df, p.adj <= p.value.threshold)
所需的输出仍将显示方差分析 p 值,并且 在 Tukey HSD.[=15= 之后调整后的 p 值低于 0.05 的那些组有括号]
在这个例子中,只有分析物 2,组 A 和 B,调整后的 p 值低于 0.05(即 p = 0.000754),因此括号应该出现在这些上面两个地块。然而,映射这些 Tukey-HSD 括号的代码应该能够适用于更多变量(即 20 多个分析物)并捕获对每个图都很重要的所有成对比较,而不仅仅是我给出的那个。
您可以使用 ggsignif
进行自定义注释。示例使用 p < 0.2 作为截止值以显示多个误差线:
library(ggplot2)
library(ggsignif)
library(cowplot)
library(data.table)
set.seed(10)
df <- data.table(Label=rep(c("Baseline","Baseline","Baseline","Baseline","A","B","B","C","D"), 10),
`Analyte 1` = rnorm(90, mean = 1, sd = 1),
`Analyte 2` = rnorm(90, mean = 1, sd = 1),
`Analyte 3` = rnorm(90, mean = .2, sd = 1))
df[, Label := factor(Label, unique(Label))]
df <- melt(df, id.vars = "Label", variable.name = "Analyte")
dfl <- split(df, df$Analyte, drop=TRUE)
doPlots <- function(x, signif.cutoff=.05){
set.seed(123)
p1 <- ggplot(dfl[[x]], aes(x=Label, y=value, fill=Label)) +
geom_boxplot(alpha = 0.7, linetype = 1, size = 1.1) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set2") +
stat_boxplot(geom = "errorbar", size = 1.1, width = 0.4) +
geom_jitter(aes(fill = Label), shape = 21, size = 4, stroke = 1) +
ggtitle(x)
a <- stats::TukeyHSD(stats::aov(value ~ Label, data = dfl[[x]]))[[1]]
a <- stats::setNames(
data.frame(
do.call(rbind, strsplit(rownames(a), "-")),
a[, "p adj"]
), c("Var1", "Var2", "p") )
a <- subset(a[stats::complete.cases(a),], p < signif.cutoff)
if (nrow(a) == 0) return(p1) else {
a$p <- formatC(signif(a$p, digits = 3),
digits = 3, format = "g", flag = "#")
keep.tests <- unname(t(apply(a[, -3], 1, sort)))
keep.tests <- unname(split(keep.tests, seq(dim(keep.tests)[1])))
ts <- list(a = a, keep.tests = keep.tests)
p1 + ggsignif::geom_signif(
comparisons=ts$keep.tests,
test="TukeyHSD",
annotations=ts$a$p,
step_increase=0.1)
}
}
plot_grid(plotlist=lapply(names(dfl), doPlots, signif.cutoff=.2), ncol=3)
由 reprex package (v1.0.0)
于 2021-01-29 创建
我有一个循环,可以为任何给定数量的分析物(在本例中为 3)生成一个 ggplots 列表。执行方差分析并为每个 ggplot 生成和映射 p 值。
但是,我还想用 Tukey HSD p 值括号对这些图进行注释。如果可能的话,我只想可视化低于调整后 p < 0.05 的括号。
我写了两个分别执行这些操作的脚本;我的循环的输出是一个列表列表(ggplot 元素),而我的 Tukey HSD 脚本的输出是一个数据帧。
我指的括号在这个例子中看起来像 ggplot:
以下是我的代码。一、数据:
set.seed(10)
Label<-as.data.frame(c("Baseline","Baseline","Baseline","Baseline","A","B","B","C","D"))
Label<-do.call("rbind", replicate(10, Label, simplify = FALSE))
colnames(Label)<-"Label"
Analyte1<- rnorm(90, mean = 1, sd = 1)
Analyte2<- rnorm(90, mean = 1, sd = 1)
Analyte3<- rnorm(90, mean = .2, sd = 1)
df<-cbind(Label,Analyte1,Analyte2,Analyte3)
以下是我的方差分析循环:
library(dplyr)
library(ggplot2)
library(ggpubr)
library(rstatix)
library(rlist)
# Select numeric columns to obtain df length.
sample_list <-
colnames(select_if(df, is.numeric))
sample_list
# Create a list where the plots will be saved.
ANOVA.plots <- list()
# The for loop.
for (i in 1:length(sample_list)) {
ANOVA.ggplot <-
ggplot(
df,
aes_string(
x = "Label",
y = sample_list[i],
fill = "Label",
title = sample_list[i],
outlier.shape = NA
)
) +
border(color = "black", size = 2.5) +
geom_jitter(
aes(fill = Label),
shape = 21,
size = 4,
color = "black",
stroke = 1,
position = position_jitter()
) +
geom_boxplot(alpha = 0.7,
linetype = 1,
size = 1.1) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set2") +
stat_boxplot(geom = "errorbar",
size = 1.1,
width = 0.4) +
theme (axis.title.y = element_blank()) +
theme (axis.title.x = element_blank()) +
stat_compare_means(
inherit.aes = TRUE,
data = df,
method = "anova",
paired = FALSE ,
method.args = list(var.equal = TRUE),
fontface = "bold.italic",
size = 5,
vjust = 0,
hjust = 0
)
ANOVA.plots[[i]] <- ANOVA.ggplot
}
# Print each ggplot element from the list of lists.
ANOVA.plots
目前,这个块生成的图看起来类似于 Analyte 2 中的这个图。
最后,此块用于获取Tukey HSD数据帧。
TUKEY.length<-length(sample_list)
TUKEY.list <-
vector(mode = "list", length = TUKEY.length) # Empty list for looping, where Tukey HSD results are stored.
for (i in 1:TUKEY.length + 1) {
TUKEY.list[[i]] <-
aov(df[[i]] ~ df[[1]]) %>% tukey_hsd() # Obtain Tukey HSD p-values from comparing groups.
}
TUKEY.list <-
TUKEY.list[lengths(TUKEY.list) > 0L] # Remove empty lists, artifacts from piping.
p.value.threshold <- 0.05
TUKEY.df<-list.rbind(TUKEY.list)
Analytes <-
colnames(df[, sapply(df, class) %in% c('integer', 'numeric')])
# Stores the analyte names for Tukey. Only pulls numeric colnames.
# The following was done to properly bind the analyte IDs to the Tukey data frame.
Analytes.df<-as.data.frame(Analytes)
Analytes.df$Value<-c(1:nrow(Analytes.df))
Analytes.df<-do.call("rbind", replicate(10, Analytes.df, simplify = FALSE))
Analytes.df <-
Analytes.df[order(Analytes.df$Value), ]
# Orders the dataframe so that the rownames can be assigned to the TUKEY HSD.
Analytes.df<-as.data.frame(Analytes.df[,-2])
toDrop <- c("^term", "^null") # Columns to drop, left over from Tukey loop.
TUKEY.df<-TUKEY.df[,!grepl(paste(toDrop, collapse = "|"),names(TUKEY.df))]
TUKEY.df<-cbind(Analytes.df, TUKEY.df)
TUKEY.df<-filter(TUKEY.df, p.adj <= p.value.threshold)
所需的输出仍将显示方差分析 p 值,并且 在 Tukey HSD.[=15= 之后调整后的 p 值低于 0.05 的那些组有括号]
在这个例子中,只有分析物 2,组 A 和 B,调整后的 p 值低于 0.05(即 p = 0.000754),因此括号应该出现在这些上面两个地块。然而,映射这些 Tukey-HSD 括号的代码应该能够适用于更多变量(即 20 多个分析物)并捕获对每个图都很重要的所有成对比较,而不仅仅是我给出的那个。
您可以使用 ggsignif
进行自定义注释。示例使用 p < 0.2 作为截止值以显示多个误差线:
library(ggplot2)
library(ggsignif)
library(cowplot)
library(data.table)
set.seed(10)
df <- data.table(Label=rep(c("Baseline","Baseline","Baseline","Baseline","A","B","B","C","D"), 10),
`Analyte 1` = rnorm(90, mean = 1, sd = 1),
`Analyte 2` = rnorm(90, mean = 1, sd = 1),
`Analyte 3` = rnorm(90, mean = .2, sd = 1))
df[, Label := factor(Label, unique(Label))]
df <- melt(df, id.vars = "Label", variable.name = "Analyte")
dfl <- split(df, df$Analyte, drop=TRUE)
doPlots <- function(x, signif.cutoff=.05){
set.seed(123)
p1 <- ggplot(dfl[[x]], aes(x=Label, y=value, fill=Label)) +
geom_boxplot(alpha = 0.7, linetype = 1, size = 1.1) +
theme(legend.position = "none") +
scale_fill_brewer(palette = "Set2") +
stat_boxplot(geom = "errorbar", size = 1.1, width = 0.4) +
geom_jitter(aes(fill = Label), shape = 21, size = 4, stroke = 1) +
ggtitle(x)
a <- stats::TukeyHSD(stats::aov(value ~ Label, data = dfl[[x]]))[[1]]
a <- stats::setNames(
data.frame(
do.call(rbind, strsplit(rownames(a), "-")),
a[, "p adj"]
), c("Var1", "Var2", "p") )
a <- subset(a[stats::complete.cases(a),], p < signif.cutoff)
if (nrow(a) == 0) return(p1) else {
a$p <- formatC(signif(a$p, digits = 3),
digits = 3, format = "g", flag = "#")
keep.tests <- unname(t(apply(a[, -3], 1, sort)))
keep.tests <- unname(split(keep.tests, seq(dim(keep.tests)[1])))
ts <- list(a = a, keep.tests = keep.tests)
p1 + ggsignif::geom_signif(
comparisons=ts$keep.tests,
test="TukeyHSD",
annotations=ts$a$p,
step_increase=0.1)
}
}
plot_grid(plotlist=lapply(names(dfl), doPlots, signif.cutoff=.2), ncol=3)
由 reprex package (v1.0.0)
于 2021-01-29 创建