R - 使用 'stat_compare_means' 在 ggplot 中重新格式化 P 值
R - reformat P value in ggplot using 'stat_compare_means'
我想将 p 值绘制到多面 ggplot 中的每个面板。如果 p 值大于 0.05,我想按原样显示 p 值。如果 p 值小于 0.05,我想以科学计数法显示该值(即 0.0032 -> 3.20e-3;0.0000425 -> 4.25e-5)。
我为此编写的代码是:
p1 <- ggplot(data = CD3, aes(location, value, color = factor(location),
fill = factor(location))) +
theme_bw(base_rect_size = 1) +
geom_boxplot(alpha = 0.3, size = 1.5, show.legend = FALSE) +
geom_jitter(width = 0.2, size = 2, show.legend = FALSE) +
scale_color_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
scale_fill_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
ylab(expression(paste("Density of clusters, ", mm^{-2}))) +
xlab(NULL) +
stat_compare_means(comparisons = list(c("CT", 'N'), c("IF","N")),
aes(label = ifelse(..p.format.. < 0.05, formatC(..p.format.., format = "e", digits = 2),
..p.format..)),
method = 'wilcox.test', show.legend = FALSE, size = 10) +
#ylab(expression(paste('Density, /', mm^2, )))+
theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 20),
legend.text = element_text(size = 38),
legend.title = element_text(size = 40),
strip.background = element_rect(colour="black", fill="white", size = 2),
strip.text = element_text(margin = margin(10, 10, 10, 10), size = 40),
panel.grid = element_line(size = 1.5))
plot(p1)
这段代码运行没有错误,但是,数字的格式没有改变。我究竟做错了什么?
我附上了重现情节的数据:donwload data here
编辑
structure(list(value = c(0.931966449207829, 3.24210526315789,
3.88811650210901, 0.626860993574675, 4.62085308056872, 0.477508650519031,
0.111900110501359, 3.2495164410058, 4.06626506024096, 0.21684918139434,
1.10365086026018, 4.66666666666667, 0.174109967855698, 0.597625869832174,
2.3758865248227, 0.360751947840548, 1.00441501103753, 3.65168539325843
), Criteria = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Density", "Density of cluster",
"nodular count", "Elongated count"), class = "factor"), Case = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L), .Label = c("Case 1A", "Case 1B", "Case 2", "Case 3", "Case 4",
"Case 5"), class = "factor"), Mark = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("CD3",
"CD4", "CD8", "CD20", "FoxP3"), class = "factor"), location = structure(c(3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L), .Label = c("CT", "IF", "N"), class = "factor")), row.names = c(91L,
92L, 93L, 106L, 107L, 108L, 121L, 122L, 123L, 136L, 137L, 138L,
151L, 152L, 153L, 166L, 167L, 168L), class = "data.frame")
我认为您的问题来自 stat_compare_means
和 comparisons
的使用。
我不太确定,但我猜想 stat_compare_means
的 p 值输出与 compare_means
不同,因此,您不能将它用于 aes
的 [=] 19=].
让我解释一下,用你的例子,你可以像这样修改p.value的显示:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(ref.group = "N", aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
您得到 p.value 的正确显示,但您失去了酒吧。所以,如果你使用 comparisons
参数,你会得到:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(comparisons = list(c("CT","N"), c("IF","N")), aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
所以,现在,您得到了条形图,但显示不正确。
为了避免这个问题,您可以使用 compare_means
函数在 ggplot2 之外执行统计,并使用包 ggsignif
来显示正确的显示。
在这里,我使用 dplyr
和函数 mutate
来创建新列,但您可以在 base
R 中轻松完成。
library(dplyr)
library(magrittr)
c <- compare_means(value~location, data = df, ref.group = "N")
c %<>% mutate(y_pos = c(5,5.5), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# A tibble: 2 x 10
.y. group1 group2 p p.adj p.format p.signif method y_pos labels
<chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr>
1 value N CT 0.00866 0.017 0.0087 ** Wilcoxon 5 8.7e-03
2 value N IF 0.00866 0.017 0.0087 ** Wilcoxon 5.5 8.7e-03
然后,你可以绘制它:
library(ggplot2)
library(ggpubr)
library(ggsignif)
ggplot(df, aes(x = location, y = value))+
geom_boxplot(aes(colour = location))+
ylim(0,6)+
geom_signif(data = as.data.frame(c), aes(xmin=group1, xmax=group2, annotations=labels, y_position=y_pos),
manual = TRUE)
它看起来像你想要绘制的东西吗?
我想将 p 值绘制到多面 ggplot 中的每个面板。如果 p 值大于 0.05,我想按原样显示 p 值。如果 p 值小于 0.05,我想以科学计数法显示该值(即 0.0032 -> 3.20e-3;0.0000425 -> 4.25e-5)。
我为此编写的代码是:
p1 <- ggplot(data = CD3, aes(location, value, color = factor(location),
fill = factor(location))) +
theme_bw(base_rect_size = 1) +
geom_boxplot(alpha = 0.3, size = 1.5, show.legend = FALSE) +
geom_jitter(width = 0.2, size = 2, show.legend = FALSE) +
scale_color_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
scale_fill_manual(values=c("#4cdee6", "#e47267", "#13ec87")) +
ylab(expression(paste("Density of clusters, ", mm^{-2}))) +
xlab(NULL) +
stat_compare_means(comparisons = list(c("CT", 'N'), c("IF","N")),
aes(label = ifelse(..p.format.. < 0.05, formatC(..p.format.., format = "e", digits = 2),
..p.format..)),
method = 'wilcox.test', show.legend = FALSE, size = 10) +
#ylab(expression(paste('Density, /', mm^2, )))+
theme(axis.text = element_text(size = 10),
axis.title = element_text(size = 20),
legend.text = element_text(size = 38),
legend.title = element_text(size = 40),
strip.background = element_rect(colour="black", fill="white", size = 2),
strip.text = element_text(margin = margin(10, 10, 10, 10), size = 40),
panel.grid = element_line(size = 1.5))
plot(p1)
这段代码运行没有错误,但是,数字的格式没有改变。我究竟做错了什么?
编辑
structure(list(value = c(0.931966449207829, 3.24210526315789,
3.88811650210901, 0.626860993574675, 4.62085308056872, 0.477508650519031,
0.111900110501359, 3.2495164410058, 4.06626506024096, 0.21684918139434,
1.10365086026018, 4.66666666666667, 0.174109967855698, 0.597625869832174,
2.3758865248227, 0.360751947840548, 1.00441501103753, 3.65168539325843
), Criteria = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Density", "Density of cluster",
"nodular count", "Elongated count"), class = "factor"), Case = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 5L, 5L, 5L, 6L, 6L,
6L), .Label = c("Case 1A", "Case 1B", "Case 2", "Case 3", "Case 4",
"Case 5"), class = "factor"), Mark = structure(c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("CD3",
"CD4", "CD8", "CD20", "FoxP3"), class = "factor"), location = structure(c(3L,
1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L), .Label = c("CT", "IF", "N"), class = "factor")), row.names = c(91L,
92L, 93L, 106L, 107L, 108L, 121L, 122L, 123L, 136L, 137L, 138L,
151L, 152L, 153L, 166L, 167L, 168L), class = "data.frame")
我认为您的问题来自 stat_compare_means
和 comparisons
的使用。
我不太确定,但我猜想 stat_compare_means
的 p 值输出与 compare_means
不同,因此,您不能将它用于 aes
的 [=] 19=].
让我解释一下,用你的例子,你可以像这样修改p.value的显示:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(ref.group = "N", aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
您得到 p.value 的正确显示,但您失去了酒吧。所以,如果你使用 comparisons
参数,你会得到:
library(ggplot2)
library(ggpubr)
ggplot(df, aes(x = location, y = value, color = location))+
geom_boxplot()+
stat_compare_means(comparisons = list(c("CT","N"), c("IF","N")), aes(label = ifelse(p < 0.05,sprintf("p = %2.1e", as.numeric(..p.format..)), ..p.format..)))
所以,现在,您得到了条形图,但显示不正确。
为了避免这个问题,您可以使用 compare_means
函数在 ggplot2 之外执行统计,并使用包 ggsignif
来显示正确的显示。
在这里,我使用 dplyr
和函数 mutate
来创建新列,但您可以在 base
R 中轻松完成。
library(dplyr)
library(magrittr)
c <- compare_means(value~location, data = df, ref.group = "N")
c %<>% mutate(y_pos = c(5,5.5), labels = ifelse(p < 0.05, sprintf("%2.1e",p),p))
# A tibble: 2 x 10
.y. group1 group2 p p.adj p.format p.signif method y_pos labels
<chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl> <chr>
1 value N CT 0.00866 0.017 0.0087 ** Wilcoxon 5 8.7e-03
2 value N IF 0.00866 0.017 0.0087 ** Wilcoxon 5.5 8.7e-03
然后,你可以绘制它:
library(ggplot2)
library(ggpubr)
library(ggsignif)
ggplot(df, aes(x = location, y = value))+
geom_boxplot(aes(colour = location))+
ylim(0,6)+
geom_signif(data = as.data.frame(c), aes(xmin=group1, xmax=group2, annotations=labels, y_position=y_pos),
manual = TRUE)
它看起来像你想要绘制的东西吗?