将 ggrepel / geom_text_repel 的标签从使用 geom_vline() 和 geom_hline() 绘制的线条移开
Move ggrepel / geom_text_repel's labels away from lines drawn with geom_vline() and geom_hline()
ggrepel
提供了一系列出色的函数来注释 ggplot2
图形,the examples page 包含许多关于如何扩展其功能的很好的提示,包括将生成的标签从两者中移开绘图的轴、其他标签等。
但是,没有涵盖的一件事是将标签从使用 geom_hline()
和 geom_vline()
手动绘制的线条移开,例如 making an annotated volcano plot 中可能发生的情况。
这里有一个简单的 MWE 来突出问题:
library("tidyverse")
library("ggrepel")
dat <- subset(mtcars, wt > 2.75 & wt < 3.45)
dat$car <- rownames(dat)
ggplot(dat, aes(wt, mpg, label = car)) +
geom_point(color = "red") +
geom_text_repel(seed = 1) + #Seed for reproducibility
geom_vline(xintercept = 3.216) + #Deliberately chosen "bad" numbers
geom_hline(yintercept = 19.64) + theme_bw()
这会产生以下输出:
请注意这些线条是如何与标签的文本重叠并使其模糊不清的(是“Horret 4 Drive”还是“Hornet 4 Drive”?)
稍微调整一下点 post 事实上,您可以做出更合适的效果 – 我只是稍微移动了一些标签,使它们脱线。
是否可以让 ggrepel
自动执行此操作?我知道给出的例子并不完全稳定(其他种子给出了可接受的结果)但是对于具有大量点的复杂图来说这绝对是一个问题。
编辑:如果你很好奇,下面是一个不那么“最小”的工作示例(取自 bioconductor):
download.file("https://raw.githubusercontent.com/biocorecrg/CRG_RIntroduction/master/de_df_for_volcano.rds", "de_df_for_volcano.rds", method="curl")
tmp <- readRDS("de_df_for_volcano.rds")
de <- tmp[complete.cases(tmp), ]
de$diffexpressed <- "NO"
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP"
de$diffexpressed[de$log2FoldChange > 0.6 & de$pvalue < 0.05] <- "UP"
# if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN"
de$diffexpressed[de$log2FoldChange < -0.6 & de$pvalue < 0.05] <- "DOWN"
# Create a new column "delabel" to de, that will contain the name of genes differentially expressed (NA in case they are not)
de$delabel <- NA
de$delabel[de$diffexpressed != "NO"] <- de$gene_symbol[de$diffexpressed != "NO"]
#Actually do plot
ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point() +
theme_minimal() +
geom_text_repel() +
scale_color_manual(values=c("blue", "black", "red")) +
geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")
这将产生以下内容,其中文本重叠行问题非常明显:
我不确定是否有任何函数允许 ggrepel
自动执行此操作。解决此问题的一种方法是创建多个数据子集,并将 nudge
添加到标签中。这里我以火山图为例
library(ggplot2)
library(ggrepel)
ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point() +
theme_minimal() +
geom_text_repel(data = subset(de, log2FoldChange < -0.6),
nudge_x = -0.05) +
geom_text_repel(data = subset(de, log2FoldChange > 0.6),
nudge_x = 0.08) +
scale_color_manual(values=c("blue", "black", "red")) +
geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")
我认为没有 built-in 方法可以做到这一点。
一个 non-elegant 我脑子里乱七八糟的是沿着截线添加不可见的点,然后标签会排斥这些点。
dat <- subset(mtcars, wt > 2.75 & wt < 3.45)
dat$car <- rownames(dat)
xintercept = 3.216
yintercept = 19.64
dat %>%
mutate(alpha = 1) %>%
bind_rows(.,
tibble(wt = seq(from = min(.$wt), to = max(.$wt), length.out = 20), mpg = yintercept, car = '', alpha = 0),
tibble(wt = xintercept, mpg = seq(from = min(.$mpg), to = max(.$mpg), length.out = 20), car = '', alpha = 0)
) %>%
ggplot(aes(wt, mpg, label = car, alpha = alpha)) +
geom_point(color = "red") +
geom_text_repel(seed = 1) + #Seed for reproducibility
geom_vline(xintercept = xintercept) +
geom_hline(yintercept = yintercept) + theme_bw() +
scale_alpha_identity()
一个(公认的非正统)解决方案是沿着拦截线绘制“隐形”文本,从而欺骗 geom_text_repel
远离它们。复杂的是,您必须向数据集中添加多个填充行,然后修改绘图以将填充渲染为不可见。但最终结果很干净:
dat2 <- bind_rows(
data.frame(wt = seq(min(dat$wt), max(dat$wt), length = 20), mpg = 19.64, car = 'O'),
data.frame(mpg = seq(min(dat$mpg), max(dat$mpg), length = 20), wt = 3.216, car = 'O'),
dat
)
ggplot(dat2, aes(wt, mpg, label = car)) +
geom_point(data = filter(dat2, car != 'O'), color = "red") +
geom_text_repel(aes(color = car == 'O'), seed = 1, show.legend = F) + #Seed for reproducibility
geom_vline(xintercept = 3.216) + #Deliberately chosen "bad" numbers
geom_hline(yintercept = 19.64) +
scale_color_manual(values = c('black', 'transparent'))
theme_bw()
ggrepel
提供了一系列出色的函数来注释 ggplot2
图形,the examples page 包含许多关于如何扩展其功能的很好的提示,包括将生成的标签从两者中移开绘图的轴、其他标签等。
但是,没有涵盖的一件事是将标签从使用 geom_hline()
和 geom_vline()
手动绘制的线条移开,例如 making an annotated volcano plot 中可能发生的情况。
这里有一个简单的 MWE 来突出问题:
library("tidyverse")
library("ggrepel")
dat <- subset(mtcars, wt > 2.75 & wt < 3.45)
dat$car <- rownames(dat)
ggplot(dat, aes(wt, mpg, label = car)) +
geom_point(color = "red") +
geom_text_repel(seed = 1) + #Seed for reproducibility
geom_vline(xintercept = 3.216) + #Deliberately chosen "bad" numbers
geom_hline(yintercept = 19.64) + theme_bw()
这会产生以下输出:
请注意这些线条是如何与标签的文本重叠并使其模糊不清的(是“Horret 4 Drive”还是“Hornet 4 Drive”?)
稍微调整一下点 post 事实上,您可以做出更合适的效果 – 我只是稍微移动了一些标签,使它们脱线。
是否可以让 ggrepel
自动执行此操作?我知道给出的例子并不完全稳定(其他种子给出了可接受的结果)但是对于具有大量点的复杂图来说这绝对是一个问题。
编辑:如果你很好奇,下面是一个不那么“最小”的工作示例(取自 bioconductor):
download.file("https://raw.githubusercontent.com/biocorecrg/CRG_RIntroduction/master/de_df_for_volcano.rds", "de_df_for_volcano.rds", method="curl")
tmp <- readRDS("de_df_for_volcano.rds")
de <- tmp[complete.cases(tmp), ]
de$diffexpressed <- "NO"
# if log2Foldchange > 0.6 and pvalue < 0.05, set as "UP"
de$diffexpressed[de$log2FoldChange > 0.6 & de$pvalue < 0.05] <- "UP"
# if log2Foldchange < -0.6 and pvalue < 0.05, set as "DOWN"
de$diffexpressed[de$log2FoldChange < -0.6 & de$pvalue < 0.05] <- "DOWN"
# Create a new column "delabel" to de, that will contain the name of genes differentially expressed (NA in case they are not)
de$delabel <- NA
de$delabel[de$diffexpressed != "NO"] <- de$gene_symbol[de$diffexpressed != "NO"]
#Actually do plot
ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point() +
theme_minimal() +
geom_text_repel() +
scale_color_manual(values=c("blue", "black", "red")) +
geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")
这将产生以下内容,其中文本重叠行问题非常明显:
我不确定是否有任何函数允许 ggrepel
自动执行此操作。解决此问题的一种方法是创建多个数据子集,并将 nudge
添加到标签中。这里我以火山图为例
library(ggplot2)
library(ggrepel)
ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point() +
theme_minimal() +
geom_text_repel(data = subset(de, log2FoldChange < -0.6),
nudge_x = -0.05) +
geom_text_repel(data = subset(de, log2FoldChange > 0.6),
nudge_x = 0.08) +
scale_color_manual(values=c("blue", "black", "red")) +
geom_vline(xintercept=c(-0.6, 0.6), col="red") +
geom_hline(yintercept=-log10(0.05), col="red")
我认为没有 built-in 方法可以做到这一点。
一个 non-elegant 我脑子里乱七八糟的是沿着截线添加不可见的点,然后标签会排斥这些点。
dat <- subset(mtcars, wt > 2.75 & wt < 3.45)
dat$car <- rownames(dat)
xintercept = 3.216
yintercept = 19.64
dat %>%
mutate(alpha = 1) %>%
bind_rows(.,
tibble(wt = seq(from = min(.$wt), to = max(.$wt), length.out = 20), mpg = yintercept, car = '', alpha = 0),
tibble(wt = xintercept, mpg = seq(from = min(.$mpg), to = max(.$mpg), length.out = 20), car = '', alpha = 0)
) %>%
ggplot(aes(wt, mpg, label = car, alpha = alpha)) +
geom_point(color = "red") +
geom_text_repel(seed = 1) + #Seed for reproducibility
geom_vline(xintercept = xintercept) +
geom_hline(yintercept = yintercept) + theme_bw() +
scale_alpha_identity()
一个(公认的非正统)解决方案是沿着拦截线绘制“隐形”文本,从而欺骗 geom_text_repel
远离它们。复杂的是,您必须向数据集中添加多个填充行,然后修改绘图以将填充渲染为不可见。但最终结果很干净:
dat2 <- bind_rows(
data.frame(wt = seq(min(dat$wt), max(dat$wt), length = 20), mpg = 19.64, car = 'O'),
data.frame(mpg = seq(min(dat$mpg), max(dat$mpg), length = 20), wt = 3.216, car = 'O'),
dat
)
ggplot(dat2, aes(wt, mpg, label = car)) +
geom_point(data = filter(dat2, car != 'O'), color = "red") +
geom_text_repel(aes(color = car == 'O'), seed = 1, show.legend = F) + #Seed for reproducibility
geom_vline(xintercept = 3.216) + #Deliberately chosen "bad" numbers
geom_hline(yintercept = 19.64) +
scale_color_manual(values = c('black', 'transparent'))
theme_bw()