R - ggplot2 - 当 x 轴是因子变量时,如果 geom_errorbar 超出限制,则添加箭头
R - ggplot2 - Add arrow if geom_errorbar outside limits when x-axis is a factor variable
我想用geom_segment在误差超过一定限度时用箭头替换误差线。我发现以前的 post 解决了这个问题:
代码运行良好,除了我的 x 轴是 因子变量 而不是数字变量。在 geom_segment 语句中使用 position_dodge 使箭头从正确的位置开始,但它不会改变终点 (xend) 并且 所有箭头都指向一个中心点x 轴而不是从原点直接向上。
不是将 x 轴重新编码为数字(我将使用此代码创建许多具有一系列 x 轴值的图,最后一个数值始终以“+”结尾),是否有一种在 geom_segment?
内更正此问题的方法
使用的代码:
data$OR.95U_u = ifelse(data$OR.95U > 10, 10 , NA)
ggplot(data, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(position = position_dodge(.5), stat = "identity", width = .4, color = "black") + geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) + geom_errorbar(aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(.5)) +
theme(legend.key = element_blank(), text = element_text(size = 11.5), legend.title = element_blank()) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(aes(x = numAlleles, xend = numAlleles, y = OR, yend = OR.95U_u), position = position_dodge(.5), arrow = arrow(length = unit(0.3, "cm")))
结果图
好吧,经过一番调查,我没有找到一个干净的方法来做到这一点,似乎 position_dodge
只改变了 x aes,并且不是 xend aes。 position_nudge
在这里也不起作用,因为它会同时移动所有箭头。
所以我想出了一个肮脏的方法来做到这一点。我们只需要为 geom_segment
创建一个具有所需 xend 位置的新变量。我尝试使用 semi-automtized 方法来实现,对于任意数量的着色变量级别,并且还创建了一个可重现的数据集来使用,因为我相信这可以通过人们进行很多改进知识比我多。
该代码具有解释步骤的内联注释:
library(tidyverse)
# dummy data (tried to replicate your plot data more or less accurately)
df <- tibble(
numAlleles = rep(c("1", "2+"), each = 4),
Outcome = rep(LETTERS[1:4], 2),
OR = c(1.4, 1.5, 1.45, 2.3, 3.8, 4.2, 4.0, 1.55),
OR.95U = c(1.9,2.1,1.9,3.8,12,12,12,12),
OR.95L = c(0.9, 0.9, 0.9, 0.8, NA, NA,NA,NA)
) %>%
mutate(
OR.95U_u = if_else(OR.95U > 10, 10, NA_real_)
)
# as it seems that position_dodge in a geom_segment only "dodge" the x aes and
# not the xend aes, we need to supply a custom xend. Also, we need to try
# to automatize the position, for more classes or different dodge widths.
# To do that, lets start with some parameters:
# position_dodge width
position_dodge_width <- 0.5
# number of bars per x axis class
bars_per_class <- length(unique(df$Outcome))
# total space available per class. In discrete vars, this is 1 au (arbitrary unit)
# for each class, but position_dodge only use the fraction of that unit
# indicated in the width parameter, so we need to calculate the real
# space available:
total_space_available <- 1 * position_dodge_width
# now we calculate the real bar width used by ggplot in these au, dividing the
# space available by the number of bars to plot for each class
bar_width_real <- (total_space_available / bars_per_class)
# position_dodge with discrete variables place bars to the left and to the right of the
# class au value, so we need to know when to place the xend to the left or
# to the right. Also, the number of bars has to be taken in to account, as
# in odd number of bars, one is located on the exact au value
if (bars_per_class%%2 == 0) {
# we need an offset, as bars are wider than arrows, and we want them in the
# middle of the bar
offset_segment <- bar_width_real / 2
# offset modifier to know when to substract or add the modifier
offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
# we also need to know how meny bars to the left and how many to the right,
# but, the first bar of each side is already taken in account with the offset,
# so the bar modifier has to have one bar less for each side
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
# when odd number of columns, the offset is the same as the bar width
offset_segment <- bar_width_real
# and the modifiers have to have a middle zero value for the middle bar
offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}
# finally we create the vector of xend values needed:
df %>%
mutate(
numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
)
ggplot(df, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(
position = position_dodge(position_dodge_width), stat = "identity",
width = 0.4, color = "black"
) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) +
geom_errorbar(
aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
) +
theme(
legend.key = element_blank(), text = element_text(size = 11.5),
legend.title = element_blank()
) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(
aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
)
剧情:
我们可以检查三个级别的离散变量是否也有效:
df_three_bars <- df %>% filter(Outcome != 'D')
bars_per_class <- length(unique(df_three_bars$Outcome))
total_space_available <- 1 * position_dodge_width
bar_width_real <- (total_space_available / bars_per_class)
if (bars_per_class%%2 == 0) {
offset_segment <- bar_width_real / 2
offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
offset_segment <- bar_width_real
offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}
df_three_bars <- df_three_bars %>%
mutate(
numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
)
ggplot(df_three_bars, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(
position = position_dodge(position_dodge_width), stat = "identity",
width = 0.4, color = "black"
) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) +
geom_errorbar(
aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
) +
theme(
legend.key = element_blank(), text = element_text(size = 11.5),
legend.title = element_blank()
) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(
aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
)
我想用geom_segment在误差超过一定限度时用箭头替换误差线。我发现以前的 post 解决了这个问题:
代码运行良好,除了我的 x 轴是 因子变量 而不是数字变量。在 geom_segment 语句中使用 position_dodge 使箭头从正确的位置开始,但它不会改变终点 (xend) 并且 所有箭头都指向一个中心点x 轴而不是从原点直接向上。
不是将 x 轴重新编码为数字(我将使用此代码创建许多具有一系列 x 轴值的图,最后一个数值始终以“+”结尾),是否有一种在 geom_segment?
内更正此问题的方法使用的代码:
data$OR.95U_u = ifelse(data$OR.95U > 10, 10 , NA)
ggplot(data, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(position = position_dodge(.5), stat = "identity", width = .4, color = "black") + geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) + geom_errorbar(aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(.5)) +
theme(legend.key = element_blank(), text = element_text(size = 11.5), legend.title = element_blank()) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(aes(x = numAlleles, xend = numAlleles, y = OR, yend = OR.95U_u), position = position_dodge(.5), arrow = arrow(length = unit(0.3, "cm")))
结果图
好吧,经过一番调查,我没有找到一个干净的方法来做到这一点,似乎 position_dodge
只改变了 x aes,并且不是 xend aes。 position_nudge
在这里也不起作用,因为它会同时移动所有箭头。
所以我想出了一个肮脏的方法来做到这一点。我们只需要为 geom_segment
创建一个具有所需 xend 位置的新变量。我尝试使用 semi-automtized 方法来实现,对于任意数量的着色变量级别,并且还创建了一个可重现的数据集来使用,因为我相信这可以通过人们进行很多改进知识比我多。
该代码具有解释步骤的内联注释:
library(tidyverse)
# dummy data (tried to replicate your plot data more or less accurately)
df <- tibble(
numAlleles = rep(c("1", "2+"), each = 4),
Outcome = rep(LETTERS[1:4], 2),
OR = c(1.4, 1.5, 1.45, 2.3, 3.8, 4.2, 4.0, 1.55),
OR.95U = c(1.9,2.1,1.9,3.8,12,12,12,12),
OR.95L = c(0.9, 0.9, 0.9, 0.8, NA, NA,NA,NA)
) %>%
mutate(
OR.95U_u = if_else(OR.95U > 10, 10, NA_real_)
)
# as it seems that position_dodge in a geom_segment only "dodge" the x aes and
# not the xend aes, we need to supply a custom xend. Also, we need to try
# to automatize the position, for more classes or different dodge widths.
# To do that, lets start with some parameters:
# position_dodge width
position_dodge_width <- 0.5
# number of bars per x axis class
bars_per_class <- length(unique(df$Outcome))
# total space available per class. In discrete vars, this is 1 au (arbitrary unit)
# for each class, but position_dodge only use the fraction of that unit
# indicated in the width parameter, so we need to calculate the real
# space available:
total_space_available <- 1 * position_dodge_width
# now we calculate the real bar width used by ggplot in these au, dividing the
# space available by the number of bars to plot for each class
bar_width_real <- (total_space_available / bars_per_class)
# position_dodge with discrete variables place bars to the left and to the right of the
# class au value, so we need to know when to place the xend to the left or
# to the right. Also, the number of bars has to be taken in to account, as
# in odd number of bars, one is located on the exact au value
if (bars_per_class%%2 == 0) {
# we need an offset, as bars are wider than arrows, and we want them in the
# middle of the bar
offset_segment <- bar_width_real / 2
# offset modifier to know when to substract or add the modifier
offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
# we also need to know how meny bars to the left and how many to the right,
# but, the first bar of each side is already taken in account with the offset,
# so the bar modifier has to have one bar less for each side
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
# when odd number of columns, the offset is the same as the bar width
offset_segment <- bar_width_real
# and the modifiers have to have a middle zero value for the middle bar
offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}
# finally we create the vector of xend values needed:
df %>%
mutate(
numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
)
ggplot(df, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(
position = position_dodge(position_dodge_width), stat = "identity",
width = 0.4, color = "black"
) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) +
geom_errorbar(
aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
) +
theme(
legend.key = element_blank(), text = element_text(size = 11.5),
legend.title = element_blank()
) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(
aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
)
剧情:
我们可以检查三个级别的离散变量是否也有效:
df_three_bars <- df %>% filter(Outcome != 'D')
bars_per_class <- length(unique(df_three_bars$Outcome))
total_space_available <- 1 * position_dodge_width
bar_width_real <- (total_space_available / bars_per_class)
if (bars_per_class%%2 == 0) {
offset_segment <- bar_width_real / 2
offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
offset_segment <- bar_width_real
offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}
df_three_bars <- df_three_bars %>%
mutate(
numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
)
ggplot(df_three_bars, aes(x = numAlleles, y = OR, fill = Outcome)) +
geom_bar(
position = position_dodge(position_dodge_width), stat = "identity",
width = 0.4, color = "black"
) +
geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
ylim(0,10) +
geom_errorbar(
aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
) +
theme(
legend.key = element_blank(), text = element_text(size = 11.5),
legend.title = element_blank()
) +
labs(x = "Number of rare alleles") +
scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) +
geom_segment(
aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
)