R - ggplot2 - 当 x 轴是因子变量时,如果 geom_errorbar 超出限制,则添加箭头

R - ggplot2 - Add arrow if geom_errorbar outside limits when x-axis is a factor variable

我想用geom_segment在误差超过一定限度时用箭头替换误差线。我发现以前的 post 解决了这个问题:

代码运行良好,除了我的 x 轴是 因子变量 而不是数字变量。在 geom_segment 语句中使用 position_dodge 使箭头从正确的位置开始,但它不会改变终点 (xend) 并且 所有箭头都指向一个中心点x 轴而不是从原点直接向上。

不是将 x 轴重新编码为数字(我将使用此代码创建许多具有一系列 x 轴值的图,最后一个数值始终以“+”结尾),是否有一种在 geom_segment?

内更正此问题的方法

使用的代码:

data$OR.95U_u = ifelse(data$OR.95U > 10, 10 , NA)

ggplot(data, aes(x = numAlleles, y = OR, fill = Outcome)) + 
  geom_bar(position = position_dodge(.5), stat = "identity", width = .4, color = "black") + geom_hline(yintercept = 1, linetype = "dashed", color = "black") + 
  ylim(0,10) + geom_errorbar(aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(.5)) +
  theme(legend.key = element_blank(), text = element_text(size = 11.5), legend.title = element_blank()) + 
  labs(x = "Number of rare alleles") +
  scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) + 
  geom_segment(aes(x = numAlleles, xend = numAlleles, y = OR, yend = OR.95U_u), position = position_dodge(.5), arrow = arrow(length = unit(0.3, "cm")))

结果图

好吧,经过一番调查,我没有找到一个干净的方法来做到这一点,似乎 position_dodge 只改变了 x aes,并且不是 xend aesposition_nudge 在这里也不起作用,因为它会同时移动所有箭头。 所以我想出了一个肮脏的方法来做到这一点。我们只需要为 geom_segment 创建一个具有所需 xend 位置的新变量。我尝试使用 semi-automtized 方法来实现,对于任意数量的着色变量级别,并且还创建了一个可重现的数据集来使用,因为我相信这可以通过人们进行很多改进知识比我多。 该代码具有解释步骤的内联注释:

library(tidyverse)

# dummy data (tried to replicate your plot data more or less accurately)
df <- tibble(
  numAlleles = rep(c("1", "2+"), each = 4),
  Outcome = rep(LETTERS[1:4], 2),
  OR = c(1.4, 1.5, 1.45, 2.3, 3.8, 4.2, 4.0, 1.55),
  OR.95U = c(1.9,2.1,1.9,3.8,12,12,12,12),
  OR.95L = c(0.9, 0.9, 0.9, 0.8, NA, NA,NA,NA)
) %>%
  mutate(
    OR.95U_u = if_else(OR.95U > 10, 10, NA_real_)
  )

# as it seems that position_dodge in a geom_segment only "dodge" the x aes and
# not the xend aes, we need to supply a custom xend. Also, we need to try
# to automatize the position, for more classes or different dodge widths.
# To do that, lets start with some parameters:
# position_dodge width
position_dodge_width <- 0.5
# number of bars per x axis class
bars_per_class <- length(unique(df$Outcome))
# total space available per class. In discrete vars, this is 1 au (arbitrary unit)
# for each class, but position_dodge only use the fraction of that unit
# indicated in the width parameter, so we need to calculate the real
# space available:
total_space_available <- 1 * position_dodge_width
# now we calculate the real bar width used by ggplot in these au, dividing the
# space available by the number of bars to plot for each class
bar_width_real <- (total_space_available / bars_per_class)
# position_dodge with discrete variables place bars to the left and to the right of the
# class au value, so we need to know when to place the xend to the left or
# to the right. Also, the number of bars has to be taken in to account, as
# in odd number of bars, one is located on the exact au value
if (bars_per_class%%2 == 0) {
  # we need an offset, as bars are wider than arrows, and we want them in the
  # middle of the bar
  offset_segment <- bar_width_real / 2
  # offset modifier to know when to substract or add the modifier
  offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
  # we also need to know how meny bars to the left and how many to the right,
  # but, the first bar of each side is already taken in account with the offset,
  # so the bar modifier has to have one bar less for each side
  bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
  # when odd number of columns, the offset is the same as the bar width
  offset_segment <- bar_width_real
  # and the modifiers have to have a middle zero value for the middle bar
  offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
  bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}

# finally we create the vector of xend values needed:
df %>%
  mutate(
    numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
  )

ggplot(df, aes(x = numAlleles, y = OR, fill = Outcome)) + 
  geom_bar(
    position = position_dodge(position_dodge_width), stat = "identity",
    width = 0.4, color = "black"
  ) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  ylim(0,10) +
  geom_errorbar(
    aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
  ) +
  theme(
    legend.key = element_blank(), text = element_text(size = 11.5),
    legend.title = element_blank()
  ) + 
  labs(x = "Number of rare alleles") +
  scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) + 
  geom_segment(
    aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
    position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
  )

剧情:

我们可以检查三个级别的离散变量是否也有效:

df_three_bars <- df %>% filter(Outcome != 'D')
bars_per_class <- length(unique(df_three_bars$Outcome))
total_space_available <- 1 * position_dodge_width
bar_width_real <- (total_space_available / bars_per_class)
if (bars_per_class%%2 == 0) {
  offset_segment <- bar_width_real / 2
  offset_modifier <- c(rep(-1, bars_per_class%/%2), rep(1, bars_per_class%/%2))
  bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), seq(0, (bars_per_class%/%2-1)))
} else {
  offset_segment <- bar_width_real
  offset_modifier <- c(rep(-1, bars_per_class%/%2), 0, rep(1, bars_per_class%/%2))
  bar_width_modifier <- c(seq((bars_per_class%/%2-1), 0), 0, seq(0, (bars_per_class%/%2-1)))
}
df_three_bars <- df_three_bars %>%
  mutate(
    numAlleles_u = as.numeric(as.factor(numAlleles)) + offset_modifier*(offset_segment + (bar_width_modifier*bar_width_real))
  )

ggplot(df_three_bars, aes(x = numAlleles, y = OR, fill = Outcome)) + 
  geom_bar(
    position = position_dodge(position_dodge_width), stat = "identity",
    width = 0.4, color = "black"
  ) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black") +
  ylim(0,10) +
  geom_errorbar(
    aes(ymin=OR.95L, ymax=OR.95U), width=.2,position=position_dodge(position_dodge_width)
  ) +
  theme(
    legend.key = element_blank(), text = element_text(size = 11.5),
    legend.title = element_blank()
  ) + 
  labs(x = "Number of rare alleles") +
  scale_fill_manual(values=c("chocolate1","coral1", "red2", "darkred")) + 
  geom_segment(
    aes(x = numAlleles, xend = numAlleles_u, y = OR, yend = OR.95U_u),
    position = position_dodge(position_dodge_width), arrow = arrow(length = unit(0.3, "cm"))
  )