geom_segment 多变量图的垂直线

geom_segment line vertical for multiple variable plot

下面的绘图代码使所有颜色的线段汇聚在离散 x 变量的中心。可以做些什么来使每个 x 变量的线段线都是垂直的?

#~ Plot data
# Layers, bottom to top: raw observations (jittered and dodged by v2), the
# per-group means, a segment from each group mean to the grand mean, and a
# horizontal reference line at the grand mean; one facet per level of v1.
# As written, the dodged segments do not stay vertical - this is the
# behaviour being asked about.
ggplot(data0, mapping = aes(x = v3, y = v4)) +
  geom_point(
    mapping = aes(colour = v2),
    position = position_jitterdodge(jitter.width = 0.15, seed = 1),
    alpha = 0.25,
    size = 2
  ) +
  stat_summary(
    mapping = aes(colour = v2),
    geom = "point",
    fun = mean,
    position = position_dodge(width = 0.75),
    size = 5
  ) +
  geom_segment(
    data = data1,
    mapping = aes(x = v3, y = v4, xend = v3, yend = grand_mean, colour = v2),
    position = position_dodge(width = 0.75),
    size = 1
  ) +
  geom_hline(data = data1, mapping = aes(yintercept = grand_mean)) +
  facet_wrap(vars(v1)) +
  theme_bw()

数据:

#~ Raw data
# Simulated example data: v1 is the facet variable, v2 the colour group,
# v3 the discrete x position and v4 the response.
# The seed is fixed so the example is reproducible from run to run.
set.seed(1)
n_obs <- 1000
data0 <- data.frame(
  v1 = sample(c("foo", "bar"), n_obs, replace = TRUE),
  v2 = sample(letters[1:4], n_obs, replace = TRUE),
  v3 = sample(1:4, n_obs, replace = TRUE),
  v4 = rnorm(n_obs)
)

#~ Summary data
# One row per v1 / v2 / v3 combination holding the mean of v4.
# `.groups = "drop"` returns an ungrouped result, so data1 does not stay
# grouped by v1 and v2 and summarise() does not print its grouping message.
data1 <- data0 %>%
  group_by(v1, v2, v3) %>%
  summarise(v4 = mean(v4), .groups = "drop")
# The grand mean of all raw observations, repeated on every summary row.
data1$grand_mean <- mean(data0$v4)

position_dodge() 无法处理 xend 和 yend。请改用 geom_line() 或 geom_linerange():

# Same plot as in the question, but the link between each group mean and the
# grand mean is drawn with geom_linerange() (x, ymin, ymax) instead of
# geom_segment() (x, xend, y, yend), using the same dodge width as the means.
ggplot(data0, mapping = aes(x = v3, y = v4)) +
  geom_point(
    mapping = aes(colour = v2),
    position = position_jitterdodge(jitter.width = 0.15, seed = 1),
    alpha = 0.25,
    size = 2
  ) +
  stat_summary(
    mapping = aes(colour = v2),
    geom = "point",
    fun = mean,
    position = position_dodge(width = 0.75),
    size = 5
  ) +
  geom_linerange(
    data = data1,
    mapping = aes(x = v3, ymin = v4, ymax = grand_mean, colour = v2),
    position = position_dodge(width = 0.75),
    size = 1
  ) +
  geom_hline(data = data1, mapping = aes(yintercept = grand_mean)) +
  facet_wrap(vars(v1)) +
  theme_bw()

更长的答案 - 我在其中手动计算每个组的 v3_revised(x 轴)位置。

library(dplyr)
library(ggplot2)

#~ Raw data
# Simulated example data: v1 is the facet variable, v2 the colour group,
# v3 the discrete x position and v4 the response.
# The seed is fixed so the example is reproducible from run to run.
set.seed(1)
n_obs <- 1000
data0 <- data.frame(
  v1 = sample(c("foo", "bar"), n_obs, replace = TRUE),
  v2 = sample(letters[1:4], n_obs, replace = TRUE),
  v3 = sample(1:4, n_obs, replace = TRUE),
  v4 = rnorm(n_obs)
)

# Horizontal offset for each level of v2: evenly spaced values from -0.3 to
# 0.3, named by level so they can be looked up with the v2 column.
v2_unique <- sort(unique(data0$v2))
x_variation <- setNames(
  seq(from = -0.3, to = 0.3, length.out = length(v2_unique)),
  v2_unique
)

#~ Summary data
# v3_revised is the manually "dodged" x position: v3 plus the offset of the
# row's v2 level. unname() keeps the lookup names out of the new column.
data0 <- data0 %>%
  mutate(v3_revised = v3 + unname(x_variation[v2]))
# One row per v1 / v2 / v3_revised combination holding the mean of v4.
# `.groups = "drop"` returns an ungrouped result, so data1 does not stay
# grouped by v1 and v2 and summarise() does not print its grouping message.
data1 <- data0 %>%
  group_by(v1, v2, v3_revised) %>%
  summarise(v4 = mean(v4), .groups = "drop")
# The grand mean of all raw observations, repeated on every summary row.
data1$grand_mean <- mean(data0$v4)

#~ Plot data
# x is v3_revised, which already contains a separate offset for every v2
# level, so no layer needs a dodge here. The raw points only get a small
# horizontal jitter: dodging on top of the manual offsets made
# position_jitterdodge() warn about overlapping x intervals.
# The jitter width (0.05) is kept well below the 0.2 gap between
# neighbouring offsets so the point clouds of adjacent groups stay apart.
ggplot(data = data0, aes(x = v3_revised, y = v4)) +
  geom_point(
    aes(colour = v2),
    size = 2,
    alpha = 0.25,
    position = position_jitter(width = 0.05, height = 0, seed = 1)
  ) +
  # Per-group mean, drawn at the group's own (offset) x position.
  stat_summary(fun = mean, geom = "point", size = 2, aes(colour = v2)) +
  # x and xend are the same column, so every segment is vertical.
  geom_segment(
    data = data1,
    aes(
      x = v3_revised, xend = v3_revised,
      y = v4, yend = grand_mean,
      colour = v2
    ),
    size = 1
  ) +
  geom_hline(data = data1, aes(yintercept = grand_mean)) +
  facet_wrap(facets = vars(v1)) +
  theme_bw()

由 reprex package (v2.0.0) 于 2021-06-07 创建