按ggplot中连续变量的中值重新排序因子水平

Reorder factor levels by median of continuous variable in ggplot

问题:

我有一个包含两个变量(年龄 age 和组 group)的数据框,我想按组绘制年龄的箱线图,并希望根据各组的年龄中位数对 group 的因子水平进行排序。为此,我尝试按照两个教程(原文中的两个链接)的做法去做,但不知道哪里做错了。下面,我提供了一个可重现的例子。

可重现的例子:

我的数据:

# Reproducible sample data (looks like dput() output): 200 rows with a numeric
# `age` column and a factor `group` with 8 levels (HN2, HB2, MN1, HN1, HB1,
# MB1, MB2, MN2).
# NOTE: the outermost class vector starts with "rowwise_df" and the `groups`
# attribute lists one row index per row, i.e. the tibble is grouped by row.
df <- structure(list(age = c(27, 28, 22, 28, 35, 25, 23, 28, 29, 26, 51, 29, 30, 29, 22, 23, 22, 22, 29, 27, 22, 43, 22, 36, 28, 24, 22, 22, 43, 22, 26, 24, 22, 24, 27, 23, 27, 22, 28, 32, 30, 22, 21, 30, 37, 28, 30, 31, 28, 24, 21, 30, 27, 33, 22, 29, 45, 30, 30, 23, 30, 23, 22, 30, 23, 28, 22, 28, 22, 22, 22,
 22, 19, 35, 24, 27, 23, 26, 22, 21, 26, 28, 29, 26, 42, 27, 28, 22, 22, 21, 41, 23, 31, 21, 30, 28, 28, 21, 21, 28, 24, 28, 29, 21, 27, 24, 28, 22, 23, 28, 22, 22, 28, 35, 29, 23, 23, 30, 28, 22, 29, 42, 
34, 29, 39, 35, 30, 32, 29, 27, 33, 30, 28, 22, 21, 28, 29, 28, 28, 29, 29, 29, 28, 29, 27, 28, 28, 28, 38, 29, 28, 28, 29, 22, 25, 30, 31, 30, 30, 30, 28, 29, 29, 26, 29, 29, 30, 28, 28, 22, 
29, 30, 28, 22, 22, 28, 22, 28, 29, 27, 29, 29, 29, 30, 30, 30, 51, 36, 28, 22, 38, 22, 21, 30, 28, 29, 28, 27, 24, 23), group = structure(c(1L, 2L, 3L, 4L, 4L, 5L, 4L, 2L, 5L, 1L, 3L, 6L, 2L, 5L, 3L, 4L, 2L, 
3L, 5L, 7L, 1L, 5L, 3L, 3L, 2L, 6L, 4L, 3L, 5L, 3L, 2L, 7L, 4L, 7L, 4L, 3L, 4L, 8L, 6L, 3L, 2L, 8L, 7L, 6L, 5L, 3L, 5L, 6L, 4L, 2L, 2L, 6L, 5L, 6L, 4L, 4L, 5L, 3L, 6L, 3L, 3L, 4L, 6L, 5L, 6L, 6L, 2L, 6L, 1L,
 7L, 2L, 5L, 4L, 6L, 8L, 4L, 3L, 7L, 5L, 2L, 4L, 6L, 6L, 5L, 5L, 3L, 2L, 3L, 3L, 2L, 5L, 6L, 4L, 4L, 5L, 5L, 6L, 4L, 3L, 3L, 2L, 4L, 6L, 1L, 3L, 7L, 4L, 8L, 4L, 6L, 2L, 6L, 4L, 6L, 5L, 5L, 5L, 6L, 5L, 3L, 5L, 5L, 6L, 2L, 6L, 4L, 5L, 4L, 2L, 6L, 6L, 6L, 4L, 2L, 5L, 2L, 3L, 3L, 2L, 2L, 6L, 2L, 4L, 3L, 4L, 
5L, 5L, 3L, 6L, 5L, 4L, 5L, 6L, 3L, 6L, 6L, 5L, 3L, 6L, 1L, 5L, 6L, 6L, 8L, 5L, 6L, 6L, 2L, 5L, 3L, 4L, 5L, 7L, 2L, 4L, 3L, 2L, 6L, 6L, 7L, 5L, 5L, 7L, 4L, 4L, 6L, 5L, 4L, 2L, 1L, 6L, 4L, 6L, 
6L, 6L, 5L, 6L, 1L, 2L, 2L), .Label = c("HN2", "HB2", "MN1", "HN1", "HB1", "MB1", "MB2", "MN2"), class = "factor")), row.names = c(NA, -200L), groups = structure(list(.rows = structure(list(1L, 2L, 
    3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 
    40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 76L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 91L, 92L, 93L, 94L, 95L, 96L, 97L, 98L, 99L, 100L,
101L, 102L, 103L, 104L, 105L, 106L, 107L, 108L, 109L, 110L, 111L, 112L, 113L, 114L, 115L, 116L, 117L, 118L, 119L, 120L, 121L, 122L, 123L, 124L, 125L, 126L, 127L, 128L, 129L, 130L, 131L, 132L, 133L, 134L, 135L, 136L, 137L, 138L, 139L, 140L, 141L, 142L, 143L, 144L, 145L, 146L, 147L, 148L, 149L, 150L, 151L,
 152L, 153L, 154L, 155L, 156L, 157L, 158L, 159L, 160L, 161L, 162L, 163L, 164L, 165L, 166L, 167L, 168L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 176L, 177L, 178L, 179L, 180L, 181L, 182L, 183L, 184L, 185L, 186L, 187L, 188L, 189L, 190L, 191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L, 200L), 
ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -200L), class = c("tbl_df", "tbl", "data.frame")), class = c("rowwise_df", "tbl_df", "tbl", "data.frame"))

没有重新排序因子水平的箱线图:

library(ggplot2)
library(dplyr)

# Baseline: boxplot of age per group, with the factor levels of `group` used
# exactly as they are stored in df. The fill legend is switched off.
p1 <- ggplot(data = df, mapping = aes(x = group, y = age, fill = group)) +
  geom_boxplot() +
  theme(legend.position = "none")

我试过的:

按照第一个教程重新排序因子水平:

library(forcats)

# Attempt 1 (kept as in the question): reorder the levels of `group` by the
# median of `age`, passing the summary function as a function object.
# df is piped in unchanged, i.e. without ungroup().
p2 <- df %>%
  mutate(group = fct_reorder(.f = group, .x = age, .fun = median)) %>%
  ggplot(mapping = aes(x = group, y = age, fill = group)) +
  geom_boxplot() +
  theme(legend.position = "none")

按照第二个教程重新排序因子水平:

# Attempt 2 (kept as in the question): same reordering as attempt 1, but the
# summary function is given by name as the string "median".
# df is piped in unchanged, i.e. without ungroup().
p3 <- df %>%
  mutate(group = fct_reorder(.f = group, .x = age, .fun = "median")) %>%
  ggplot(mapping = aes(x = group, y = age, fill = group)) +
  geom_boxplot() +
  theme(legend.position = "none")

这三个图(p1、p2、p3)的结果完全相同,各组都没有按年龄中位数排序:

# Combine the three plots into one figure for comparison.
# NOTE(review): ggarrange() is not provided by ggplot2, dplyr or forcats (the
# only packages loaded in these snippets) — presumably ggpubr::ggarrange();
# that package must be loaded first. TODO confirm which package was intended.
ggarrange(p1, p2, p3)

为什么它不起作用?

我的期望:得到与第一个教程的解决方案类似的结果,但使用我自己的数据和 ggplot:

您的数据是按行分组的(其类为 rowwise_df)。因此 mutate() 会逐行调用 fct_reorder,每次只能看到一行的年龄值,无法按整组的中位数对因子水平排序。在应用 fct_reorder 之前,您需要先调用 ungroup():

library(dplyr)
library(ggplot2)
library(forcats)

# Fix: remove the row-wise grouping first, then reorder the levels of `group`
# by median age and draw the boxplot with the fill legend hidden.
df %>%
  ungroup() %>%
  mutate(group = fct_reorder(.f = group, .x = age, .fun = median)) %>%
  ggplot(mapping = aes(x = group, y = age, fill = group)) +
  geom_boxplot() +
  theme(legend.position = "none")

另一种做法:也可以不修改数据,直接在 aes() 中对 x 轴变量重新排序:

# Alternative without mutate(): the reordered factor is computed directly in
# the x aesthetic, so df itself is left untouched. The expression inside
# aes() is kept verbatim.
ggplot(
  data = df,
  mapping = aes(x = fct_reorder(group, age, median), y = age, fill = group)
) +
  geom_boxplot() +
  theme(legend.position = "none")