我可以在 ggplot bar/column 图表上方添加分组线标签吗?

Can I add grouping line labels above my ggplot bar/column chart?

我有兴趣在我的 ggplot 条形图上方添加分组标签。此功能存在于数据可视化,例如系统发育树(ggtree),但我还没有找到在 ggplot 中实现它的方法。

我试过 geom_text 和 geom_label,但我还没有成功。也许有另一个包可以启用此功能?我附上了一些应该可以完全重现的示例代码。我希望 rating 变量越过列出的大陆(跨越多个大陆)。

非常感谢任何帮助!谢谢!

P.S。请原谅所有的评论-我正在写一个教学教程。

#load necessary packages
library(tidyverse)
library(stringr)
library(hrbrthemes)
library(scales)

#load data
covid<- read_csv("https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/owid-covid-data.csv", na = ".")  

#this makes a new dataframe (total_cases) that only has the latest COVID cases count and location data
total_cases <- covid %>% filter(date == "2021-05-23") %>% 
  group_by(location, total_cases) %>% 
  summarize()

#get number for world total cases. 
world <- total_cases %>%
  filter(location == "World") %>%
  select(total_cases)

#make new column that has the proportion of total world cases (number was total on that day)
total_cases$prop_total <- total_cases$total_cases/world$total_cases

#this specifies what the continents are so we can filter them out with dplyr
continents <- c("North America", "South America", "Antarctica", "Asia", "Europe", "Africa", "Australia")

#Using dyplr, we're choosing total_cases pnly for the continents
contin_cases <- total_cases %>%
  filter(location %in% continents)

#Loading a colorblind accessible palette
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

#Add a column that rates proportion of cases categorically.   
contin_cases <- contin_cases %>% 
  mutate(rating = case_when(prop_total <= 0.1 ~ 'low',
                            prop_total <= 0.2 ~ 'medium',
                            prop_total <= 1 ~ 'high'))

#Ploting it on a bar chart. 
plot1 <- ggplot(contin_cases, 
           aes(x = reorder(location, prop_total),
               y = prop_total,
               fill = location)) +
  geom_bar(stat="identity", color="white") +
  ylim(0, 1) +
  geom_text(aes(y = prop_total,
                label = round(prop_total, 4)),
            vjust = -1.5) +
  scale_fill_manual(name = "Continent", 
                    values = cbbPalette) +
  labs(title = "Proportion of total COVID-19 Cases Per Continent", 
       caption ="Figure 1. Asia leads total COVID case count as of May 23rd, 2021. No data exists in this dataset for Antarctica.") +
  ylab("Proportion of total cases") +
  xlab("") + #this makes x-axis blank
  theme_classic()+
    theme(
    plot.caption = element_text(hjust = 0, face = "italic"))

plot1

这与我要实现的目标类似:

bar chart showing total covid cases by continent as of May 2021

实现您想要的结果的一种方法是通过 geom_segment。为此,我首先准备了一个数据集,其中包含要按评级组放置在条形图顶部的段的开始和结束位置。基本上这涉及将离散位置转换为数字。

之后添加段和标签就非常简单了。

library(tidyverse)
library(hrbrthemes)
library(scales)

# Loading a colorblind accessible palette
cbbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")

width <- .45 # Half of default width of bars
df_segment <- contin_cases %>% 
  ungroup() %>% 
  # Convert location to numerics
  mutate(loc_num = as.numeric(fct_reorder(location, prop_total))) %>%
  group_by(rating) %>% 
  summarise(x = min(loc_num) - width, xend = max(loc_num) + width,
            y = max(prop_total) * 1.5, yend = max(prop_total) * 1.5)

ggplot(
  contin_cases,
  aes(
    x = reorder(location, prop_total),
    y = prop_total,
    fill = location
  )
) +
  geom_bar(stat = "identity", color = "white") +
  ylim(0, 1) +
  geom_segment(data = df_segment, aes(x = x, xend = xend, y = max(y), yend = max(yend), 
                                      color = rating, group = rating), 
               inherit.aes = FALSE, show.legend = FALSE) +
  geom_text(data = df_segment, aes(x = .5 * (x + xend), y = max(y), label = str_to_title(rating), color = rating), 
            vjust = -.5, inherit.aes = FALSE, show.legend = FALSE) +
  geom_text(aes(
    y = prop_total,
    label = round(prop_total, 4)
  ),
  vjust = -1.5
  ) +
  scale_fill_manual(
    name = "Continent",
    values = cbbPalette
  ) +
  labs(
    title = "Proportion of total COVID-19 Cases Per Continent",
    caption = "Figure 1. Asia leads total COVID case count as of May 23rd, 2021. No data exists in this dataset for Antarctica."
  ) +
  ylab("Proportion of total cases") +
  xlab("") + # this makes x-axis blank
  theme_classic() +
  theme(
    plot.caption = element_text(hjust = 0, face = "italic")
  )

数据

contin_cases <- structure(list(location = c(
  "Africa", "Asia", "Australia", "Europe",
  "North America", "South America"
), total_cases = c(
  4756650, 49204489,
  30019, 46811325, 38790782, 27740153
), prop_total = c(
  0.0284197291646085,
  0.293983843894959, 0.000179355607369132, 0.2796853202015, 0.231764691226676,
  0.165740097599109
), rating = c(
  "low", "high", "low", "high",
  "high", "medium"
)), class = c(
  "grouped_df", "tbl_df", "tbl",
  "data.frame"
), row.names = c(NA, -6L), groups = structure(list(
  location = c(
    "Africa", "Asia", "Australia", "Europe", "North America",
    "South America"
  ), .rows = structure(list(
    1L, 2L, 3L, 4L,
    5L, 6L
  ), ptype = integer(0), class = c(
    "vctrs_list_of",
    "vctrs_vctr", "list"
  ))
), row.names = c(NA, -6L), class = c(
  "tbl_df",
  "tbl", "data.frame"
), .drop = TRUE))