R 中性别过剩的人口金字塔

Population Pyramid with Gender surplus in R

在维基百科上,有一个奇妙的人口金字塔,显示了性别过剩。 我如何使用 ggplot2 和/或 plotly 在 R 中重新创建它?

它本质上是一个双层堆叠条形图,已定向 90 度。

# Here is some population data

library(wpp2019)
# Male
data(popM)
# Female
data(popF)

Wikipedia: Demographics of the United States

这不是最简洁的方法,但应该可行:

library(ggplot2)
library(wpp2019)
#> Warning: package 'wpp2019' was built under R version 4.1.1

data(popM)
data(popF)

# Assuming structure of popM and popF is parallel
df <- data.frame(
  age = factor(popM$age, unique(popM$age)),
  male = popM$`2020`,
  female = popF$`2020`
)[popM$name == "World",]

ggplot(df, aes(y = age)) +
  geom_col(aes(x = female, fill = "female surplus"), width = 1) +
  geom_col(aes(x = -male,  fill = "male surplus"), width = 1) +
  geom_col(aes(x =  pmin(male, female), fill = "female"), width = 1) +
  geom_col(aes(x = -pmin(male, female), fill = "male"), width = 1)

reprex package (v2.0.1)

创建于 2021-09-22

在下面的代码中,大部分工作都在数据整形中,而 ggplot 代码相对简单。

library(wpp2019)
library(tidyverse)
data(popM)
data(popF)

list(Male=popM, Female=popF) %>% 
  imap(~.x %>% 
         filter(name=="World") %>% 
         select(age, !!.y:=`2020`)) %>% 
  reduce(full_join) %>% 
  mutate(age = factor(age, levels=unique(age)),
         `Female surplus` = pmax(Female - Male, 0),
         `Male surplus` = pmax(Male - Female, 0),
         Male = Male - `Male surplus`,
         Female = Female - `Female surplus`) %>% 
  pivot_longer(-age) %>%
  mutate(value = case_when(grepl("Male", name) ~ -value, 
                           TRUE ~ value),
         name = factor(name, levels=c("Female surplus", "Female", 
                                      "Male surplus", "Male"))) %>% 
  ggplot(aes(value, age, fill=name)) +
    geom_col() + 
    geom_vline(xintercept=0, colour="white") + 
    scale_x_continuous(label=function(x) ifelse(x < 0, -x, x),
                       breaks=scales::pretty_breaks(6)) +
    labs(x=NULL, y=NULL, fill=NULL) +
    scale_fill_discrete(type=RColorBrewer::brewer.pal(name="RdBu", n=4)[c(1,2,4,3)],
                        breaks=c("Male surplus", "Male", "Female","Female surplus")) +
    theme_bw() +
    theme(legend.position="bottom")

作为另一种选择,您可以将垂直轴标签放置在条形之间。这个版本也使用分面,所以我们可以很容易地按性别标记分面。那么在图例中我们只需要标注出柱状图多余的部分即可。

library(ggpol)
library(ggthemes)

list(Male=popM, Female=popF) %>% 
  imap(~.x %>% 
         filter(name=="World") %>% 
         select(age, !!.y:=`2020`)) %>% 
  reduce(full_join) %>% 
  mutate(age = factor(age, levels=unique(age)),
         `Female surplus` = pmax(Female - Male, 0),
         `Male surplus` = pmax(Male - Female, 0),
         Male = Male - `Male surplus`,
         Female = Female - `Female surplus`) %>% 
  pivot_longer(-age) %>%
  mutate(facet = factor(ifelse(grepl("Female", name), "Female", "Male"),
                        c("Male","Female")),
         value = case_when(grepl("Male", name) ~ -value, 
                           TRUE ~ value),
         name = factor(name, levels=c("Female surplus", "Female", 
                                      "Male surplus", "Male"))) %>% 
  ggplot(aes(value, age, fill=name)) +
    geom_col() + 
    geom_vline(xintercept=0, colour="white") + 
    scale_x_continuous(label=function(x) ifelse(x < 0, -x, x),
                       breaks=scales::pretty_breaks(3),
                       expand=c(0,0)) +
    labs(x=NULL, y=NULL, fill=NULL) +
    facet_share(vars(facet), scales="free_x") +
    scale_fill_discrete(type=RColorBrewer::brewer.pal(name="RdBu", n=4)[c(1,2,4,3)],
                        breaks=c("Male surplus", "Female surplus")) +
    theme_clean() +
    theme(legend.position="bottom",
          legend.background=element_blank(),
          legend.key.height=unit(4,"mm"),
          legend.margin=margin(t=0), 
          plot.background=element_blank(),
          strip.text=element_text(face="bold", size=rel(0.9)))