以 Jonathan A. Schwabish (JEP 2014) 的格式可视化堆叠条形图
Visualizing stacked bar chart in the format of Jonathan A. Schwabish (JEP 2014)
我正在尝试把以下数据 (df_input) 绘制成堆叠条形图,并用连线展示各部分随时间的变化。有人知道该怎么做吗?
# Two-year example: one share value per village (A-D) for 2010 and 2020.
df_input <- data.frame(
  Year = rep(c(2010, 2020), each = 4),
  village = rep(c("A", "B", "C", "D"), times = 2),
  share = c(
    40, 30, 20, 10,
    30, 30, 25, 15
  )
)

# Three-year example: same villages with an additional 2015 observation.
df_input_2 <- data.frame(
  Year = rep(c(2010, 2015, 2020), each = 4),
  village = rep(c("A", "B", "C", "D"), times = 3),
  share = c(
    40, 30, 20, 10,
    30, 30, 25, 15,
    20, 10, 30, 40
  )
)
实现这一目标的一种办法是结合使用 `geom_col` 和 `geom_line`。对于 `geom_line`,您必须按映射到 `fill` 的变量进行分组,将 position 设置为 `"stack"`,并调整起点/终点位置以考虑条形的宽度。此外,您必须手动将 `geom_line` 的 `orientation` 设置为 `"y"`:
library(ggplot2)

# Bar width. The connecting lines are shifted by half of it so that they run
# between the facing edges of the two bars instead of between their centres.
width <- .6

# Horizontal stacked bars (one per year) plus one line per village that links
# the stacked positions of that village in both bars.
ggplot(df_input, aes(share, factor(Year), fill = village)) +
  geom_col(width = width) +
  geom_line(
    aes(
      x = share,
      # Move the line end at the last year down and the end at the first year
      # up by half a bar width. Comparing against max(Year) instead of the
      # literal 2020 keeps this working for any pair of years.
      y = as.numeric(factor(Year)) +
        ifelse(Year == max(Year), -width / 2, width / 2),
      group = village
    ),
    # Stack the x values so the lines follow the stacked bar segments.
    position = "stack",
    orientation = "y"
  )
编辑:如果数据包含两个以上的年份,事情就会变得有点棘手。在这种情况下,我会改用 `geom_segment`。此外,我们必须先做一些数据整理,为 `geom_segment` 准备数据:
library(ggplot2)
library(dplyr)

# Example data with three years
df_input_2 <- data.frame(
  Year = c(2010, 2010, 2010, 2010, 2015, 2015, 2015, 2015, 2020, 2020, 2020, 2020),
  village = c("A", "B", "C", "D", "A", "B", "C", "D", "A", "B", "C", "D"),
  share = c(40, 30, 20, 10, 30, 30, 25, 15, 20, 10, 30, 40)
)

# Bar width; segments start and end half a bar width away from the bar centre.
width <- .6

# Data wrangling: compute, per year, the cumulative share at which each
# village's stacked segment ends, then pair every year with the following
# year of the same village so that each row describes one connecting segment.
df_input_2 <- df_input_2 %>%
  group_by(Year) %>%
  # Accumulate in reverse village order (presumably to match the order in
  # which geom_col stacks the fill groups -- verify against the rendered plot).
  arrange(desc(village)) %>%
  mutate(share_cum = cumsum(share)) %>%
  group_by(village) %>%
  arrange(Year) %>%
  mutate(
    Year = factor(Year),
    # lead() yields NA for the last year of each village (no successor).
    Year_lead = lead(Year),
    share_cum_lead = lead(share_cum)
  ) %>%
  # Drop the grouping so it does not leak into later operations on the data.
  ungroup()

ggplot(df_input_2, aes(share, factor(Year), fill = village)) +
  geom_col(width = width) +
  geom_segment(
    aes(
      x = share_cum,
      xend = share_cum_lead,
      y = as.numeric(Year) + width / 2,
      yend = as.numeric(Year_lead) - width / 2,
      group = village
    ),
    # Rows for the last year have NA end points; remove them silently rather
    # than with a "Removed 4 rows containing missing values" warning.
    na.rm = TRUE
  )
我正在尝试把以下数据 (df_input) 绘制成堆叠条形图,并用连线展示各部分随时间的变化。有人知道该怎么做吗?
# Two-year example: one share value per village (A-D) for 2010 and 2020.
df_input <- data.frame(
  Year = rep(c(2010, 2020), each = 4),
  village = rep(c("A", "B", "C", "D"), times = 2),
  share = c(
    40, 30, 20, 10,
    30, 30, 25, 15
  )
)

# Three-year example: same villages with an additional 2015 observation.
df_input_2 <- data.frame(
  Year = rep(c(2010, 2015, 2020), each = 4),
  village = rep(c("A", "B", "C", "D"), times = 3),
  share = c(
    40, 30, 20, 10,
    30, 30, 25, 15,
    20, 10, 30, 40
  )
)
实现这一目标的一种办法是结合使用 `geom_col` 和 `geom_line`。对于 `geom_line`,您必须按映射到 `fill` 的变量进行分组,将 position 设置为 `"stack"`,并调整起点/终点位置以考虑条形的宽度。此外,您必须手动将 `geom_line` 的 `orientation` 设置为 `"y"`:
library(ggplot2)

# Bar width. The connecting lines are shifted by half of it so that they run
# between the facing edges of the two bars instead of between their centres.
width <- .6

# Horizontal stacked bars (one per year) plus one line per village that links
# the stacked positions of that village in both bars.
ggplot(df_input, aes(share, factor(Year), fill = village)) +
  geom_col(width = width) +
  geom_line(
    aes(
      x = share,
      # Move the line end at the last year down and the end at the first year
      # up by half a bar width. Comparing against max(Year) instead of the
      # literal 2020 keeps this working for any pair of years.
      y = as.numeric(factor(Year)) +
        ifelse(Year == max(Year), -width / 2, width / 2),
      group = village
    ),
    # Stack the x values so the lines follow the stacked bar segments.
    position = "stack",
    orientation = "y"
  )
编辑:如果数据包含两个以上的年份,事情就会变得有点棘手。在这种情况下,我会改用 `geom_segment`。此外,我们必须先做一些数据整理,为 `geom_segment` 准备数据:
library(ggplot2)
library(dplyr)

# Example data with three years
df_input_2 <- data.frame(
  Year = c(2010, 2010, 2010, 2010, 2015, 2015, 2015, 2015, 2020, 2020, 2020, 2020),
  village = c("A", "B", "C", "D", "A", "B", "C", "D", "A", "B", "C", "D"),
  share = c(40, 30, 20, 10, 30, 30, 25, 15, 20, 10, 30, 40)
)

# Bar width; segments start and end half a bar width away from the bar centre.
width <- .6

# Data wrangling: compute, per year, the cumulative share at which each
# village's stacked segment ends, then pair every year with the following
# year of the same village so that each row describes one connecting segment.
df_input_2 <- df_input_2 %>%
  group_by(Year) %>%
  # Accumulate in reverse village order (presumably to match the order in
  # which geom_col stacks the fill groups -- verify against the rendered plot).
  arrange(desc(village)) %>%
  mutate(share_cum = cumsum(share)) %>%
  group_by(village) %>%
  arrange(Year) %>%
  mutate(
    Year = factor(Year),
    # lead() yields NA for the last year of each village (no successor).
    Year_lead = lead(Year),
    share_cum_lead = lead(share_cum)
  ) %>%
  # Drop the grouping so it does not leak into later operations on the data.
  ungroup()

ggplot(df_input_2, aes(share, factor(Year), fill = village)) +
  geom_col(width = width) +
  geom_segment(
    aes(
      x = share_cum,
      xend = share_cum_lead,
      y = as.numeric(Year) + width / 2,
      yend = as.numeric(Year_lead) - width / 2,
      group = village
    ),
    # Rows for the last year have NA end points; remove them silently rather
    # than with a "Removed 4 rows containing missing values" warning.
    na.rm = TRUE
  )