修改带有缺失数据的分组条形图颜色

Modifying Grouped Bar Plot Colors with Missing Data

我正在创建两组数据相似的分组条形图(一组用于 2017 年,一组用于 2018 年)。创建这些图时,我发现有一个组(第 1 组)没有出现在 2017 年的图中,但出现在 2018 年的图中。这没有问题,因为 2017 年没有该组的数据。但是,我想不出办法强制图例显示这个缺失的组。另外,由于该组在 2017 年缺失,配色方案也发生了变化。无论数据中是否存在某个特定组,我都希望图例保持一致。因此,在这种情况下,我希望图例显示第 1、2 和 3 组,即使某些组并未同时出现在两个数据集中。此外,我注意到当某个月份缺少一个组时,条形会变宽以填充多出的空间。即使缺少某些组,是否有办法使条形宽度保持一致?

以下是构建数据集以及创建我目前制作的图表的代码:

library(dplyr)
library(ggplot2)

#### 2017 Data ###
# Monthly counts per group for 2017. Group 1 has no observations this year.
Year <- rep(2017, 21)
Month <- c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Oct", "Dec",
           "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
           "Oct", "Nov", "Dec")
Group <- c(rep(2, 9), rep(3, 12))
Count <- c(5, 1, 4, 3, 1, 1, 2, 5, 2,
           14, 17, 8, 17, 10, 12, 10, 14, 16, 9, 9, 6)

# Build the data frame column by column. Going through cbind() would coerce
# every column to character first (and to factor on R < 4.0, where a later
# as.numeric() returns level codes instead of the original values).
dta <- data.frame(
  Year = Year,
  Month = factor(Month),
  Group = factor(Group),
  Count = Count
)

# Re-level Month chronologically so the x axis is not sorted alphabetically.
# month.abb is the base R constant c("Jan", ..., "Dec").
tmp <- dta
tmp$Month <- factor(tmp$Month, levels = month.abb)

### 2017 Plot ###
# Grouped bar chart of monthly counts for 2017, one bar per group, with the
# count printed above each bar.
ggplot(tmp, aes(x = Month, y = Count, fill = Group)) +
  geom_col(width = 0.7, position = position_dodge(width = 0.7)) +
  # Dodge the labels by the same width as the bars so that every label is
  # centred over its own bar (a different width shifts labels sideways).
  geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.7),
    vjust = "bottom"
  ) +
  ggtitle("2017") +
  guides(fill = guide_legend(title = "Group:")) +
  scale_fill_brewer(palette = "Pastel1") +
  # Leave headroom above the tallest bar for its label.
  ylim(0, max(tmp$Count) + 5)


### 2018 Data ###
# Monthly counts per group for 2018. All three groups are present this year.
Year <- rep(2018, 24)
Month <- c("May", "Aug",
           "Feb", "Mar", "Apr", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov",
           "Dec",
           "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
           "Oct", "Nov", "Dec")
Group <- c(rep(1, 2), rep(2, 10), rep(3, 12))
Count <- c(1, 1,
           2, 4, 1, 5, 4, 2, 2, 1, 3, 3,
           38, 17, 6, 12, 27, 18, 18, 7, 6, 18, 15, 10)

# Build the data frame column by column. Going through cbind() would coerce
# every column to character first (and to factor on R < 4.0, where a later
# as.numeric() returns level codes instead of the original values).
dta <- data.frame(
  Year = Year,
  Month = factor(Month),
  Group = factor(Group),
  Count = Count
)

# Re-level Month chronologically so the x axis is not sorted alphabetically.
# month.abb is the base R constant c("Jan", ..., "Dec").
tmp <- dta
tmp$Month <- factor(tmp$Month, levels = month.abb)

### 2018 Plot ###
# Grouped bar chart of monthly counts for 2018, one bar per group, with the
# count printed above each bar.
ggplot(tmp, aes(x = Month, y = Count, fill = Group)) +
  geom_col(width = 0.7, position = position_dodge(width = 0.7)) +
  # Dodge the labels by the same width as the bars so that every label is
  # centred over its own bar (a different width shifts labels sideways).
  geom_text(
    aes(label = Count),
    position = position_dodge(width = 0.7),
    vjust = "bottom"
  ) +
  ggtitle("2018") +
  guides(fill = guide_legend(title = "Group:")) +
  scale_fill_brewer(palette = "Pastel1") +
  # Leave headroom above the tallest bar for its label.
  ylim(0, max(tmp$Count) + 5)

对于您的 2017 年绘图,您需要确保 Group 是一个因子,并且包含所有因子水平。您还需要在 scale_fill_brewer 中设置 drop = FALSE。为防止某个数据点缺少组时条形变宽,请在 position_dodge() 内设置 preserve = "single":

# Grouped bar chart whose legend, colours and bar widths stay the same even
# when a group has no rows in the data being plotted.
# Declaring all three levels on the fill factor keeps the colour assignment
# stable across years.
ggplot(tmp, aes(x = Month, y = Count,
                fill = factor(Group, levels = 1:3))) +
  # preserve = "single" keeps every bar the same width when a month lacks a
  # group. show.legend = TRUE keeps the legend key for an unused level, which
  # ggplot2 >= 3.5.0 otherwise omits for levels absent from the layer's data.
  geom_col(width = 0.7,
           position = position_dodge(width = 0.7, preserve = "single"),
           show.legend = TRUE) +
  # Labels must use the same dodge width and preserve setting as the bars,
  # otherwise they drift away from their bars in months with a missing group.
  geom_text(aes(label = Count),
            position = position_dodge(width = 0.7, preserve = "single"),
            vjust = "bottom") +
  ggtitle("2017") +
  guides(fill = guide_legend(title = "Group:")) +
  # drop = FALSE keeps unused factor levels in the scale (and so the legend).
  scale_fill_brewer(palette = "Pastel1", drop = FALSE) +
  # Leave headroom above the tallest bar for its label.
  ylim(0, max(tmp$Count) + 5)

您可以对 2018 年的数据集运行相同的代码(只需更改 ggtitle),您将得到: