ggplot2 - 在具有正负 y 值的条形图中以倒序排列的有序因子堆叠

ggplot2 - ordered factor stacking in inverted order in barplot with positive and negative y-values

我正在尝试构建堆叠条形图,但我在堆叠顺序方面遇到了问题。

我的数据有四列:变量名称、值、计数和百分比。值的取值为 1–7。分配给 5、6 或 7 的百分比都是负数,我怀疑这就是问题所在,但我无法弄清楚。当我绘制这张图时,堆叠中各值的顺序是 5、6、7、4、3、2、1,而它应该是 7、6、5、4、3、2、1。我甚至尝试过把 5、6、7 这三个值本身改成负值,但这没有用。

这是一个简单的骨架版本。我最终想做的是更“花哨”的版本,但我卡在了这一件事上。我总是在 ggplot2 上栽跟头,尽管我很喜欢它。

# Example data: seven ordered response levels ("1"-"7") for each of two
# items, with a count and a percentage per level. Percent is stored as a
# negative number for levels 5-7.
# The `groups` attribute is reproduced exactly as in the dput() output
# (presumably left over from a dplyr grouping step -- TODO confirm); the
# object itself has class "data.frame" only.
df <- structure(
  list(
    name = rep(
      c("cloud_data_available", "on_premise_data_available"),
      each = 7L
    ),
    value = structure(
      rep(1:7, times = 2L),
      .Label = as.character(1:7),
      class = c("ordered", "factor")
    ),
    count = c(
      1L, 2L, 1L, 5L, 18L, 52L, 55L,
      2L, 4L, 5L, 4L, 16L, 33L, 70L
    ),
    Percent = c(
      0.7, 1.5, 0.7, 3.7, -13.4, -38.8, -41,
      1.5, 3, 3.7, 3, -11.9, -24.6, -52.2
    )
  ),
  row.names = c(NA, -14L),
  groups = structure(
    list(
      name = c("cloud_data_available", "on_premise_data_available"),
      .rows = structure(
        list(1:7, 8:14),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    row.names = 1:2,
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  ),
  class = "data.frame"
)

# Horizontal stacked bars: one bar per `name`, one segment per `value`,
# each segment labelled with its `value` at the segment midpoint.
ggplot(
  data = df,
  mapping = aes(x = name, y = Percent, fill = value, label = value)
) +
  geom_col(width = 0.5, position = position_stack()) +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  scale_x_discrete() +
  # Flip so the bars run horizontally.
  coord_flip()

这是我得到的:

提前致谢!我来这里总能学到新东西。

我不确定这样的混合值是否应该使用堆叠条形图。试试这个:

library(ggplot2)

# Dodged (side-by-side) bars instead of stacked ones: one bar per `value`
# within each `name`, labelled with its `value`.
# The bars and the labels must be dodged by the same width, otherwise the
# labels drift away from their bars.
ggplot(df, aes(x = name, y = Percent, fill = value, label = value)) +
  geom_col(position = position_dodge(width = 0.5), width = 0.5) +
  geom_text(size = 3, position = position_dodge(width = 0.5), vjust = -1)

堆叠的顺序由有序因子 value 的水平(levels)顺序决定。所以你需要为正百分比和负百分比设置相反的水平顺序。

# Stacking follows the level order of the fill factor, so build a copy of
# `value` whose last three levels are listed as "7", "6", "5".
df <- df %>%
  mutate(
    value2 = factor(
      value,
      levels = c("1", "2", "3", "4", "7", "6", "5"),
      ordered = TRUE
    )
  )

# Same horizontal stacked bar chart as before, now filled and labelled by
# the re-levelled factor.
ggplot(
  data = df,
  mapping = aes(x = name, y = Percent, fill = value2, label = value2)
) +
  geom_col(width = 0.5, position = position_stack()) +
  geom_text(position = position_stack(vjust = 0.5), size = 3) +
  scale_x_discrete() +
  coord_flip()

(根据 OP 评论进行编辑):为了强制执行图例中的顺序,可以使用 scale_fill_manual():

# Manually specify the break order shown in the legend.
brks <- c("1", "2", "3", "4", "5", "6", "7")

# Bind each colour to its level by name. An unnamed `values` vector is
# matched positionally, and what it is matched against (scale limits vs.
# breaks) depends on the ggplot2 version; a named vector is always matched
# by name, so level "5" gets "#D0F0C0" regardless of the level order used
# for stacking in `value2`.
colrs <- setNames(
  c("#960019", "#D21F3C", "#FA8072", "gray", "#D0F0C0", "#C7EA46", "#4CBB17"),
  brks
)

# Horizontal stacked bars filled by the re-levelled factor `value2`; the
# legend order comes from `brks`, not from the factor levels.
ggplot(df, aes(x = name, y = Percent, fill = value2, label = value2)) +
  geom_col(position = "stack", width = 0.5) +
  scale_x_discrete() +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  coord_flip() +
  scale_fill_manual(values = colrs, breaks = brks)

这有点 hacky,但我能想到的唯一方法是将数据拆分为负值和正值,并为轴的每一侧添加单独的层。

# Split the rows by level: "5"-"7" go in one data set and "1"-"4" in the
# other, so each side of the axis can be drawn as its own pair of layers.
df_neg <- df %>%
  filter(value %in% c("5", "6", "7"))
df_pos <- df %>%
  filter(value %in% c("1", "2", "3", "4"))

# x and y are the same for every layer, so they are mapped once here and
# inherited; each layer only supplies its data and its fill/label/group.
ggplot(mapping = aes(x = Percent, y = name)) +
  # Levels "1"-"4": stacked in the factor's own level order.
  geom_col(
    data = df_pos,
    mapping = aes(fill = value),
    width = 0.5
  ) +
  geom_text(
    data = df_pos,
    mapping = aes(label = value, group = value),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  # Levels "5"-"7": same layers, but grouped by the reversed factor.
  geom_col(
    data = df_neg,
    mapping = aes(fill = fct_rev(value)),
    width = 0.5
  ) +
  geom_text(
    data = df_neg,
    mapping = aes(label = value, group = fct_rev(value)),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  # Single-row legend below the plot, with its key order reversed.
  guides(fill = guide_legend(nrow = 1, reverse = TRUE)) +
  theme(legend.position = "bottom")

感谢 @Phil 和 @Alexlok 的回答。@Phil 的方法有效,但我选择了 @Alexlok 的答案,因为它让我学到了一些 ggplot2 的工作原理。为了使图例正确,最终代码为:

# Example data: seven ordered response levels ("1"-"7") for each of two
# items, with a count and a percentage per level. Percent is stored as a
# negative number for levels 5-7.
# The `groups` attribute is reproduced exactly as in the dput() output
# (presumably left over from a dplyr grouping step -- TODO confirm); the
# object itself has class "data.frame" only.
df <- structure(
  list(
    name = rep(
      c("cloud_data_available", "on_premise_data_available"),
      each = 7L
    ),
    value = structure(
      rep(1:7, times = 2L),
      .Label = as.character(1:7),
      class = c("ordered", "factor")
    ),
    count = c(
      1L, 2L, 1L, 5L, 18L, 52L, 55L,
      2L, 4L, 5L, 4L, 16L, 33L, 70L
    ),
    Percent = c(
      0.7, 1.5, 0.7, 3.7, -13.4, -38.8, -41,
      1.5, 3, 3.7, 3, -11.9, -24.6, -52.2
    )
  ),
  row.names = c(NA, -14L),
  groups = structure(
    list(
      name = c("cloud_data_available", "on_premise_data_available"),
      .rows = structure(
        list(1:7, 8:14),
        ptype = integer(0),
        class = c("vctrs_list_of", "vctrs_vctr", "list")
      )
    ),
    row.names = 1:2,
    class = c("tbl_df", "tbl", "data.frame"),
    .drop = TRUE
  ),
  class = "data.frame"
)

# Stacking follows the level order of the fill factor, so build a copy of
# `value` whose last three levels are listed as "7", "6", "5".
df <- df %>%
  mutate(value2 = ordered(value, c("1", "2", "3", "4", "7", "6", "5")))

# Legend order: plain 1..7, independent of the stacking order in `value2`.
brks <- c("1", "2", "3", "4", "5", "6", "7")

# Bind each colour to its level by name. An unnamed `values` vector is
# matched positionally, and what it is matched against (scale limits vs.
# breaks) depends on the ggplot2 version; a named vector is always matched
# by name.
colrs <- setNames(
  c("#960019", "#D21F3C", "#FA8072", "gray", "#D0F0C0", "#C7EA46", "#4CBB17"),
  brks
)

# Horizontal stacked bars labelled with the absolute percentage; segments
# smaller than 3.5 (in absolute value) get an empty label to avoid clutter.
ggplot(
  df,
  aes(
    x = name,
    y = Percent,
    fill = value2,
    label = ifelse(abs(Percent) < 3.5, "", abs(Percent))
  )
) +
  geom_col(position = "stack", width = 0.5) +
  scale_x_discrete() +
  scale_fill_manual(values = colrs, breaks = brks) +
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  coord_flip()

现在,我当然不希望标签是“value2”,我只是用它来查看堆叠的顺序。我现在把百分比作为各段上显示的标签。为了美观,我想隐藏非常小的数字。所以,最终成品:

再次感谢 Stack Overflow 社区!