ggalluvial - 对层的顺序进行排序

Question

几天来我一直在尝试对 ggalluvial 中的地层和流的顺序进行排序。我想通过不同的筛查程序（X1、X2、X3、X4）可视化患者的流程，并根据最终诊断（X4 中的值）为流程着色。

你能帮我对示例 A 和 B 的第一列中的组内的值进行排序吗？我希望每个组中的所有红色、黄色和蓝色值都堆叠在一起。

到目前为止，我已经尝试了宽格式数据的各种组合，包括 aes.flow 的 "backward" 和 "forward" 取值、lode.guidance 以及 lode.ordering……

如果这在 ggalluvial 中是不可能的，但在其他包中是可能的，我也想知道。

提前致谢。

宽格式数据：

# Simulate a reproducible cohort of patients moving through four screening
# steps (X1 -> X2 -> X3 -> X4). Each step is drawn conditionally on the value
# of the previous step; X4 is the final diagnosis.
set.seed(1)

n_patients <- 879

# Possible final diagnoses for every patient that was identified at step X3.
diagnoses <- c("Two_primary_ind", "One_primary_ind", "No TW")

data <- tibble(
  ID = seq_len(n_patients),
  X1 = sample(
    c("only_parent", "parent_and_3D", "only_3D"),
    size = n_patients, replace = TRUE, prob = c(0.1, 0.8, 0.1)
  )
) %>%
  # mutate() evaluates its expressions in order, so X3 can refer to X2 and X4
  # to X3. The sample() calls are kept in their original sequence because the
  # simulated values depend on the order in which random numbers are consumed.
  mutate(
    X2 = case_when(
      X1 == "only_parent" ~ sample(
        c("only_I", "not_identified"),
        size = n(), replace = TRUE, prob = c(0.1, 0.9)
      ),
      X1 == "parent_and_3D" ~ sample(
        c("only_I", "both_I_and_II", "only_II", "not_identified"),
        size = n(), replace = TRUE, prob = c(0.05, 0.05, 0.2, 0.7)
      ),
      X1 == "only_3D" ~ sample(
        c("only_II", "not_identified"),
        size = n(), replace = TRUE, prob = c(0.1, 0.9)
      ),
      TRUE ~ NA_character_
    ),
    X3 = case_when(
      X2 == "only_I" ~ "PO_only",
      X2 == "both_I_and_II" ~ sample(
        c("PO_and_EHL", "PO_and_F/T", "PO_and_F/T_and_EHL"),
        size = n(), replace = TRUE, prob = c(0.3, 0.5, 0.2)
      ),
      # These weights sum to 1.1; sample() treats `prob` as relative weights,
      # so they are left exactly as given.
      X2 == "only_II" ~ sample(
        c("F/T", "F/T_and_EHL", "EHL"),
        size = n(), replace = TRUE, prob = c(0.1, 0.6, 0.4)
      ),
      X2 == "not_identified" ~ "not_identified",
      TRUE ~ NA_character_
    ),
    X4 = case_when(
      X3 == "PO_only" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.02, 0.1, 0.88)
      ),
      X3 == "PO_and_EHL" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.05, 0.2, 0.75)
      ),
      X3 == "PO_and_F/T" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.05, 0.2, 0.75)
      ),
      X3 == "PO_and_F/T_and_EHL" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.05, 0.2, 0.75)
      ),
      X3 == "F/T" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.02, 0.1, 0.88)
      ),
      X3 == "F/T_and_EHL" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.05, 0.2, 0.75)
      ),
      # Relative weights again (sum is 1.1); kept unchanged.
      X3 == "EHL" ~ sample(
        diagnoses, size = n(), replace = TRUE, prob = c(0.02, 0.2, 0.88)
      ),
      X3 == "not_identified" ~ "not_identified",
      TRUE ~ NA_character_
    )
  )

head(data)

# A tibble: 6 x 5
     ID X1            X2             X3             X4            
  <int> <chr>         <chr>          <chr>          <chr>         
1     1 parent_and_3D not_identified not_identified not_identified
2     2 parent_and_3D only_II        F/T_and_EHL    No TW         
3     3 parent_and_3D not_identified not_identified not_identified
4     4 only_parent   only_I         PO_only        No TW         
5     5 parent_and_3D only_II        F/T_and_EHL    No TW         
6     6 only_3D       not_identified not_identified not_identified

示例 A
这些值未在第一列的底部框中排序。

# Collapse the patients into one row per distinct X1-X4 pathway, then reshape
# to long form: one row per pathway (`subject`) and axis (`key`), with the
# pathway size in `n` and the final diagnosis in `fill_stat`.
data_long_a <- data %>%
  # count() returns an ungrouped table with n = patients per pathway, so no
  # separate group_by()/ungroup() pair is needed.
  count(X1, X2, X3, X4) %>%
  mutate(
    # The level order drives both the sorting below and the order in which
    # the manual fill colours are assigned in the plots.
    fill_stat = factor(
      X4,
      levels = c(
        "not_identified", "No TW", "One_primary_ind", "Two_primary_ind"
      )
    )
  ) %>%
  arrange(fill_stat) %>%
  # One id per pathway. seq_len() yields a zero-length vector for an empty
  # table, whereas seq(1, n()) would yield c(1, 0) and make mutate() fail.
  mutate(subject = seq_len(n())) %>%
  # NOTE(review): gather() is kept instead of pivot_longer() so the snippet
  # still runs with the tidyr version of the original 2019 reprex - confirm
  # before modernising.
  gather(key, value, -n, -subject, -fill_stat) %>%
  mutate(key = factor(key, levels = c("X1", "X2", "X3", "X4"))) %>%
  arrange(key, fill_stat)



# Example A as posted in the question: flows between the first two screening
# steps, filled by final diagnosis. aes.bind is not set here, which is why the
# colours are not grouped within the strata of the first column.
ggplot(
  data = filter(data_long_a, key %in% c("X1", "X2")),
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat)) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  # Colours follow the level order of fill_stat:
  # not_identified, No TW, One_primary_ind, Two_primary_ind.
  scale_fill_manual(values = c("#BAB3B3EB", "red", "yellow", "blue")) +
  theme_void()

示例 B
第一列中的流线未排序。

# Same reshaping as for example A, restricted to identified patients and to
# the axes X2-X4: one row per pathway (`subject`) and axis (`key`).
data_long_b <- data %>%
  filter(X4 != "not_identified") %>%
  # count() keeps only the counted columns plus n and returns an ungrouped
  # table, so X1 needs no explicit removal and no ungroup() is required.
  count(X2, X3, X4) %>%
  mutate(
    # "not_identified" stays in the level set although it was filtered out,
    # so the remaining levels keep the same relative order as in example A.
    fill_stat = factor(
      X4,
      levels = c(
        "not_identified", "No TW", "One_primary_ind", "Two_primary_ind"
      )
    )
  ) %>%
  arrange(fill_stat) %>%
  # One id per pathway. seq_len() yields a zero-length vector for an empty
  # table, whereas seq(1, n()) would yield c(1, 0) and make mutate() fail.
  mutate(subject = seq_len(n())) %>%
  gather(key, value, -n, -subject, -fill_stat) %>%
  mutate(key = factor(key, levels = c("X2", "X3", "X4"))) %>%
  arrange(key, fill_stat)


# Example B as posted in the question: all three axes (X2-X4), with the flows
# taking their fill via aes.flow = "backward". aes.bind is not set, so the
# flows in the first column are not grouped by colour.
ggplot(
  data = data_long_b,
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat), aes.flow = "backward") +
  geom_stratum() +
  geom_text(stat = "stratum") +
  # Three colours for the three diagnoses that remain after filtering.
  scale_fill_manual(values = c("red", "yellow", "blue")) +
  theme_void()

Answer 1

就像 Cory Brunson 在评论中写的那样："The key is aes.bind = TRUE "

示例 A：

# Example A with the fix: aes.bind = TRUE lets the fill aesthetic take part in
# ordering the lodes inside each stratum, so equal colours are stacked together.
# NOTE(review): newer ggalluvial releases document character values for
# aes.bind - confirm that logical TRUE is accepted by the installed version.
ggplot(
  data = filter(data_long_a, key %in% c("X1", "X2")),
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat), aes.bind = TRUE) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("#BAB3B3EB", "red", "yellow", "blue")) +
  theme_void()

示例 B：

# Example B with the fix: geom_flow() with aes.bind = TRUE so that the fill
# aesthetic takes part in ordering the lodes inside each stratum.
# NOTE(review): newer ggalluvial releases document character values for
# aes.bind - confirm that logical TRUE is accepted by the installed version.
ggplot(
  data = data_long_b,
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat), aes.bind = TRUE) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("red", "yellow", "blue")) +
  theme_void()

Answer 2

这里的背景是：地层（strata，即每个轴上堆叠的不同取值）可能具有自然顺序，但代表个案或群组的冲积流（alluvia）通常没有。这意味着统计层（例如 stat_alluvium()）的任务之一，是确定每个地层内矿脉（lodes）的排列顺序。（这一顺序进而决定了地层之间各条流（flows）的位置。）

为了提高图形的清晰度，stat_alluvium() 和 stat_flow() 都会参考个案或群组在邻近轴上所属的地层，来决定它们在给定轴上的位置。默认情况下按 "zigzag" 顺序进行，这一做法改编自 alluvial 包；其他选项请参阅 "lode guidance" 文档。

当用户希望在地层内把同一群组归并在一起时，这种行为可能会带来问题，例如为矿脉和流指定了美学映射时（通常是 fill，也可以是 alpha、colour、linetype 和 size）。aes.bind 参数的解决办法是：在确定矿脉顺序时，让美学映射的优先级排在地层之前（是"先于"地层，而不是"取代"地层）。

@Steen 已经给出了语法层面的答案，我在这里基本照搬。我只做了一处改动：在示例 B 中把 stat_flow() 换成 stat_alluvium()（在代码中即把 geom_flow() 换成 geom_alluvium()），以说明 aes.bind 可以传给这两者中的任何一个，并且都会被正确解释。

示例 A：

# Example A: first two screening steps only, flows filled by final diagnosis.
# aes.bind = TRUE makes the fill aesthetic take part in the lode ordering
# within each stratum, which groups equal colours together.
# NOTE(review): newer ggalluvial releases document character values for
# aes.bind - confirm that logical TRUE is accepted by the installed version.
ggplot(
  data = filter(data_long_a, key %in% c("X1", "X2")),
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_flow(mapping = aes(fill = fill_stat), aes.bind = TRUE) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("#BAB3B3EB", "red", "yellow", "blue")) +
  theme_void()

示例 B：

# Example B drawn with geom_alluvium() instead of geom_flow(), to show that
# aes.bind = TRUE is accepted by this layer as well.
# NOTE(review): newer ggalluvial releases document character values for
# aes.bind - confirm that logical TRUE is accepted by the installed version.
ggplot(
  data = data_long_b,
  mapping = aes(
    x = key, y = n,
    stratum = value, alluvium = subject, label = value
  )
) +
  geom_alluvium(mapping = aes(fill = fill_stat), aes.bind = TRUE) +
  geom_stratum() +
  geom_text(stat = "stratum") +
  scale_fill_manual(values = c("red", "yellow", "blue")) +
  theme_void()

由 reprex 包（v0.2.1）于 2019-07-27 创建

ggalluvial - 对层的顺序进行排序

ggalluvial - sort the order of strata

r

sankey-diagram