使用 R 的 ggalluvial 绘制 Sankey 图查看数据流,并用 ggplot 调整外观

Using Sankey plot to see data flow in R ggalluvial and cosmetics by ggplot

我有一张患者聚类的数据表,包含治疗前(共识)和治疗后(单一药物)的聚类结果,我想展示患者在治疗前后如何在不同的簇之间流动。在这种情况下,实际的簇编号本身并没有多大意义,重要的是:对于大多数患者来说,治疗前聚在一起的,治疗后也会聚在一起;只有少数患者会换到别的簇。

这里是数据截图

dummy dataset 

# Example data in dput() form: 69 rows (row.names = c(NA, -69L)) carrying the
# classes "tbl_df", "tbl" and "data.frame".
# Columns:
#   Stimulation                    - character, "3S" in every row
#   Patient.ID                     - character patient identifier
#   `Cluster assigned consensus`   - numeric cluster label, values 1 to 8
#   `Cluster assigned single drug` - character cluster label, "1" to "8"
#   count                          - numeric, 1 in every row (flow weight)
# NOTE(review): the plotting snippets refer to this data as `CLL3S`, but the
# expression is not assigned here - presumably it has to be bound to that name
# (CLL3S <- structure(...)) before the plots can run.
structure(list(Stimulation = c("3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", 
"3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S", "3S"), Patient.ID =       c("S3077497", 
"S1041120", "S162465", "S563275", "S2911623", "S3117192", "S2859024", 
"S2088278", "S3306185", "S190789", "S12146451", "S2170842", "S115594", 
"S2024203", "S1063872", "S2914138", "S303984", "S570813", "S2176683", 
"S820460", "S1235729", "S3009401", "S2590229", "S629309", "S1208256", 
"S2572773", "S3180483", "S3032079", "S3217608", "S5566943",     "S5473728", 
"S104259", "S2795346", "S2848989", "S2889801", "S2813983", "S2528246", 
"S3151923", "S2592908", "S2603793", "S5565867", "S3127064", "S675629", 
"S834679", "S3011944", "S5011583", "S2687896", "S2998620", "S651963", 
"S2104595", "S2433454", "S2565220", "S3307762", "S294778", "S995510", 
"S2476822", "S140868", "S1018263", "S2990223", "S5524130", "S1042529", 
"S999706", "S363003", "S2303087", "S868213", "S5568359", "S3174542", 
"S521782", "S3294727"), `Cluster assigned consensus` = c(2, 2, 
2, 2, 2, 5, 5, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 4, 3, 7, 4, 4, 4, 
4, 4, 4, 8, 8, 4, 7, 4, 1, 1, 1, 1, 1, 1, 1, 8, 8, 8, 8, 7, 7, 
7, 7, 7, 3, 7, 6, 6, 6, 6, 6, 8, 7, 7, 5, 7, 5, 7, 7, 7, 8, 8, 
4, 7, 4, 7), `Cluster assigned single drug` = c("1", "1", "1", 
"1", "1", "1", "1", "2", "2", "2", "2", "2", "2", "2", "2", "2", 
"2", "2", "2", "2", "3", "3", "3", "3", "3", "3", "3", "4", "4", 
"4", "4", "5", "5", "5", "5", "5", "5", "5", "6", "6", "6", "6", 
"6", "6", "6", "6", "6", "6", "6", "7", "7", "7", "7", "7", "7", 
"7", "7", "8", "8", "8", "8", "8", "8", "8", "8", "8", "8", "8", 
"8"), count = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), row.names = c(NA, -69L), class =     c("tbl_df", 
"tbl", "data.frame"))

我是第一次接触 Sankey 图,所以并不熟悉。我添加了 count 列,每个患者的计数为 1,这样流的粗细就可以由计数累加得到。

我从 R 教程修改而来,可视化代码在这里

library(ggplot2)
library(ggalluvial)

# Alluvial (Sankey-style) plot of CLL3S: one axis per clustering, with each
# row contributing `count` to the flow height.
ggplot(
  CLL3S,
  aes(
    y = count,
    axis1 = `Cluster assigned consensus`,
    axis2 = `Cluster assigned single drug`
  )
) +
  # Flows between the two axes, filled by the consensus cluster value.
  geom_alluvium(aes(fill = `Cluster assigned consensus`)) +
  # Strata boxes with default width and fill, labelled with the stratum value.
  geom_stratum() +
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  # Name the two axis positions on the x axis.
  scale_x_discrete(
    limits = c("Consensus cluster", "Single-drug cluster"),
    expand = c(0.1, 0.1)
  ) +
  labs(
    x = "Clusters",
    title = "Patient flow between the Consensus clusters and Single-drug treated clusters",
    subtitle = "3S stimulated patients"
  ) +
  theme_minimal()

这样基本可行,只是图不好看:

可以看到,簇编号被巨大的白色空框包围着。我怎样才能把这些框改小?我怎样才能给这些框填上不同的颜色,并确保在更改 geom_alluvium 的填充(fill)时,数据流的颜色与框(共识一侧的框)的颜色保持一致?

你在 geom_stratum 中控制它。试试这个

library(ggplot2)
library(ggalluvial)
library(RColorBrewer)

# Palette setup for the alluvial plot.
# `nb.cols` is the number of colours to interpolate from the 8-colour "Set2"
# Brewer palette; the plot takes the first eight entries of `mycolor1` as the
# strata fill on each axis.
nb.cols <- 10
mycolor1 <- colorRampPalette(brewer.pal(8, "Set2"))(nb.cols)

# Outline colours passed to the fill legend keys through override.aes.
mycolors <- c("red", "blue", "green", "orange")

# Alluvial plot of CLL3S with narrower, explicitly coloured strata boxes.
ggplot(
  CLL3S,
  aes(
    axis1 = `Cluster assigned consensus`,
    axis2 = `Cluster assigned single drug`,
    y = count
  )
) +
  # Flows between the two axes, filled by the consensus cluster value.
  geom_alluvium(aes(fill = `Cluster assigned consensus`)) +
  # Box width and colours are set here: the same eight fills are repeated for
  # the second axis, and every box gets a red outline.
  geom_stratum(
    width = 0.25,
    fill = rep(mycolor1[1:8], times = 2),
    color = "red"
  ) +
  geom_text(stat = "stratum", aes(label = after_stat(stratum))) +
  # Disabled alternatives kept from the original answer:
  #   geom_stratum() with default settings
  #   scale_fill_manual(values = mycolors)
  scale_x_discrete(
    limits = c("Consensus cluster", "Single-drug cluster"),
    expand = c(0.1, 0.1)
  ) +
  # Draw the fill scale as a legend whose keys are outlined with `mycolors`.
  guides(fill = guide_legend(override.aes = list(color = mycolors))) +
  labs(
    x = "Clusters",
    title = "Patient flow between the Consensus clusters and Single-drug treated clusters",
    subtitle = "3S stimulated patients"
  ) +
  theme_minimal()