自定义桑基图以适应大型数据集

Question

请运行下面的脚本。我在 R 中创建了一个桑基图（Sankey diagram），并使用 bupaR 库中 "patients" 数据集的数据进行绘图，请参阅快照以供参考。我面临的问题是，这个自定义图表是通过逐一声明用户（"r1"、"r2" 等）与活动（"Registration"、"X-Ray" 等）之间的关系来构建的。如果用户和活动的数量很多，逐一声明每个关系将是一项非常繁琐的任务。请帮助我把绘图代码改成动态生成，以便同一段代码可以适用于大量的用户和活动。

library(plotly)
library(bupaR)
# Hard-coded Sankey diagram of the bupaR "patients" event data: every
# employee -> activity relationship is declared by hand, which is exactly
# what becomes tedious once there are many employees and activities.

# Number of rows of `patients` in which employee `staff` performed the
# activity `activity`. Rows where either column is NA are not counted.
count_events <- function(activity, staff) {
  sum(patients$handling == activity & patients$employee == staff,
      na.rm = TRUE)
}

# One value per declared link, in the same order as `source`/`target` below.
value1 <- count_events("Registration", "r1")
value2 <- count_events("Triage and Assessment", "r1")
value3 <- count_events("Check-out", "r1")
value4 <- count_events("Triage and Assessment", "r2")
value5 <- count_events("Blood test", "r3")
value6 <- count_events("Triage and Assessment", "r3")
value7 <- count_events("X-Ray", "r3")
value8 <- count_events("MRI SCAN", "r4")
value9 <- count_events("X-Ray", "r4")
value10 <- count_events("X-Ray", "r5")
value11 <- count_events("Discuss Results", "r6")
value12 <- count_events("MRI SCAN", "r6")
value13 <- count_events("Check-out", "r7")

trace1 <- list(
  domain = list(
    x = c(0, 1),
    y = c(0, 1)
  ),
  link = list(
    label = c(
      "Case1", "Case2", "Case3", "Case4", "Case5", "Case6", "Case7",
      "Case8", "Case9", "Case10", "Case11", "Case12", "Case13"
    ),
    # `source` and `target` are zero-based positions in `node$label`:
    # 0-6 are the employees, 7-13 are the activities.
    source = c(0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6),
    target = c(7, 8, 13, 8, 9, 8, 11, 10, 11, 11, 12, 10, 13),
    value = c(
      value1, value2, value3, value4, value5, value6, value7,
      value8, value9, value10, value11, value12, value13
    )
  ),
  # Each label is kept on a single line: a string literal wrapped across
  # source lines would embed a newline and indentation in the label.
  node = list(label = c(
    "R1", "R2", "R3", "R4", "R5", "R6", "R7",
    "Registration", "Triage and Assessment", "Blood Test",
    "MRI Scan", "X-RAY", "Discuss Results", "Check Out"
  )),
  type = "sankey"
)
data <- list(trace1)
p <- plot_ly()
p <- add_trace(p, domain = trace1$domain, link = trace1$link,
               node = trace1$node, type = trace1$type)
p

Answer 1

应该这样做

# Build the Sankey diagram dynamically: one link per (employee, handling)
# pair that occurs in `patients`, weighted by the number of matching rows.
sankeyData <- patients %>% 
  group_by(employee, handling) %>% 
  count()

# Work on plain character vectors so that factor columns are compared and
# labelled by their text rather than by their internal integer codes.
linkEmployees <- as.character(sankeyData$employee)
linkHandlings <- as.character(sankeyData$handling)

# Every employee and every activity must appear exactly once as a node;
# employees come first, activities follow.
employeeNodes <- unique(linkEmployees)
handlingNodes <- unique(linkHandlings)
sankeyNodes <- list(label = c(employeeNodes, handlingNodes))

trace2 <- list(
  domain = list(
    x = c(0, 1), 
    y = c(0, 1)
  ), 
  link = list(
    label = paste0("Case", seq_len(nrow(sankeyData))), 
    # `source`/`target` are zero-based positions in `sankeyNodes$label`.
    # match() returns exactly one index per link; activity indices are
    # offset by the number of employee nodes that precede them.
    source = match(linkEmployees, employeeNodes) - 1, 
    target = length(employeeNodes) + match(linkHandlings, handlingNodes) - 1, 
    value = sankeyData$n
  ), 
  node = list(label = sankeyNodes$label), 
  type = "sankey"
)
data2 <- list(trace2)
p <- plot_ly()
p <- add_trace(p, domain = trace2$domain, link = trace2$link, 
               node = trace2$node, type = trace2$type)
p

自定义桑基图以适应大型数据集

Customizing the sankey chart to cater large datasets

r

ggplot2

plotly

sankey-diagram

ggplotly