自定义桑基图以适应大型数据集
Customizing the Sankey chart to cater to large datasets
请运行下面的脚本。我在 R 中创建了一个 Sankey 图(桑基图),并使用 bupaR 库中 "patients" 数据集的数据进行绘图。请参阅快照以供参考。我面临的问题是,这个自定义图表是通过逐一声明并建立用户("r1"、"r2" 等)与活动("Registration"、"X-Ray" 等)之间的关系来创建的。如果用户和活动的数量很大,逐一声明每个关系将成为一项非常繁琐的任务。请帮助我把图表改为动态生成,以便同样的代码可以适用于大量的用户和活动。
library(plotly)
library(bupaR)
# Hand-written Sankey diagram: every employee -> activity link is declared
# explicitly and weighted by the number of matching rows in `patients`.

# Number of rows in `patients` recorded for the given activity/employee pair.
# Rows where either comparison is NA are not counted.
count_events <- function(activity, resource) {
  sum(
    patients$handling == activity & patients$employee == resource,
    na.rm = TRUE
  )
}

value1 <- count_events("Registration", "r1")
value2 <- count_events("Triage and Assessment", "r1")
value3 <- count_events("Check-out", "r1")
value4 <- count_events("Triage and Assessment", "r2")
value5 <- count_events("Blood test", "r3")
value6 <- count_events("Triage and Assessment", "r3")
value7 <- count_events("X-Ray", "r3")
value8 <- count_events("MRI SCAN", "r4")
value9 <- count_events("X-Ray", "r4")
value10 <- count_events("X-Ray", "r5")
value11 <- count_events("Discuss Results", "r6")
value12 <- count_events("MRI SCAN", "r6")
value13 <- count_events("Check-out", "r7")

trace1 <- list(
  domain = list(
    x = c(0, 1),
    y = c(0, 1)
  ),
  link = list(
    label = paste0("Case", seq_len(13)),
    # `source` and `target` are zero-based positions in `node$label`:
    # 0-6 are the employees, 7-13 are the activities.
    source = c(0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6),
    target = c(7, 8, 13, 8, 9, 8, 11, 10, 11, 11, 12, 10, 13),
    value = c(
      value1, value2, value3, value4, value5, value6, value7,
      value8, value9, value10, value11, value12, value13
    )
  ),
  node = list(
    label = c(
      "R1", "R2", "R3", "R4", "R5", "R6", "R7",
      # "Blood Test" is kept on one line so the label has no embedded newline.
      "Registration", "Triage and Assessment", "Blood Test",
      "MRI Scan", "X-RAY", "Discuss Results", "Check Out"
    )
  ),
  type = "sankey"
)
data <- list(trace1)
p <- plot_ly()
p <- add_trace(
  p,
  domain = trace1$domain,
  link = trace1$link,
  node = trace1$node,
  type = trace1$type
)
p
应该这样做
# Build the Sankey diagram dynamically: one link per (employee, activity)
# pair that occurs in `patients`, weighted by the number of rows for that pair.
sankeyData <- patients %>%
  group_by(employee, handling) %>%
  count()

# Work on plain character vectors: the columns may be factors, and factors
# with different level sets cannot be combined or compared reliably.
link_employees <- as.character(sankeyData$employee)
link_activities <- as.character(sankeyData$handling)

# Every node must appear exactly once; otherwise a link cannot be mapped to a
# single node index. Employees are listed first, activities after them.
employee_nodes <- unique(link_employees)
activity_nodes <- unique(link_activities)
sankeyNodes <- list(label = c(employee_nodes, activity_nodes))

trace2 <- list(
  domain = list(
    x = c(0, 1),
    y = c(0, 1)
  ),
  link = list(
    label = paste0("Case", seq_len(nrow(sankeyData))),
    # Node indices are zero-based; activity nodes are offset by the number of
    # employee nodes that precede them in `sankeyNodes$label`.
    source = match(link_employees, employee_nodes) - 1,
    target = length(employee_nodes) +
      match(link_activities, activity_nodes) - 1,
    value = sankeyData$n
  ),
  node = list(label = sankeyNodes$label),
  type = "sankey"
)
data2 <- list(trace2)
p <- plot_ly()
p <- add_trace(
  p,
  domain = trace2$domain,
  link = trace2$link,
  node = trace2$node,
  type = trace2$type
)
p
请运行下面的脚本。我在 R 中创建了一个 Sankey 图(桑基图),并使用 bupaR 库中 "patients" 数据集的数据进行绘图。请参阅快照以供参考。我面临的问题是,这个自定义图表是通过逐一声明并建立用户("r1"、"r2" 等)与活动("Registration"、"X-Ray" 等)之间的关系来创建的。如果用户和活动的数量很大,逐一声明每个关系将成为一项非常繁琐的任务。请帮助我把图表改为动态生成,以便同样的代码可以适用于大量的用户和活动。
library(plotly)
library(bupaR)
# Hand-written Sankey diagram: every employee -> activity link is declared
# explicitly and weighted by the number of matching rows in `patients`.

# Number of rows in `patients` recorded for the given activity/employee pair.
# Rows where either comparison is NA are not counted.
count_events <- function(activity, resource) {
  sum(
    patients$handling == activity & patients$employee == resource,
    na.rm = TRUE
  )
}

value1 <- count_events("Registration", "r1")
value2 <- count_events("Triage and Assessment", "r1")
value3 <- count_events("Check-out", "r1")
value4 <- count_events("Triage and Assessment", "r2")
value5 <- count_events("Blood test", "r3")
value6 <- count_events("Triage and Assessment", "r3")
value7 <- count_events("X-Ray", "r3")
value8 <- count_events("MRI SCAN", "r4")
value9 <- count_events("X-Ray", "r4")
value10 <- count_events("X-Ray", "r5")
value11 <- count_events("Discuss Results", "r6")
value12 <- count_events("MRI SCAN", "r6")
value13 <- count_events("Check-out", "r7")

trace1 <- list(
  domain = list(
    x = c(0, 1),
    y = c(0, 1)
  ),
  link = list(
    label = paste0("Case", seq_len(13)),
    # `source` and `target` are zero-based positions in `node$label`:
    # 0-6 are the employees, 7-13 are the activities.
    source = c(0, 0, 0, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6),
    target = c(7, 8, 13, 8, 9, 8, 11, 10, 11, 11, 12, 10, 13),
    value = c(
      value1, value2, value3, value4, value5, value6, value7,
      value8, value9, value10, value11, value12, value13
    )
  ),
  node = list(
    label = c(
      "R1", "R2", "R3", "R4", "R5", "R6", "R7",
      # "Blood Test" is kept on one line so the label has no embedded newline.
      "Registration", "Triage and Assessment", "Blood Test",
      "MRI Scan", "X-RAY", "Discuss Results", "Check Out"
    )
  ),
  type = "sankey"
)
data <- list(trace1)
p <- plot_ly()
p <- add_trace(
  p,
  domain = trace1$domain,
  link = trace1$link,
  node = trace1$node,
  type = trace1$type
)
p
应该这样做
# Build the Sankey diagram dynamically: one link per (employee, activity)
# pair that occurs in `patients`, weighted by the number of rows for that pair.
sankeyData <- patients %>%
  group_by(employee, handling) %>%
  count()

# Work on plain character vectors: the columns may be factors, and factors
# with different level sets cannot be combined or compared reliably.
link_employees <- as.character(sankeyData$employee)
link_activities <- as.character(sankeyData$handling)

# Every node must appear exactly once; otherwise a link cannot be mapped to a
# single node index. Employees are listed first, activities after them.
employee_nodes <- unique(link_employees)
activity_nodes <- unique(link_activities)
sankeyNodes <- list(label = c(employee_nodes, activity_nodes))

trace2 <- list(
  domain = list(
    x = c(0, 1),
    y = c(0, 1)
  ),
  link = list(
    label = paste0("Case", seq_len(nrow(sankeyData))),
    # Node indices are zero-based; activity nodes are offset by the number of
    # employee nodes that precede them in `sankeyNodes$label`.
    source = match(link_employees, employee_nodes) - 1,
    target = length(employee_nodes) +
      match(link_activities, activity_nodes) - 1,
    value = sankeyData$n
  ),
  node = list(label = sankeyNodes$label),
  type = "sankey"
)
data2 <- list(trace2)
p <- plot_ly()
p <- add_trace(
  p,
  domain = trace2$domain,
  link = trace2$link,
  node = trace2$node,
  type = trace2$type
)
p