桑基图 link 分裂 - 是否有可能从一个节点有两个 link 流?
Sankey plot link splitting - is it possible to have two links flow from one node?
我想更改我使用 networkD3 R 包制作的 Sankey 图,以便多个 links 从一个节点流出,这是我目前所做的:
header 随机数据样本:
Study Category Class
<chr> <chr> <chr>
1 study17 cat H class B;class C
2 study32 cat A;cat B class A
3 study7 cat F class A
4 study21 cat F class C
5 study24 cat F class B;class C
6 study15 cat E;cat K class C
# example data
d <- read.csv(text = "Study,Category,Class
study17,cat H,class B;class C
study32,cat A;cat B,class A
study7,cat F,class A
study21,cat F,class C
study24,cat F,class B;class C
study15,cat E;cat K,class C")
使用这个 answer 我创建了以下 sankeyplot:
但是,如您所知,第二列和第三列现在包含“复合节点”,例如“cat A;cat B”和“class B;class C”。
我想让 2 个节点从研究 32 流出:一个到猫 A,一个到猫 B。同样,我想要两个节点从猫 F(第 5 行)流出:一个到 class B 和一到 class C.
本质上,我是在问 link 分裂是否可能?我知道我可以定期拆分它们并为每个实例创建一个新行,但这会扭曲此图像中的真实情况..
我们可以根据拆分更新矩形的大小值。这应该避免歪曲事实。
library(networkD3)
library(data.table)
setDT(d)
# make links
links <- rbind(d[, .(source = Study, target = Category) ],
d[, .(source = Category, target = Class) ])
links[, rn := .I]
# adjust value, based on "split"
links <- links[, strsplit(source, split = ";", fixed = TRUE), by = .(source, target, rn)
][, .(source = V1, target, rn)
][, strsplit(target, split = ";", fixed = TRUE), by = .(source, target, rn)
][, .(source, target = V1, rn)
][, .(source, target, value = 1/.N), by = rn]
# make nodes
nodes <- data.frame(name = unique(unlist(links[,.(source, target)])))
nodes$label <- nodes$name
# update link ids
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
# plot
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')
我想这就是您已经完成的...
library(dplyr)
library(tidyr)
library(networkD3)
data <- tibble::tribble(
~Study, ~Category, ~Class,
"study17", "cat H", "class B;class C",
"study32", "cat A;cat B", "class A",
"study7", "cat F", "class A",
"study21", "cat F", "class C",
"study24", "cat F", "class B;class C",
"study15", "cat E;cat K", "class C"
)
links <-
data %>%
mutate(row = row_number()) %>% # add a row id
pivot_longer(-row, names_to = "column", values_to = "source") %>% # gather all columns
mutate(column = match(column, names(data))) %>% # convert col names to col ids
group_by(row) %>%
mutate(target = lead(source, order_by = column)) %>% # get target from following node in row
ungroup() %>%
filter(!is.na(target)) %>% # remove links from last column in original data
mutate(source = paste0(source, '_', column)) %>%
mutate(target = paste0(target, '_', column + 1)) %>%
select(source, target)
nodes <- data.frame(name = unique(c(links$source, links$target)))
nodes$label <- sub('_[0-9]*$', '', nodes$name) # remove column id from node label
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
links$value <- 1
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')
您可以像这样重塑您的原始数据
data2 <- data %>% tidyr::separate_rows(everything(), sep = ";")
data2
#> # A tibble: 10 × 3
#> Study Category Class
#> <chr> <chr> <chr>
#> 1 study17 cat H class B
#> 2 study17 cat H class C
#> 3 study32 cat A class A
#> 4 study32 cat B class A
#> 5 study7 cat F class A
#> 6 study21 cat F class C
#> 7 study24 cat F class B
#> 8 study24 cat F class C
#> 9 study15 cat E class C
#> 10 study15 cat K class C
links <-
data2 %>%
mutate(row = row_number()) %>% # add a row id
pivot_longer(-row, names_to = "column", values_to = "source") %>% # gather all columns
mutate(column = match(column, names(data2))) %>% # convert col names to col ids
group_by(row) %>%
mutate(target = lead(source, order_by = column)) %>% # get target from following node in row
ungroup() %>%
filter(!is.na(target)) %>% # remove links from last column in original data
mutate(source = paste0(source, '_', column)) %>%
mutate(target = paste0(target, '_', column + 1)) %>%
select(source, target)
nodes <- data.frame(name = unique(c(links$source, links$target)))
nodes$label <- sub('_[0-9]*$', '', nodes$name) # remove column id from node label
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
links$value <- 1
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')
我想更改我使用 networkD3 R 包制作的 Sankey 图,以便多个 links 从一个节点流出,这是我目前所做的:
header 随机数据样本:
Study Category Class
<chr> <chr> <chr>
1 study17 cat H class B;class C
2 study32 cat A;cat B class A
3 study7 cat F class A
4 study21 cat F class C
5 study24 cat F class B;class C
6 study15 cat E;cat K class C
# example data
d <- read.csv(text = "Study,Category,Class
study17,cat H,class B;class C
study32,cat A;cat B,class A
study7,cat F,class A
study21,cat F,class C
study24,cat F,class B;class C
study15,cat E;cat K,class C")
使用这个 answer 我创建了以下 sankeyplot:
但是,如您所知,第二列和第三列现在包含“复合节点”,例如“cat A;cat B”和“class B;class C”。
我想让 2 个节点从研究 32 流出:一个到猫 A,一个到猫 B。同样,我想要两个节点从猫 F(第 5 行)流出:一个到 class B 和一到 class C.
本质上,我是在问 link 分裂是否可能?我知道我可以定期拆分它们并为每个实例创建一个新行,但这会扭曲此图像中的真实情况..
我们可以根据拆分更新矩形的大小值。这应该避免歪曲事实。
library(networkD3)
library(data.table)
setDT(d)
# make links
links <- rbind(d[, .(source = Study, target = Category) ],
d[, .(source = Category, target = Class) ])
links[, rn := .I]
# adjust value, based on "split"
links <- links[, strsplit(source, split = ";", fixed = TRUE), by = .(source, target, rn)
][, .(source = V1, target, rn)
][, strsplit(target, split = ";", fixed = TRUE), by = .(source, target, rn)
][, .(source, target = V1, rn)
][, .(source, target, value = 1/.N), by = rn]
# make nodes
nodes <- data.frame(name = unique(unlist(links[,.(source, target)])))
nodes$label <- nodes$name
# update link ids
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
# plot
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')
我想这就是您已经完成的...
library(dplyr)
library(tidyr)
library(networkD3)
data <- tibble::tribble(
~Study, ~Category, ~Class,
"study17", "cat H", "class B;class C",
"study32", "cat A;cat B", "class A",
"study7", "cat F", "class A",
"study21", "cat F", "class C",
"study24", "cat F", "class B;class C",
"study15", "cat E;cat K", "class C"
)
links <-
data %>%
mutate(row = row_number()) %>% # add a row id
pivot_longer(-row, names_to = "column", values_to = "source") %>% # gather all columns
mutate(column = match(column, names(data))) %>% # convert col names to col ids
group_by(row) %>%
mutate(target = lead(source, order_by = column)) %>% # get target from following node in row
ungroup() %>%
filter(!is.na(target)) %>% # remove links from last column in original data
mutate(source = paste0(source, '_', column)) %>%
mutate(target = paste0(target, '_', column + 1)) %>%
select(source, target)
nodes <- data.frame(name = unique(c(links$source, links$target)))
nodes$label <- sub('_[0-9]*$', '', nodes$name) # remove column id from node label
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
links$value <- 1
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')
您可以像这样重塑您的原始数据
data2 <- data %>% tidyr::separate_rows(everything(), sep = ";")
data2
#> # A tibble: 10 × 3
#> Study Category Class
#> <chr> <chr> <chr>
#> 1 study17 cat H class B
#> 2 study17 cat H class C
#> 3 study32 cat A class A
#> 4 study32 cat B class A
#> 5 study7 cat F class A
#> 6 study21 cat F class C
#> 7 study24 cat F class B
#> 8 study24 cat F class C
#> 9 study15 cat E class C
#> 10 study15 cat K class C
links <-
data2 %>%
mutate(row = row_number()) %>% # add a row id
pivot_longer(-row, names_to = "column", values_to = "source") %>% # gather all columns
mutate(column = match(column, names(data2))) %>% # convert col names to col ids
group_by(row) %>%
mutate(target = lead(source, order_by = column)) %>% # get target from following node in row
ungroup() %>%
filter(!is.na(target)) %>% # remove links from last column in original data
mutate(source = paste0(source, '_', column)) %>%
mutate(target = paste0(target, '_', column + 1)) %>%
select(source, target)
nodes <- data.frame(name = unique(c(links$source, links$target)))
nodes$label <- sub('_[0-9]*$', '', nodes$name) # remove column id from node label
links$source_id <- match(links$source, nodes$name) - 1
links$target_id <- match(links$target, nodes$name) - 1
links$value <- 1
sankeyNetwork(Links = links, Nodes = nodes, Source = 'source_id',
Target = 'target_id', Value = 'value', NodeID = 'label')