使用 networkd3 在桑基图中隔离 "branch"
Isolating a "branch" in a sankey diagram using networkd3
我正在使用 networkD3
包中的 sankeyNetwork()
来可视化一些数据。我想知道是否有办法从头到尾“隔离”一个分支,忽略不相关的链接。
示例: 我有这个:SankeyGot
我想提取这个:SankeyWant
可重现的例子:
set.seed(9)
df <- tibble(
source = sample(stringr::words, 5) %>% rep(2),
target = c(sample(words, 7), source[1:3]),
values = rnorm(10, 10, 7) %>% round(0) %>% abs)
nodes <- data.frame(names = unique(c(df$source, df$target)))
links <- tibble(
source = match(
df$source, nodes$names) -1,
target = match(
df$target, nodes$names) -1,
value = df$values
)
sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID = "names",
iterations = 64, sinksRight = F, fontSize = 14)
我希望能够过滤掉“name”,并在上游和下游的所有级别上获取与之相关的所有内容 - 我该怎么做这个?
如果您将 sankeyNetwork 编码为一个对象,您可以使用 str(object) 将其识别为一个列表,其中包含一个名为 x
的矩阵,用于保存您的输入 df
list_sankey <- sankeyNetwork(Links = links, Nodes = nodes, Source = "source", Target = "target", Value = "value", NodeID = "names", iterations = 64, sinksRight = F, fontSize = 14)
str(list_sankey)
然后您可以过滤 x
矩阵以仅包含您想要的输入 source
和输出 target
个节点
list_sankey_filter <- list_sankey
list_sankey_filter$x$links <- list_sankey_filter$x$links %>% filter(source %in% c(4, 2, 0), target %in% c(4, 2, 0, 10))
这将为您提供以下对象。
从图中的节点计算路径是 non-trivial,但 igraph
包可以帮助 all_simple_paths()
。但是,请注意帮助文件中的警告...
Note that potentially there are exponentially many paths between two
vertices of a graph, and you may run out of memory when using this
function, if your graph is lattice-like.
(我不知道你的 words
向量是什么,所以我手动重新创建了 links
data.frame)
library(dplyr)
library(networkD3)
set.seed(9)
df <- read.csv(header = TRUE, text = "
source,target
summer,obvious
summer,structure
however,either
however,match
obvious,about
obvious,non
either,contract
either,produce
contract,paint
contract,name
")
df$values <- rnorm(10, 10, 7) %>% round(0) %>% abs()
# use graph to calculate the paths from a node
library(igraph)
graph <- graph_from_data_frame(df)
start_node <- "name"
# get nodes along a uni-directional path going IN to the start_node
connected_nodes_in <-
all_simple_paths(graph, from = start_node, mode = "in") %>%
unlist() %>%
names() %>%
unique()
# get nodes along a uni-directional path going OUT of the start_node
connected_nodes_out <-
all_simple_paths(graph, from = start_node, mode = "out") %>%
unlist() %>%
names() %>%
unique()
# combine them
connected_nodes <- unique(c(connected_nodes_in, connected_nodes_out))
# filter your data frame so it only includes links/edges that start and
# end at connected nodes
df <- df %>% filter(source %in% connected_nodes & target %in% connected_nodes)
nodes <- data.frame(names = unique(c(df$source, df$target)))
links <- tibble(
source = match(
df$source, nodes$names) -1,
target = match(
df$target, nodes$names) -1,
value = df$values
)
sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID = "names",
iterations = 64, sinksRight = F, fontSize = 14)
我正在使用 networkD3
包中的 sankeyNetwork()
来可视化一些数据。我想知道是否有办法从头到尾“隔离”一个分支,忽略不相关的链接。
示例: 我有这个:SankeyGot
我想提取这个:SankeyWant
可重现的例子:
set.seed(9)
df <- tibble(
source = sample(stringr::words, 5) %>% rep(2),
target = c(sample(words, 7), source[1:3]),
values = rnorm(10, 10, 7) %>% round(0) %>% abs)
nodes <- data.frame(names = unique(c(df$source, df$target)))
links <- tibble(
source = match(
df$source, nodes$names) -1,
target = match(
df$target, nodes$names) -1,
value = df$values
)
sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID = "names",
iterations = 64, sinksRight = F, fontSize = 14)
我希望能够过滤掉“name”,并在上游和下游的所有级别上获取与之相关的所有内容 - 我该怎么做这个?
如果您将 sankeyNetwork 编码为一个对象,您可以使用 str(object) 将其识别为一个列表,其中包含一个名为 x
的矩阵,用于保存您的输入 df
list_sankey <- sankeyNetwork(Links = links, Nodes = nodes, Source = "source", Target = "target", Value = "value", NodeID = "names", iterations = 64, sinksRight = F, fontSize = 14)
str(list_sankey)
然后您可以过滤 x
矩阵以仅包含您想要的输入 source
和输出 target
个节点
list_sankey_filter <- list_sankey
list_sankey_filter$x$links <- list_sankey_filter$x$links %>% filter(source %in% c(4, 2, 0), target %in% c(4, 2, 0, 10))
这将为您提供以下对象。
从图中的节点计算路径是 non-trivial,但 igraph
包可以帮助 all_simple_paths()
。但是,请注意帮助文件中的警告...
Note that potentially there are exponentially many paths between two vertices of a graph, and you may run out of memory when using this function, if your graph is lattice-like.
(我不知道你的 words
向量是什么,所以我手动重新创建了 links
data.frame)
library(dplyr)
library(networkD3)
set.seed(9)
df <- read.csv(header = TRUE, text = "
source,target
summer,obvious
summer,structure
however,either
however,match
obvious,about
obvious,non
either,contract
either,produce
contract,paint
contract,name
")
df$values <- rnorm(10, 10, 7) %>% round(0) %>% abs()
# use graph to calculate the paths from a node
library(igraph)
graph <- graph_from_data_frame(df)
start_node <- "name"
# get nodes along a uni-directional path going IN to the start_node
connected_nodes_in <-
all_simple_paths(graph, from = start_node, mode = "in") %>%
unlist() %>%
names() %>%
unique()
# get nodes along a uni-directional path going OUT of the start_node
connected_nodes_out <-
all_simple_paths(graph, from = start_node, mode = "out") %>%
unlist() %>%
names() %>%
unique()
# combine them
connected_nodes <- unique(c(connected_nodes_in, connected_nodes_out))
# filter your data frame so it only includes links/edges that start and
# end at connected nodes
df <- df %>% filter(source %in% connected_nodes & target %in% connected_nodes)
nodes <- data.frame(names = unique(c(df$source, df$target)))
links <- tibble(
source = match(
df$source, nodes$names) -1,
target = match(
df$target, nodes$names) -1,
value = df$values
)
sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID = "names",
iterations = 64, sinksRight = F, fontSize = 14)