使用 networkD3 在桑基图中隔离一个“分支”

Isolating a "branch" in a sankey diagram using networkD3

我正在使用 networkD3 包中的 sankeyNetwork() 来可视化一些数据。我想知道是否有办法从头到尾“隔离”一个分支,忽略不相关的链接。

示例: 我有这个:SankeyGot

我想提取这个:SankeyWant

可重现的例子:

# Reproducible example: build a random link table and draw it as a sankey
# diagram.
library(dplyr)      # provides tibble() and the %>% pipe
library(networkD3)  # provides sankeyNetwork()

set.seed(9)

# Ten random links: five source words, each used twice. The last three targets
# reuse the first three sources, so those words act as both source and target.
df <- tibble(
  source = sample(stringr::words, 5) %>% rep(2),
  target = c(sample(stringr::words, 7), source[1:3]),
  values = rnorm(10, 10, 7) %>% round(0) %>% abs()
)

# One row per distinct word that appears on either end of a link.
nodes <- data.frame(names = unique(c(df$source, df$target)))

# Replace each word by its row position in `nodes`, shifted from match()'s
# 1-based positions to the 0-based indices that are passed to sankeyNetwork().
links <- tibble(
  source = match(df$source, nodes$names) - 1,
  target = match(df$target, nodes$names) - 1,
  value = df$values
)

sankeyNetwork(
  Links = links, Nodes = nodes, Source = "source",
  Target = "target", Value = "value", NodeID = "names",
  iterations = 64, sinksRight = FALSE, fontSize = 14
)

我希望能够按“name”这个节点进行筛选,并获取其上游和下游所有层级中与之相连的全部内容——我该怎么做?

如果您把 sankeyNetwork() 的结果保存为一个对象,就可以用 str(object) 查看它的结构:它是一个列表,其中名为 x 的元素保存了您输入的数据(例如 x$links)。

# Keep the widget in a variable so that its contents can be inspected and
# modified before it is rendered.
list_sankey <- sankeyNetwork(
  Links = links, Nodes = nodes, Source = "source", Target = "target",
  Value = "value", NodeID = "names", iterations = 64, sinksRight = FALSE,
  fontSize = 14
)

# Show the structure of the returned object.
str(list_sankey)

然后您可以过滤 x 中的 links,使其只保留您想要的 source(起点)节点和 target(终点)节点

# Node indices to retain on each end of a link.
keep_sources <- c(4, 2, 0)
keep_targets <- c(4, 2, 0, 10)

# Work on a copy so that the original widget object stays untouched.
list_sankey_filter <- list_sankey

# Retain only the links whose source and target are both in the wanted sets.
kept_links <- list_sankey_filter$x$links %>%
  filter(source %in% keep_sources & target %in% keep_targets)
list_sankey_filter$x$links <- kept_links

这将为您提供以下对象。

从图中的某个节点出发计算路径并不简单,但 igraph 包的 all_simple_paths() 可以帮上忙。不过,请注意其帮助文件中的警告……

Note that potentially there are exponentially many paths between two vertices of a graph, and you may run out of memory when using this function, if your graph is lattice-like.

(我不知道你的 words 向量是什么,所以我手动重新创建了 links data.frame)

library(dplyr)
library(networkD3)

set.seed(9)

# Edge list recreated by hand, one source/target pair per row.
# stringsAsFactors = FALSE keeps the node names as character vectors on
# R versions before 4.0 as well.
df <- read.csv(header = TRUE, stringsAsFactors = FALSE, text = "
source,target
summer,obvious
summer,structure
however,either
however,match
obvious,about
obvious,non
either,contract
either,produce
contract,paint
contract,name
")

# One random, non-negative whole-number weight per link. The count is taken
# from the table instead of a hard-coded 10, so rows can be added or removed
# without touching this line.
df$values <- abs(round(rnorm(nrow(df), mean = 10, sd = 7)))


# Use a graph to find every node that lies upstream or downstream of a
# chosen node, then keep only the links between those nodes.
library(igraph)

# Directed graph built from the link table.
graph <- graph_from_data_frame(df)

start_node <- "name"

# Fail early with a clear message if the chosen node is not in the data.
stopifnot(start_node %in% V(graph)$name)

# Only the set of reachable nodes is needed here, so subcomponent() is used
# rather than enumerating every individual path with all_simple_paths(),
# whose output can grow exponentially on lattice-like graphs.

# Nodes from which start_node can be reached (following links INTO it);
# the result includes start_node itself.
connected_nodes_in <- as_ids(subcomponent(graph, start_node, mode = "in"))

# Nodes that can be reached from start_node (following links OUT of it);
# the result includes start_node itself.
connected_nodes_out <- as_ids(subcomponent(graph, start_node, mode = "out"))

# Combine them.
connected_nodes <- unique(c(connected_nodes_in, connected_nodes_out))

# Filter the data frame so it only includes links/edges that both start and
# end at connected nodes.
df <- df %>%
  filter(source %in% connected_nodes & target %in% connected_nodes)



# Rebuild the node and link tables from the filtered `df` and draw the
# resulting sankey diagram.

# One row per distinct name that appears on either end of a remaining link.
nodes <- data.frame(names = unique(c(df$source, df$target)))

# Replace each name by its row position in `nodes`, shifted from match()'s
# 1-based positions to the 0-based indices that are passed to sankeyNetwork().
links <- tibble(
  source = match(df$source, nodes$names) - 1,
  target = match(df$target, nodes$names) - 1,
  value = df$values
)

sankeyNetwork(
  Links = links, Nodes = nodes, Source = "source",
  Target = "target", Value = "value", NodeID = "names",
  iterations = 64, sinksRight = FALSE, fontSize = 14
)