如何在 R 中使用 igraph 从有向子树中获取所有叶节点?
How to get all leaf nodes from a directed subtree using igraph in R?
给定的是一棵有向树(igraph 图),该树由包含符号边列表的数据框构造而成:
library(igraph)
library(ggraph)
# Symbolic edge list of the example tree: each row is one parent -> child edge.
# The parents "A".."J" are repeated once per child; the children are "B".."U".
edgelist_df <- data.frame(
  from = rep(
    c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
    times = c(3, 3, 1, 2, 2, 1, 2, 1, 2, 3)
  ),
  to = LETTERS[2:21]
)
# Build the directed tree from the edge list.
graph <- graph_from_data_frame(edgelist_df, directed = TRUE)
绘制树。这里我使用 ggraph 包中的 ggraph() 函数。
# Draw the tree as a non-circular dendrogram: diagonal edges, one point per
# vertex, and the vertex name as a text label.
ggraph(graph, layout = "dendrogram", circular = FALSE) +
  geom_edge_diagonal() +
  geom_node_point() +
  geom_node_text(
    mapping = aes(label = name),
    angle = 0,
    hjust = 1.5,
    nudge_y = 0,
    size = 5
  ) +
  theme_void()
问题是:如何返回一个字符向量,其中包含以指定节点为根的子树中所有叶节点的名称。例如:
- 如果节点=“B”,则属于根为“B”的子树的所有叶节点为:“K”、“L”、
“M”、“N”和“O”。
- 如果节点 =“H”,则属于根“H”的子树的所有叶节点是:“P”。
- 如果节点=“A”,则属于根为“A”的子树(原始树)的所有叶节点为:“K”、“L”、“M”、“N”、“O”、“P”、“Q”、“R”、“S”、“T”和“U”。
假设对象 graph
确实是一棵树,那么下面的函数给出了期望的结果。
#' Leaf vertices of the subtree rooted at a given vertex
#'
#' @param graph A directed igraph graph with a `name` vertex attribute whose
#'   edges point from parent to child.
#' @param vertex Name of the vertex that is the root of the subtree of
#'   interest (a single character string).
#' @return A character vector with the names of all vertices without outgoing
#'   edges that can be reached from `vertex`. If `vertex` is itself a leaf,
#'   `vertex` is returned.
determine_leaf_nodes_subtree <- function(graph, vertex) {
  vertex_names <- V(graph)$name
  # Fail loudly on a bad 'vertex' instead of silently returning an empty
  # result.
  if (length(vertex) != 1L || !vertex %in% vertex_names) {
    stop(
      "'vertex' must be the name of exactly one vertex of 'graph'.",
      call. = FALSE
    )
  }
  # Leaf vertices are the vertices without outgoing edges.
  leafs <- vertex_names[degree(graph = graph, v = V(graph), mode = "out") == 0]
  # A leaf is the only leaf of its own single-vertex subtree.
  if (vertex %in% leafs) {
    return(vertex)
  }
  # Collect every vertex reachable from 'vertex' along edge directions,
  # excluding 'vertex' itself ('mindist' = 1). A shortest path never has more
  # edges than the graph has vertices, so the vertex count is a safe bound for
  # 'order'; the root and the tree depth do not have to be computed first.
  descendants <- ego(
    graph = graph,
    order = length(V(graph)),
    nodes = vertex,
    mode = "out",
    mindist = 1
  )[[1]]
  # Keep only the reachable vertices that are leaves.
  intersect(names(descendants), leafs)
}
关于问题中所述的示例,此函数提供以下输出:
determine_leaf_nodes_subtree(graph = graph, vertex = "B")
[1] "K" "L" "M" "N" "O"
determine_leaf_nodes_subtree(graph = graph, vertex = "H")
[1] "P"
determine_leaf_nodes_subtree(graph = graph, vertex = "A")
[1] "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U"
你可以对 ego 进行两次调用:第一次是获取从父节点到子节点(mode="out")可以到达的所有节点;第二次是查看这些被选中的节点是否有任何子节点(如果没有,则它们是叶节点)。
#' Leaf vertices reachable from a node
#'
#' @param graph A directed igraph graph with named vertices.
#' @param node Name of the vertex to start from.
#' @return The names of the vertices reachable from `node` (including `node`
#'   itself) that have no out-neighbours.
fun <- function(graph, node = "B") {
  # Using the vertex count as 'order' reaches every vertex that can be
  # reached from 'node' along edge directions.
  n_vertices <- length(V(graph))
  reachable <- ego(graph, order = n_vertices, nodes = node, mode = "out")[[1]]
  reachable_names <- names(reachable)
  # Number of out-neighbours of each reachable vertex; 'mindist = 1' keeps the
  # vertex itself out of the count.
  child_counts <- ego_size(
    graph,
    order = 1,
    nodes = reachable_names,
    mode = "out",
    mindist = 1
  )
  reachable_names[child_counts == 0]
}
这会产生
fun(graph, "B")
# [1] "K" "L" "M" "N" "O"
fun(graph, "H")
# [1] "P"
fun(graph, "A")
# [1] "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U"
事实上,借鉴彼得斯的思路,对 ego_size 的调用可以替换为:
nms[degree(graph, v=nms, mode="out") == 0]
您可以使用 distances() 和 degree() 定义函数 f,如下所示:
#' Leaf vertices reachable from a root vertex
#'
#' @param g A directed igraph graph with named vertices.
#' @param r A single vertex (name or id) used as the root of the subtree.
#' @return The names of all vertices without outgoing edges that are reachable
#'   from `r` along edge directions, in vertex-id order.
f <- function(g, r) {
  # distances() returns one row per source vertex; with a single 'r' the first
  # row holds the distance from 'r' to every vertex. A finite distance means
  # the vertex is reachable from 'r'.
  reachable <- is.finite(distances(g, r, mode = "out")[1, ])
  # A leaf has no outgoing edges. Testing the total degree for 1 would also
  # flag a root with exactly one child and would miss an isolated root.
  is_leaf <- degree(g, mode = "out") == 0
  names(V(g))[reachable & is_leaf]
}
这给出了
> f(graph, "B")
[1] "K" "L" "M" "N" "O"
给定的是一棵有向树(igraph 图),该树由包含符号边列表的数据框构造而成:
library(igraph)
library(ggraph)
# Symbolic edge list of the example tree: each row is one parent -> child edge.
# The parents "A".."J" are repeated once per child; the children are "B".."U".
edgelist_df <- data.frame(
  from = rep(
    c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J"),
    times = c(3, 3, 1, 2, 2, 1, 2, 1, 2, 3)
  ),
  to = LETTERS[2:21]
)
# Build the directed tree from the edge list.
graph <- graph_from_data_frame(edgelist_df, directed = TRUE)
绘制树。这里我使用 ggraph 包中的 ggraph() 函数。
# Draw the tree as a non-circular dendrogram: diagonal edges, one point per
# vertex, and the vertex name as a text label.
ggraph(graph, layout = "dendrogram", circular = FALSE) +
  geom_edge_diagonal() +
  geom_node_point() +
  geom_node_text(
    mapping = aes(label = name),
    angle = 0,
    hjust = 1.5,
    nudge_y = 0,
    size = 5
  ) +
  theme_void()
问题是:如何返回一个字符向量,其中包含以指定节点为根的子树中所有叶节点的名称。例如:
- 如果节点=“B”,则属于根为“B”的子树的所有叶节点为:“K”、“L”、 “M”、“N”和“O”。
- 如果节点 =“H”,则属于根“H”的子树的所有叶节点是:“P”。
- 如果节点=“A”,则属于根为“A”的子树(原始树)的所有叶节点为:“K”、“L”、“M”、“N”、“O”、“P”、“Q”、“R”、“S”、“T”和“U”。
假设对象 graph
确实是一棵树,那么下面的函数给出了期望的结果。
#' Leaf vertices of the subtree rooted at a given vertex
#'
#' @param graph A directed igraph graph with a `name` vertex attribute whose
#'   edges point from parent to child.
#' @param vertex Name of the vertex that is the root of the subtree of
#'   interest (a single character string).
#' @return A character vector with the names of all vertices without outgoing
#'   edges that can be reached from `vertex`. If `vertex` is itself a leaf,
#'   `vertex` is returned.
determine_leaf_nodes_subtree <- function(graph, vertex) {
  vertex_names <- V(graph)$name
  # Fail loudly on a bad 'vertex' instead of silently returning an empty
  # result.
  if (length(vertex) != 1L || !vertex %in% vertex_names) {
    stop(
      "'vertex' must be the name of exactly one vertex of 'graph'.",
      call. = FALSE
    )
  }
  # Leaf vertices are the vertices without outgoing edges.
  leafs <- vertex_names[degree(graph = graph, v = V(graph), mode = "out") == 0]
  # A leaf is the only leaf of its own single-vertex subtree.
  if (vertex %in% leafs) {
    return(vertex)
  }
  # Collect every vertex reachable from 'vertex' along edge directions,
  # excluding 'vertex' itself ('mindist' = 1). A shortest path never has more
  # edges than the graph has vertices, so the vertex count is a safe bound for
  # 'order'; the root and the tree depth do not have to be computed first.
  descendants <- ego(
    graph = graph,
    order = length(V(graph)),
    nodes = vertex,
    mode = "out",
    mindist = 1
  )[[1]]
  # Keep only the reachable vertices that are leaves.
  intersect(names(descendants), leafs)
}
关于问题中所述的示例,此函数提供以下输出:
determine_leaf_nodes_subtree(graph = graph, vertex = "B")
[1] "K" "L" "M" "N" "O"
determine_leaf_nodes_subtree(graph = graph, vertex = "H")
[1] "P"
determine_leaf_nodes_subtree(graph = graph, vertex = "A")
[1] "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U"
你可以对 ego 进行两次调用:第一次是获取从父节点到子节点(mode="out")可以到达的所有节点;第二次是查看这些被选中的节点是否有任何子节点(如果没有,则它们是叶节点)。
#' Leaf vertices reachable from a node
#'
#' @param graph A directed igraph graph with named vertices.
#' @param node Name of the vertex to start from.
#' @return The names of the vertices reachable from `node` (including `node`
#'   itself) that have no out-neighbours.
fun <- function(graph, node = "B") {
  # Using the vertex count as 'order' reaches every vertex that can be
  # reached from 'node' along edge directions.
  n_vertices <- length(V(graph))
  reachable <- ego(graph, order = n_vertices, nodes = node, mode = "out")[[1]]
  reachable_names <- names(reachable)
  # Number of out-neighbours of each reachable vertex; 'mindist = 1' keeps the
  # vertex itself out of the count.
  child_counts <- ego_size(
    graph,
    order = 1,
    nodes = reachable_names,
    mode = "out",
    mindist = 1
  )
  reachable_names[child_counts == 0]
}
这会产生
fun(graph, "B")
# [1] "K" "L" "M" "N" "O"
fun(graph, "H")
# [1] "P"
fun(graph, "A")
# [1] "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U"
事实上,借鉴彼得斯的思路,对 ego_size 的调用可以替换为:
nms[degree(graph, v=nms, mode="out") == 0]
您可以使用 distances() 和 degree() 定义函数 f,如下所示:
#' Leaf vertices reachable from a root vertex
#'
#' @param g A directed igraph graph with named vertices.
#' @param r A single vertex (name or id) used as the root of the subtree.
#' @return The names of all vertices without outgoing edges that are reachable
#'   from `r` along edge directions, in vertex-id order.
f <- function(g, r) {
  # distances() returns one row per source vertex; with a single 'r' the first
  # row holds the distance from 'r' to every vertex. A finite distance means
  # the vertex is reachable from 'r'.
  reachable <- is.finite(distances(g, r, mode = "out")[1, ])
  # A leaf has no outgoing edges. Testing the total degree for 1 would also
  # flag a root with exactly one child and would miss an isolated root.
  is_leaf <- degree(g, mode = "out") == 0
  names(V(g))[reachable & is_leaf]
}
这给出了
> f(graph, "B")
[1] "K" "L" "M" "N" "O"