如何从图中获取从节点到节点的边权重之和
How to get the sum of edge weight from a node to a node from a graph
我们可以使用 igraph 包的 strength 函数,对每个顶点的相邻边的权重求和。假设我们有如下的图:
可重现的数据和代码
# Reproducible example: build an undirected, weighted graph from an edge list.
library(igraph)

nodeA <- c("ID_1", "ID_2", "ID_3", "ID_4", "ID_5", "ID_16", "Node_30")
nodeB <- c("ID_11", "ID_3", "ID_4", "ID_5", "ID_3", "ID_11", "Node_3")
edge_weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7)
df_1 <- data.frame(nodeA, nodeB, edge_weight)
graph1 <- graph_from_data_frame(df_1, directed = FALSE)
# The third column is stored as the edge attribute `edge_weight`, so copy it
# into `weight` on graph1 itself (the graph that is plotted and measured below).
E(graph1)$weight <- df_1$edge_weight
plot(graph1)
# Per-vertex sum of the weights of the incident edges.
edge_strengts <- strength(graph1)
strength 函数返回的是图中每个顶点所有相邻边的权重之和:
ID_1 ID_2 ID_3 ID_4 ID_5 ID_16 Node_30 ID_11 Node_3
0.50 0.90 2.20 1.50 1.20 0.09 0.70 0.59 0.70
现在,我有一个边列表(节点对),我只想得到列表中每一对节点之间的边权重之和(而不是某个顶点所有相邻边的权重之和)。为了说明得更清楚,假设给定如下边列表:
nodeA nodeB
1 ID_2 ID_4
2 ID_2 ID_5
3 ID_1 ID_16
我只想从给定的图中得到上面列出的每一对节点之间的边权重之和(注意该图是无向的)。输出应该如下所示:
nodeA nodeB sum_of_weight
1 ID_2 ID_4 1.70
2 ID_2 ID_5 1.40
3 ID_1 ID_16 0.59
对于第一条边(ID_2 到 ID_4),总和为 1.70(例如:ID_2 到 ID_3 = 0.9,ID_3 到 ID_4 = 0.8,所以 0.9 + 0.8 = 1.70),列表中其余的边依此类推。
你能告诉我,我该怎么做这份工作吗?
边的多条路径
该图中两个节点之间可能存在多条路径(即对于给定的一对节点,可能有 1 条或多条不同的路径)。例如,从 ID_3 到 ID_4 有两种走法:
(i) ID_3 -- ID_2 -- ID_4(总路径权重为 2.4)和 (ii) ID_3 -- ID_5 -- ID_4(总路径权重为 1.5)。在这种情况下,我想取最大的路径权重。
多个路径的可重现数据
# Edge list with alternative routes between some vertices (eight edges);
# edge_weight[i] is the weight of the edge nodeA[i] -- nodeB[i].
nodeA <- c("ID_1", "ID_2", "ID_4", "ID_5", "ID_16", "Node_30", "ID_6", "ID_2")
nodeB <- c("ID_11", "ID_3", "ID_5", "ID_3", "ID_11", "Node_3", "ID_3", "ID_4")
edge_weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7, 1.5)
我会使用以下方法:
加载包
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
加载数据
# Undirected graph built from the (nodeA, nodeB) edge list.
nodeA <- c("ID_1", "ID_2", "ID_3", "ID_4", "ID_5", "ID_16", "Node_30")
nodeB <- c("ID_11", "ID_3", "ID_4", "ID_5", "ID_3", "ID_11", "Node_3")
df <- data.frame(nodeA, nodeB)
graph <- graph_from_data_frame(df, directed = FALSE)
# Edge weights, listed in the same order as the rows of df.
E(graph)$weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7)
估计这些节点之间的所有成对距离
# Distance matrix with one row per entry of `v` and one column per entry of
# `to`; only the (i, i) entries correspond to the requested pairs.
all_distances <- distances(
graph = graph,
v = c("ID_2", "ID_2", "ID_1"),
to = c("ID_4", "ID_5", "ID_16")
)
检查输出
# Rows follow `v`, columns follow `to`; Inf marks pairs with no connecting path.
all_distances
#> ID_4 ID_5 ID_16
#> ID_2 1.7 1.4 Inf
#> ID_2 1.7 1.4 Inf
#> ID_1 Inf Inf 0.59
取对角线
# Entry (i, i) pairs the i-th `v` with the i-th `to`, i.e. the requested pairs.
diag(all_distances)
#> [1] 1.70 1.40 0.59
请注意,to 参数中不允许出现重复的顶点,否则会报错。例如:
# "ID_3" appears twice in `to`, so distances() stops with the error shown below.
distances(graph, v = c("ID_2", "ID_5", "ID_4"), c("ID_3", "ID_3", "ID_5"))
#> Error in distances(graph, v = c("ID_2", "ID_5", "ID_4"), c("ID_3", "ID_3", : At structural_properties.c:4461 : Duplicate vertices in `to', this is not allowed, Invalid value
在这种情况下,应先只对去重后的目标顶点计算距离,然后再用 match() 取出与原始(含重复项的)目标顶点对应的列:
# Pairwise distances when the targets contain duplicates.
idx_from <- c("ID_2", "ID_5", "ID_4")
idx_to <- c("ID_3", "ID_3", "ID_5") # note the duplicated names
# distances() rejects duplicated targets, so query each target only once.
idx_to_unique <- unique(idx_to)
all_distances <- distances(graph, idx_from, idx_to_unique)
# Pick entry (row i, column of idx_to[i]) for every pair by matrix indexing.
# Unlike diag(all_distances[, cols]) this also works for a single pair, where
# the column subset would collapse to a scalar and diag() would misread it.
all_distances[cbind(seq_along(idx_from), match(idx_to, idx_to_unique))]
#> [1] 0.9 0.5 0.7
由 reprex package (v2.0.1)
于 2021-11-06 创建
编辑:添加沿最长路径求和的示例
# packages (startup messages are suppressed to keep the output short)
suppressPackageStartupMessages(library(igraph))
# data: edge list that contains alternative routes between some vertices
nodeA <- c("ID_1", "ID_2", "ID_4", "ID_5", "ID_16", "Node_30", "ID_6", "ID_2")
nodeB <- c("ID_11", "ID_3", "ID_5", "ID_3", "ID_11", "Node_3", "ID_3", "ID_4")
df <- data.frame(nodeA, nodeB)
graph <- graph_from_data_frame(df, directed = FALSE)
# edge weights, listed in the same order as the rows of df
E(graph)$weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7, 1.5)
# Largest total edge weight over all simple paths between two vertices.
#
# Arguments:
#   graph: igraph graph; the `weight` edge attribute is summed along each path.
#   from:  start vertex, forwarded to all_simple_paths().
#   to:    end vertex, forwarded to all_simple_paths().
#
# Returns the maximum of the per-path weight sums, or Inf when
# all_simple_paths() returns no path at all.
my_sum_weights <- function(graph, from, to) {
  candidate_paths <- all_simple_paths(graph = graph, from = from, to = to)

  # Nothing connects the two vertices: report Inf.
  if (length(candidate_paths) == 0L) {
    return(Inf)
  }

  # One total per path: add up the weights of the edges the path walks through.
  path_totals <- vapply(
    candidate_paths,
    function(vertex_seq) sum(E(graph, path = vertex_seq)$weight),
    numeric(1)
  )
  max(path_totals)
}
# Examples: single pairs
my_sum_weights(graph, "ID_3", "ID_4")
#> [1] 2.4
my_sum_weights(graph, "ID_6", "ID_4")
#> [1] 3.1
my_sum_weights(graph, "ID_5", "ID_2")
#> [1] 2.3
# Several start vertices at once: mapply() varies `from`, while `to` and
# `graph` stay fixed through MoreArgs. ID_1 has no path to ID_4, hence Inf.
mapply(my_sum_weights, from = c("ID_3", "ID_6", "ID_1"), MoreArgs = list(to = "ID_4", graph = graph))
#> ID_3 ID_6 ID_1
#> 2.4 3.1 Inf
由 reprex package (v2.0.1)
于 2021-11-06 创建
请注意,两个顶点之间的简单路径数量可能随图的规模呈指数级增长,因此使用 all_simple_paths 函数时可能会耗尽内存。
更新
由于您正在寻找两个顶点之间的最长路径,您可以尝试下面的代码
# For every (v, to) row of df_2, store the largest total edge weight found over
# all simple paths between the two vertices in graph1.
df_2$max_sum_weight <- vapply(
  seq_len(nrow(df_2)),
  function(i) {
    paths <- all_simple_paths(
      graph1,
      as.character(df_2[["v"]][[i]]),
      as.character(df_2[["to"]][[i]])
    )
    # max() over an empty result would fail; report Inf when no path exists,
    # the same value distances() uses for unreachable pairs.
    if (length(paths) == 0L) {
      return(Inf)
    }
    # Sum the weights of the edges walked by each path, then keep the largest.
    max(vapply(
      paths,
      function(p) sum(E(graph1, path = p)$weight),
      numeric(1)
    ))
  },
  numeric(1)
)
这给出了
> df_2
v to max_sum_weight
1 ID_2 ID_4 2.4
2 ID_2 ID_5 2.3
3 ID_1 ID_16 1.0
4 ID_3 ID_4 2.4
数据
# Vertex pairs to query: `v` holds the start vertex and `to` the end vertex
# of each pair (one pair per row).
df_2 <- data.frame(
v = c("ID_2", "ID_2", "ID_1", "ID_3"),
to = c("ID_4", "ID_5", "ID_16", "ID_4")
)
上一个答案(最短路径)
尝试shortest.paths
# Shortest-path weight for every (v, to) row of df_2.
# distances() replaces the deprecated shortest.paths() alias. It rejects
# duplicated `to` vertices, so query the unique targets and pick the entry
# (row i, column of to[i]) by matrix indexing instead of taking diag().
transform(
  df_2,
  sum_of_weight = distances(graph1, v, unique(to))[
    cbind(seq_along(v), match(to, unique(to)))
  ]
)
或
# Same result from the all-pairs distance matrix, indexed by (row name,
# column name) pairs. Only the `v` and `to` columns are used as the index, so
# additional columns of df_2 (e.g. max_sum_weight) cannot break the lookup.
# distances() replaces the deprecated shortest.paths() alias.
transform(
  df_2,
  sum_of_weight = distances(graph1)[as.matrix(df_2[c("v", "to")])]
)
这给出了
v to sum_of_weight
1 ID_2 ID_4 1.70
2 ID_2 ID_5 1.40
3 ID_1 ID_16 0.59
我们可以使用 igraph 包的 strength 函数,对每个顶点的相邻边的权重求和。假设我们有如下的图:
可重现的数据和代码
# Reproducible example: build an undirected, weighted graph from an edge list.
library(igraph)

nodeA <- c("ID_1", "ID_2", "ID_3", "ID_4", "ID_5", "ID_16", "Node_30")
nodeB <- c("ID_11", "ID_3", "ID_4", "ID_5", "ID_3", "ID_11", "Node_3")
edge_weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7)
df_1 <- data.frame(nodeA, nodeB, edge_weight)
graph1 <- graph_from_data_frame(df_1, directed = FALSE)
# The third column is stored as the edge attribute `edge_weight`, so copy it
# into `weight` on graph1 itself (the graph that is plotted and measured below).
E(graph1)$weight <- df_1$edge_weight
plot(graph1)
# Per-vertex sum of the weights of the incident edges.
edge_strengts <- strength(graph1)
strength 函数返回的是图中每个顶点所有相邻边的权重之和:
ID_1 ID_2 ID_3 ID_4 ID_5 ID_16 Node_30 ID_11 Node_3
0.50 0.90 2.20 1.50 1.20 0.09 0.70 0.59 0.70
现在,我有一个边列表(节点对),我只想得到列表中每一对节点之间的边权重之和(而不是某个顶点所有相邻边的权重之和)。为了说明得更清楚,假设给定如下边列表:
nodeA nodeB
1 ID_2 ID_4
2 ID_2 ID_5
3 ID_1 ID_16
我只想从给定的图中得到上面列出的每一对节点之间的边权重之和(注意该图是无向的)。输出应该如下所示:
nodeA nodeB sum_of_weight
1 ID_2 ID_4 1.70
2 ID_2 ID_5 1.40
3 ID_1 ID_16 0.59
对于第一条边(ID_2 到 ID_4),总和为 1.70(例如:ID_2 到 ID_3 = 0.9,ID_3 到 ID_4 = 0.8,所以 0.9 + 0.8 = 1.70),列表中其余的边依此类推。
你能告诉我,我该怎么做这份工作吗?
边的多条路径
该图中两个节点之间可能存在多条路径(即对于给定的一对节点,可能有 1 条或多条不同的路径)。例如,从 ID_3 到 ID_4 有两种走法:
(i) ID_3 -- ID_2 -- ID_4(总路径权重为 2.4)和 (ii) ID_3 -- ID_5 -- ID_4(总路径权重为 1.5)。在这种情况下,我想取最大的路径权重。
多个路径的可重现数据
# Edge list with alternative routes between some vertices (eight edges);
# edge_weight[i] is the weight of the edge nodeA[i] -- nodeB[i].
nodeA <- c("ID_1", "ID_2", "ID_4", "ID_5", "ID_16", "Node_30", "ID_6", "ID_2")
nodeB <- c("ID_11", "ID_3", "ID_5", "ID_3", "ID_11", "Node_3", "ID_3", "ID_4")
edge_weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7, 1.5)
我会使用以下方法:
加载包
library(igraph)
#>
#> Attaching package: 'igraph'
#> The following objects are masked from 'package:stats':
#>
#> decompose, spectrum
#> The following object is masked from 'package:base':
#>
#> union
加载数据
# Undirected graph built from the (nodeA, nodeB) edge list.
nodeA <- c("ID_1", "ID_2", "ID_3", "ID_4", "ID_5", "ID_16", "Node_30")
nodeB <- c("ID_11", "ID_3", "ID_4", "ID_5", "ID_3", "ID_11", "Node_3")
df <- data.frame(nodeA, nodeB)
graph <- graph_from_data_frame(df, directed = FALSE)
# Edge weights, listed in the same order as the rows of df.
E(graph)$weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7)
估计这些节点之间的所有成对距离
# Distance matrix with one row per entry of `v` and one column per entry of
# `to`; only the (i, i) entries correspond to the requested pairs.
all_distances <- distances(
graph = graph,
v = c("ID_2", "ID_2", "ID_1"),
to = c("ID_4", "ID_5", "ID_16")
)
检查输出
# Rows follow `v`, columns follow `to`; Inf marks pairs with no connecting path.
all_distances
#> ID_4 ID_5 ID_16
#> ID_2 1.7 1.4 Inf
#> ID_2 1.7 1.4 Inf
#> ID_1 Inf Inf 0.59
取对角线
# Entry (i, i) pairs the i-th `v` with the i-th `to`, i.e. the requested pairs.
diag(all_distances)
#> [1] 1.70 1.40 0.59
请注意,to 参数中不允许出现重复的顶点,否则会报错。例如:
# "ID_3" appears twice in `to`, so distances() stops with the error shown below.
distances(graph, v = c("ID_2", "ID_5", "ID_4"), c("ID_3", "ID_3", "ID_5"))
#> Error in distances(graph, v = c("ID_2", "ID_5", "ID_4"), c("ID_3", "ID_3", : At structural_properties.c:4461 : Duplicate vertices in `to', this is not allowed, Invalid value
在这种情况下,应先只对去重后的目标顶点计算距离,然后再用 match() 取出与原始(含重复项的)目标顶点对应的列:
# Pairwise distances when the targets contain duplicates.
idx_from <- c("ID_2", "ID_5", "ID_4")
idx_to <- c("ID_3", "ID_3", "ID_5") # note the duplicated names
# distances() rejects duplicated targets, so query each target only once.
idx_to_unique <- unique(idx_to)
all_distances <- distances(graph, idx_from, idx_to_unique)
# Pick entry (row i, column of idx_to[i]) for every pair by matrix indexing.
# Unlike diag(all_distances[, cols]) this also works for a single pair, where
# the column subset would collapse to a scalar and diag() would misread it.
all_distances[cbind(seq_along(idx_from), match(idx_to, idx_to_unique))]
#> [1] 0.9 0.5 0.7
由 reprex package (v2.0.1)
于 2021-11-06 创建
编辑:添加沿最长路径求和的示例
# packages (startup messages are suppressed to keep the output short)
suppressPackageStartupMessages(library(igraph))
# data: edge list that contains alternative routes between some vertices
nodeA <- c("ID_1", "ID_2", "ID_4", "ID_5", "ID_16", "Node_30", "ID_6", "ID_2")
nodeB <- c("ID_11", "ID_3", "ID_5", "ID_3", "ID_11", "Node_3", "ID_3", "ID_4")
df <- data.frame(nodeA, nodeB)
graph <- graph_from_data_frame(df, directed = FALSE)
# edge weights, listed in the same order as the rows of df
E(graph)$weight <- c(0.5, 0.9, 0.8, 0.7, 0.5, 0.09, 0.7, 1.5)
# Largest total edge weight over all simple paths between two vertices.
#
# Arguments:
#   graph: igraph graph; the `weight` edge attribute is summed along each path.
#   from:  start vertex, forwarded to all_simple_paths().
#   to:    end vertex, forwarded to all_simple_paths().
#
# Returns the maximum of the per-path weight sums, or Inf when
# all_simple_paths() returns no path at all.
my_sum_weights <- function(graph, from, to) {
  candidate_paths <- all_simple_paths(graph = graph, from = from, to = to)

  # Nothing connects the two vertices: report Inf.
  if (length(candidate_paths) == 0L) {
    return(Inf)
  }

  # One total per path: add up the weights of the edges the path walks through.
  path_totals <- vapply(
    candidate_paths,
    function(vertex_seq) sum(E(graph, path = vertex_seq)$weight),
    numeric(1)
  )
  max(path_totals)
}
# Examples: single pairs
my_sum_weights(graph, "ID_3", "ID_4")
#> [1] 2.4
my_sum_weights(graph, "ID_6", "ID_4")
#> [1] 3.1
my_sum_weights(graph, "ID_5", "ID_2")
#> [1] 2.3
# Several start vertices at once: mapply() varies `from`, while `to` and
# `graph` stay fixed through MoreArgs. ID_1 has no path to ID_4, hence Inf.
mapply(my_sum_weights, from = c("ID_3", "ID_6", "ID_1"), MoreArgs = list(to = "ID_4", graph = graph))
#> ID_3 ID_6 ID_1
#> 2.4 3.1 Inf
由 reprex package (v2.0.1)
于 2021-11-06 创建
请注意,两个顶点之间的简单路径数量可能随图的规模呈指数级增长,因此使用 all_simple_paths 函数时可能会耗尽内存。
更新
由于您正在寻找两个顶点之间的最长路径,您可以尝试下面的代码
# For every (v, to) row of df_2, store the largest total edge weight found over
# all simple paths between the two vertices in graph1.
df_2$max_sum_weight <- vapply(
  seq_len(nrow(df_2)),
  function(i) {
    paths <- all_simple_paths(
      graph1,
      as.character(df_2[["v"]][[i]]),
      as.character(df_2[["to"]][[i]])
    )
    # max() over an empty result would fail; report Inf when no path exists,
    # the same value distances() uses for unreachable pairs.
    if (length(paths) == 0L) {
      return(Inf)
    }
    # Sum the weights of the edges walked by each path, then keep the largest.
    max(vapply(
      paths,
      function(p) sum(E(graph1, path = p)$weight),
      numeric(1)
    ))
  },
  numeric(1)
)
这给出了
> df_2
v to max_sum_weight
1 ID_2 ID_4 2.4
2 ID_2 ID_5 2.3
3 ID_1 ID_16 1.0
4 ID_3 ID_4 2.4
数据
# Vertex pairs to query: `v` holds the start vertex and `to` the end vertex
# of each pair (one pair per row).
df_2 <- data.frame(
v = c("ID_2", "ID_2", "ID_1", "ID_3"),
to = c("ID_4", "ID_5", "ID_16", "ID_4")
)
上一个答案(最短路径)
尝试shortest.paths
# Shortest-path weight for every (v, to) row of df_2.
# distances() replaces the deprecated shortest.paths() alias. It rejects
# duplicated `to` vertices, so query the unique targets and pick the entry
# (row i, column of to[i]) by matrix indexing instead of taking diag().
transform(
  df_2,
  sum_of_weight = distances(graph1, v, unique(to))[
    cbind(seq_along(v), match(to, unique(to)))
  ]
)
或
# Same result from the all-pairs distance matrix, indexed by (row name,
# column name) pairs. Only the `v` and `to` columns are used as the index, so
# additional columns of df_2 (e.g. max_sum_weight) cannot break the lookup.
# distances() replaces the deprecated shortest.paths() alias.
transform(
  df_2,
  sum_of_weight = distances(graph1)[as.matrix(df_2[c("v", "to")])]
)
这给出了
v to sum_of_weight
1 ID_2 ID_4 1.70
2 ID_2 ID_5 1.40
3 ID_1 ID_16 0.59