基于节点权重构建图的优化算法
optimise algorithm for building a graph based on node weights
我正在尝试改进一个功能,以根据从某些节点属性计算出的分数来构建网络。该函数试图从最大化节点属性乘积的图中找到最佳子网。
函数从一个随机节点开始,从第一个邻居开始搜索,如果有一些邻居的节点得分满足阈值,则将neighbour/s添加到第一个节点,并继续该过程,直到没有添加了更多(添加邻居不会产生所需的分数增量)。如果第一个邻居中没有节点产生分数的增量,则该函数将查找第二个邻居。在这种情况下,很可能有几条路径连接节点(2 度邻居),在这种特定情况下,选择的路径将是最短且权重最高的路径(节点属性之一)。
我可以对代码进行一些并行处理,尽管我不知道如何在此类函数中实现它。
函数如下:
build_network <-
function (G, seed, d= 2){
net <- G
d <- d
score.fun<-function(g){
Za <- sum(V(g)$weight*V(g)$RWRNodeweight)/sqrt(sum(V(g)$RWRNodeweight^2))
k <- vcount(g)
tmp <- genesets.length.null.stat[[as.character(k)]] # genesets.length.null.stat is a list with the median of Za and sd of Za calculated for 1000 replicates of networks of size k
Sa <- (Za-tmp[1])/tmp[2]
}
best.fun<-function(in.nodes,out.nodes) {
score<-(-Inf); best<-character()
for(node in out.nodes){
subG.update<-induced.subgraph(net, c(in.nodes,node))
if( score.fun(subG.update) > score ){
score<-score.fun(subG.update)
best<-node
}
}
list("node"=best,"score"=score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
in.nodes <- V(subG)$name
node_num <- vcount(subG)
subsum <- score.fun(subG)
#subx <- V(subG)$name
for (rad in 1:d) {
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = V(subG)$name))
pot.nodes <- V(net)[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
best_node<-best.fun(in.nodes, out.nodes)
new_score<-best_node$score
best_node<-best_node$node
if (new_score > subsum + 0.01) {
tmp <- unlist(lapply(best_node, function(x) node2treePath(net,V(subG)$name, x))) # node2treePath is a function to retrieve the shortest path with the highest node weights
in.nodes <- c(tmp, V(subG)$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
return(subG)
}
我正在尝试将此函数应用于约 10,000 个节点的图形。这里将是 运行 函数
的代码的近似值
### generate some example data
library(igraph)
my_graph <- erdos.renyi.game(10000, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(10000)
V(my_graph)$RWRNodeweight <- runif(10000, min=0, max=0.05)
### Run the function
sublist = list()
for (node in V(G)$name) {
subnet <- build_network(G, node, d)
sublist[[node]] <- subnet }
编辑:这是 head(genesets.length.null.stat)
的 dput
structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))
这里是 node2treePath
函数:
node2treePath <- function (G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- unlist(lapply(tmp.path, length))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
tmp.sum <- unlist(lapply(tmp.path, function(x)return(sum(V(G)[x]$weight))))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
collect <- unlist(lapply(selected.path, function(x)return(V(G)[x]$name)))
return(collect)
}
因为你的分数函数只依赖于节点属性而不是边的属性,所以解不是唯一的;你可能想搜索一棵最好的树。如果你重组你的问题,使你的节点是边缘,反之亦然,你可能只使用例如 Djikstra 的算法来找到最好的一个。这已经在 igraph 包中作为 shortest.paths()
.
我看不懂 R 代码,但根据你的描述:如果分数阈值是常数, 那么这在 O(|V|+ |E|+|C|^2) 时间,其中|C|是 "good" 个组件的数量(这将在稍后进一步解释)。
在第一遍中,删除得分低于阈值的所有节点。然后在这个新图中找到所有连接的组件(这可以在 O(|V|+|E|) 时间内通过在每个尚未访问的节点处启动 DFS 来完成),通过将所有顶点权重相乘来计算它们的分数组件,并用其组件 ID 标记每个顶点。这已经告诉您 "good" 个组件——不需要任何 2 度连接的组件。
假设这会产生|C|组件。创建一个空哈希表 H,其中包含用于键的组件 ID 对和用于值的(长度、重量)对。现在返回你在第一遍中删除的每个顶点 v:对于每个顶点,查看它的所有邻居并记录每个不同组件的最短边(这可以使用长度-|C| 数组来存储最短边来完成到目前为止看到的每个组件)。在检查了 v 的所有邻居之后,计算它们属于的不同组件的数量 k:如果 k >= 2,则 v 可能应该用于连接这些 k(k-1)/2 对组件中的一些组件。对于可以由 v 连接的每一对不同的组件 i 和 j,根据需要使用此 2 边连接的权重和距离更新 H:也就是说,如果 i 和 j 尚未连接在一起,则记录 v 连接他们;否则,如果它们已经由某个顶点 u 加入,则仅在 v 可以做得更好时才更新 H(即,如果它使用比 u 更少的总长度和更大的权重)。这一步可以被认为是在 "component graph" 中构建最小生成树,该 "component graph" 从原始的修剪图派生。每个新 "combined" 组件的分数可以很容易地计算出来,只需将两个组成部分的分数相乘即可。
最后,简单地 return 乘积最大的组件。
对于您想要执行的逻辑(我想您可能希望以与上述答案不兼容的方式进行更改),以下代码大约快 10 倍 30%。我使用 Rprof
和 profr
并以琐碎的方式重新编码了一些慢位,例如不传递命名列表对,只是来自您的一个函数的匿名对。具有 genesets.length.null.stat
值对的数字命名列表效率非常低。我用两个数字向量替换了它。您还多次调用 'V' 函数,这是一个很大的时间消耗:如您所见,您可以调用它一次,然后根据需要查询结果。
# node2treePath is a function to retrieve the shortest path with the highest node weights
node2treePath_jw <- function(G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- vapply(tmp.path, length, integer(1))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
Vg <- V(G)
tmp.sum <- vapply(tmp.path, function(x) sum(Vg[x]$weight), numeric(1))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
sapply(selected.path, function(x) Vg[x]$name)
}
build_network_jw <- function(net, seed, d= 2){
score.fun <- function(Vg, k){
Za <- sum(Vg$weight * Vg$RWRNodeweight) / sqrt(sum(Vg$RWRNodeweight^2))
(Za - genesets_jack_a[k]) / genesets_jack_b[k]
}
best.fun_jw <- function(in.nodes, out.nodes) {
score <- (-Inf)
best <- character()
for (node in out.nodes) {
subG.update <- induced.subgraph(net, c(in.nodes,node))
Vsgu <- V(subG.update)
Vsgu_count <- vcount(subG.update)
sf <- score.fun(Vsgu, Vsgu_count)
if (sf > score) {
score <- sf
best <- node
}
}
list(best, score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
VsubG <- V(subG)
Vnet <- V(net)
in.nodes <- VsubG$name
node_num <- vcount(subG)
subsum <- score.fun(VsubG, node_num)
for (rad in 1:d) { # d = 2
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = VsubG$name))
pot.nodes <- Vnet[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
best_node <- best.fun_jw(in.nodes, out.nodes)
new_score <- best_node[[2]]
best_node <- best_node[[1]]
if (new_score > subsum + 0.01) {
tmp <- sapply(best_node, function(x) node2treePath_jw(net, VsubG$name, x))
in.nodes <- c(tmp, VsubG$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
subG
}
node2treePath <- function (G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- unlist(lapply(tmp.path, length))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
tmp.sum <- unlist(lapply(tmp.path, function(x)return(sum(V(G)[x]$weight))))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
collect <- unlist(lapply(selected.path, function(x)return(V(G)[x]$name)))
return(collect)
}
build_network <- function (net, seed, d= 2){
#genesets.length.null.stat <- structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))
genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
names(genesets.length.null.stat) <- 1:500
score.fun<-function(g){
Za <- sum(V(g)$weight*V(g)$RWRNodeweight)/sqrt(sum(V(g)$RWRNodeweight^2))
k <- vcount(g)
tmp <- genesets.length.null.stat[[as.character(k)]] # genesets.length.null.stat is a list with the median of Za and sd of Za calculated for 1000 replicates of networks of size k
Sa <- (Za-tmp[1])/tmp[2]
}
best.fun <- function(in.nodes,out.nodes) {
score<-(-Inf); best<-character()
for (node in out.nodes){
subG.update<-induced.subgraph(net, c(in.nodes,node))
if (score.fun(subG.update) > score) {
score<-score.fun(subG.update)
best<-node
}
}
list("node"=best,"score"=score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
in.nodes <- V(subG)$name
node_num <- vcount(subG)
subsum <- score.fun(subG)
#subx <- V(subG)$name
for (rad in 1:d) {
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = V(subG)$name))
pot.nodes <- V(net)[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
#message("length in.nodes = ", length(in.nodes))
#message("length out.nodes = ", length(out.nodes))
best_node<-best.fun(in.nodes, out.nodes)
new_score<-best_node$score
best_node<-best_node$node
if (new_score > subsum + 0.01) {
tmp <- unlist(lapply(best_node, function(x) node2treePath(net,V(subG)$name, x))) # node2treePath is a function to retrieve the shortest path with the highest node weights
in.nodes <- c(tmp, V(subG)$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
subG
}
library(igraph)
library(profr)
library(igraph)
library(profr)
#genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
#names(genesets.length.null.stat) <- 1:500
set.seed(1)
genesets_jack_a = runif(500) + 1:500
genesets_jack_b = runif(500) + 1:500
do_it_jw <- function(n = 1000){
my_graph <- erdos.renyi.game(n, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(n)
V(my_graph)$RWRNodeweight <- runif(n, min = 0, max = 0.05)
### Run the function
sublist = list()
Vmg <- V(my_graph)
for (node in Vmg$name) {
#message(node)
subnet <- build_network_jw(my_graph, node, 2)
sublist[[node]] <- subnet }
}
do_it <- function(n = 1000){
my_graph <- erdos.renyi.game(n, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(n)
V(my_graph)$RWRNodeweight <- runif(n, min = 0, max = 0.05)
### Run the function
sublist = list()
Vmg <- V(my_graph)
for (node in Vmg$name) {
#message(node)
subnet <- build_network(my_graph, node, 2)
sublist[[node]] <- subnet }
}
library(microbenchmark)
mb <- microbenchmark(do_it(1000), do_it_jw(1000), times = 5)
print(mb)
我正在尝试改进一个功能,以根据从某些节点属性计算出的分数来构建网络。该函数试图从最大化节点属性乘积的图中找到最佳子网。
函数从一个随机节点开始,从第一个邻居开始搜索,如果有一些邻居的节点得分满足阈值,则将neighbour/s添加到第一个节点,并继续该过程,直到没有添加了更多(添加邻居不会产生所需的分数增量)。如果第一个邻居中没有节点产生分数的增量,则该函数将查找第二个邻居。在这种情况下,很可能有几条路径连接节点(2 度邻居),在这种特定情况下,选择的路径将是最短且权重最高的路径(节点属性之一)。
我可以对代码进行一些并行处理,尽管我不知道如何在此类函数中实现它。
函数如下:
build_network <-
function (G, seed, d= 2){
net <- G
d <- d
score.fun<-function(g){
Za <- sum(V(g)$weight*V(g)$RWRNodeweight)/sqrt(sum(V(g)$RWRNodeweight^2))
k <- vcount(g)
tmp <- genesets.length.null.stat[[as.character(k)]] # genesets.length.null.stat is a list with the median of Za and sd of Za calculated for 1000 replicates of networks of size k
Sa <- (Za-tmp[1])/tmp[2]
}
best.fun<-function(in.nodes,out.nodes) {
score<-(-Inf); best<-character()
for(node in out.nodes){
subG.update<-induced.subgraph(net, c(in.nodes,node))
if( score.fun(subG.update) > score ){
score<-score.fun(subG.update)
best<-node
}
}
list("node"=best,"score"=score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
in.nodes <- V(subG)$name
node_num <- vcount(subG)
subsum <- score.fun(subG)
#subx <- V(subG)$name
for (rad in 1:d) {
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = V(subG)$name))
pot.nodes <- V(net)[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
best_node<-best.fun(in.nodes, out.nodes)
new_score<-best_node$score
best_node<-best_node$node
if (new_score > subsum + 0.01) {
tmp <- unlist(lapply(best_node, function(x) node2treePath(net,V(subG)$name, x))) # node2treePath is a function to retrieve the shortest path with the highest node weights
in.nodes <- c(tmp, V(subG)$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
return(subG)
}
我正在尝试将此函数应用于约 10,000 个节点的图形。这里将是 运行 函数
的代码的近似值### generate some example data
library(igraph)
my_graph <- erdos.renyi.game(10000, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(10000)
V(my_graph)$RWRNodeweight <- runif(10000, min=0, max=0.05)
### Run the function
sublist = list()
for (node in V(G)$name) {
subnet <- build_network(G, node, d)
sublist[[node]] <- subnet }
编辑:这是 head(genesets.length.null.stat)
dput
structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))
这里是 node2treePath
函数:
node2treePath <- function (G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- unlist(lapply(tmp.path, length))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
tmp.sum <- unlist(lapply(tmp.path, function(x)return(sum(V(G)[x]$weight))))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
collect <- unlist(lapply(selected.path, function(x)return(V(G)[x]$name)))
return(collect)
}
因为你的分数函数只依赖于节点属性而不是边的属性,所以解不是唯一的;你可能想搜索一棵最好的树。如果你重组你的问题,使你的节点是边缘,反之亦然,你可能只使用例如 Djikstra 的算法来找到最好的一个。这已经在 igraph 包中作为 shortest.paths()
.
我看不懂 R 代码,但根据你的描述:如果分数阈值是常数, 那么这在 O(|V|+ |E|+|C|^2) 时间,其中|C|是 "good" 个组件的数量(这将在稍后进一步解释)。
在第一遍中,删除得分低于阈值的所有节点。然后在这个新图中找到所有连接的组件(这可以在 O(|V|+|E|) 时间内通过在每个尚未访问的节点处启动 DFS 来完成),通过将所有顶点权重相乘来计算它们的分数组件,并用其组件 ID 标记每个顶点。这已经告诉您 "good" 个组件——不需要任何 2 度连接的组件。
假设这会产生|C|组件。创建一个空哈希表 H,其中包含用于键的组件 ID 对和用于值的(长度、重量)对。现在返回你在第一遍中删除的每个顶点 v:对于每个顶点,查看它的所有邻居并记录每个不同组件的最短边(这可以使用长度-|C| 数组来存储最短边来完成到目前为止看到的每个组件)。在检查了 v 的所有邻居之后,计算它们属于的不同组件的数量 k:如果 k >= 2,则 v 可能应该用于连接这些 k(k-1)/2 对组件中的一些组件。对于可以由 v 连接的每一对不同的组件 i 和 j,根据需要使用此 2 边连接的权重和距离更新 H:也就是说,如果 i 和 j 尚未连接在一起,则记录 v 连接他们;否则,如果它们已经由某个顶点 u 加入,则仅在 v 可以做得更好时才更新 H(即,如果它使用比 u 更少的总长度和更大的权重)。这一步可以被认为是在 "component graph" 中构建最小生成树,该 "component graph" 从原始的修剪图派生。每个新 "combined" 组件的分数可以很容易地计算出来,只需将两个组成部分的分数相乘即可。
最后,简单地 return 乘积最大的组件。
对于您想要执行的逻辑(我想您可能希望以与上述答案不兼容的方式进行更改),以下代码大约快 10 倍 30%。我使用 Rprof
和 profr
并以琐碎的方式重新编码了一些慢位,例如不传递命名列表对,只是来自您的一个函数的匿名对。具有 genesets.length.null.stat
值对的数字命名列表效率非常低。我用两个数字向量替换了它。您还多次调用 'V' 函数,这是一个很大的时间消耗:如您所见,您可以调用它一次,然后根据需要查询结果。
# node2treePath is a function to retrieve the shortest path with the highest node weights
node2treePath_jw <- function(G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- vapply(tmp.path, length, integer(1))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
Vg <- V(G)
tmp.sum <- vapply(tmp.path, function(x) sum(Vg[x]$weight), numeric(1))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
sapply(selected.path, function(x) Vg[x]$name)
}
build_network_jw <- function(net, seed, d= 2){
score.fun <- function(Vg, k){
Za <- sum(Vg$weight * Vg$RWRNodeweight) / sqrt(sum(Vg$RWRNodeweight^2))
(Za - genesets_jack_a[k]) / genesets_jack_b[k]
}
best.fun_jw <- function(in.nodes, out.nodes) {
score <- (-Inf)
best <- character()
for (node in out.nodes) {
subG.update <- induced.subgraph(net, c(in.nodes,node))
Vsgu <- V(subG.update)
Vsgu_count <- vcount(subG.update)
sf <- score.fun(Vsgu, Vsgu_count)
if (sf > score) {
score <- sf
best <- node
}
}
list(best, score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
VsubG <- V(subG)
Vnet <- V(net)
in.nodes <- VsubG$name
node_num <- vcount(subG)
subsum <- score.fun(VsubG, node_num)
for (rad in 1:d) { # d = 2
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = VsubG$name))
pot.nodes <- Vnet[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
best_node <- best.fun_jw(in.nodes, out.nodes)
new_score <- best_node[[2]]
best_node <- best_node[[1]]
if (new_score > subsum + 0.01) {
tmp <- sapply(best_node, function(x) node2treePath_jw(net, VsubG$name, x))
in.nodes <- c(tmp, VsubG$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
subG
}
node2treePath <- function (G, Tnodes, node){
tmp.path <- get.all.shortest.paths(G, node, Tnodes)$res
tmp.l <- unlist(lapply(tmp.path, length))
index <- which(tmp.l == min(tmp.l))
tmp.path = tmp.path[index]
tmp.sum <- unlist(lapply(tmp.path, function(x)return(sum(V(G)[x]$weight))))
index <- which(tmp.sum == max(tmp.sum))
selected.path = tmp.path[index]
collect <- unlist(lapply(selected.path, function(x)return(V(G)[x]$name)))
return(collect)
}
build_network <- function (net, seed, d= 2){
#genesets.length.null.stat <- structure(list(`1` = c(1.01397367504035, 1.18858228819048), `2` = c(1.61970348041337, 1.30189433386605), `3` = c(2.11767222957028, 1.36222065695878), `4` = c(2.47710421934929, 1.36968129959296), `5` = c(2.776011866622, 1.36318885187196), `6` = c(3.16885126246671, 1.42577861995897)), .Names = c("1", "2", "3", "4", "5", "6"))
genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
names(genesets.length.null.stat) <- 1:500
score.fun<-function(g){
Za <- sum(V(g)$weight*V(g)$RWRNodeweight)/sqrt(sum(V(g)$RWRNodeweight^2))
k <- vcount(g)
tmp <- genesets.length.null.stat[[as.character(k)]] # genesets.length.null.stat is a list with the median of Za and sd of Za calculated for 1000 replicates of networks of size k
Sa <- (Za-tmp[1])/tmp[2]
}
best.fun <- function(in.nodes,out.nodes) {
score<-(-Inf); best<-character()
for (node in out.nodes){
subG.update<-induced.subgraph(net, c(in.nodes,node))
if (score.fun(subG.update) > score) {
score<-score.fun(subG.update)
best<-node
}
}
list("node"=best,"score"=score)
}
subG <- induced.subgraph(net, seed)
if (!is.connected(subG)) { #the seed must be connected
stop("Input seeds are disjoint")
}
while (TRUE) {
in.nodes <- V(subG)$name
node_num <- vcount(subG)
subsum <- score.fun(subG)
#subx <- V(subG)$name
for (rad in 1:d) {
tmp.neigh <- unlist(neighborhood(net, order = rad, nodes = V(subG)$name))
pot.nodes <- V(net)[tmp.neigh]$name
out.nodes <- setdiff(pot.nodes, in.nodes)
if (length(out.nodes) == 0) break
#message("length in.nodes = ", length(in.nodes))
#message("length out.nodes = ", length(out.nodes))
best_node<-best.fun(in.nodes, out.nodes)
new_score<-best_node$score
best_node<-best_node$node
if (new_score > subsum + 0.01) {
tmp <- unlist(lapply(best_node, function(x) node2treePath(net,V(subG)$name, x))) # node2treePath is a function to retrieve the shortest path with the highest node weights
in.nodes <- c(tmp, V(subG)$name)
subG <- induced.subgraph(net, in.nodes)
break
}
}
if (node_num == vcount(subG)) break
}
subG
}
library(igraph)
library(profr)
library(igraph)
library(profr)
#genesets.length.null.stat <- lapply(1:500, function(x) c(runif(1)+x, runif(1)+x))
#names(genesets.length.null.stat) <- 1:500
set.seed(1)
genesets_jack_a = runif(500) + 1:500
genesets_jack_b = runif(500) + 1:500
do_it_jw <- function(n = 1000){
my_graph <- erdos.renyi.game(n, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(n)
V(my_graph)$RWRNodeweight <- runif(n, min = 0, max = 0.05)
### Run the function
sublist = list()
Vmg <- V(my_graph)
for (node in Vmg$name) {
#message(node)
subnet <- build_network_jw(my_graph, node, 2)
sublist[[node]] <- subnet }
}
do_it <- function(n = 1000){
my_graph <- erdos.renyi.game(n, 0.0003)
V(my_graph)$name <- 1:vcount(my_graph)
V(my_graph)$weight <- rnorm(n)
V(my_graph)$RWRNodeweight <- runif(n, min = 0, max = 0.05)
### Run the function
sublist = list()
Vmg <- V(my_graph)
for (node in Vmg$name) {
#message(node)
subnet <- build_network(my_graph, node, 2)
sublist[[node]] <- subnet }
}
library(microbenchmark)
mb <- microbenchmark(do_it(1000), do_it_jw(1000), times = 5)
print(mb)