计算 R 'by hand' 中加权网络的纽曼 Q、Q max 和分类系数,并使用 igraph 和 assortnet
calculating Newman's Q, Q max, and assortativity coefficients for weighted networks in R 'by hand' and using igraph and assortnet
我正在尝试了解如何计算纽曼模块化、分类系数、
并与各种包的值进行比较。
suppressMessages(library(dplyr))
suppressMessages(library(igraph))
suppressMessages(library(assortnet))
这里我模拟一个网络:
set.seed(12345)
netSize <- 20 # number of nodes
ids <- 1:netSize # names
# create an edgelist
df <- data.frame("from" = ids, "to" = ids)
eL <- df %>% # edited because I realized I needed to get rid of duplicate edges after using expand.grid
expand.grid() %>%
filter(from != to) %>%
mutate(key = paste0(pmin(as.character(from), as.character(to)), pmax(as.character(to), as.character(from)), sep = ""))%>%
filter(duplicated(key)) %>%
dplyr::select(-3) %>%
mutate(fromC = ifelse(from <= ceiling(netSize/2), "A", "B"),
toC = ifelse(to <= ceiling(netSize/2), "A", "B"))
# index for assigning weights based on membership
same <- which(eL[,3] == eL[,4])
diff <- which(eL[,3] != eL[,4])
eL[same, "weight"] <- rbeta(length(same), 10, 1) %>% round(2)
eL[diff, "weight"] <- rbeta(length(diff), 1, 10) %>% round(2)
# create a graph
g = graph_from_data_frame(eL[,c(as.character(1):as.character(2))], directed = FALSE) # not sure if numeric edge names poses a problem or not
E(g)$weight <- eL$weight
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = FALSE, attr = "weight") %>% as.matrix()
adjmat[lower.tri(adjmat)] <- 0
V(g)$name
#> [1] "1" "11" "2" "3" "4" "5" "6" "7" "8" "9" "10" "12" "13" "14" "15"
#> [16] "16" "17" "18" "19" "20"
community <- ifelse(as.numeric(V(g)$name) <= 10, 1, 2)
V(g)$membership <- community
plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
vertex.color = V(g)$membership)
创建要使用的克罗内克增量值矩阵
degrees <- degree(g)
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(degrees))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
创建模块化矩阵
m <- E(g) %>% length()
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- (adjmat[i,j] - degrees[i] * degrees[j] / (2 * m)) * kdmat[i,j]
}
}
确实看起来我计算错了纽曼 Q,但我看不出我的错误
(myQ <- 1/(2 * m) * sum(modmat))
#> [1] -0.2909091
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
接下来我要计算 Newman 的 Q max
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (degrees[i] * degrees[j] / (2 * m) * kdmat[i,j])
}
}
# not sure if this is correct or not
(qmax <- 1/(2 * m) * ((2 * m) - sum(kmat)))
#> [1] 0.5
如果我正确计算 q,这应该是协同系数:
myQ/qmax
#> [1] -0.5818182
这是来自 assortnet 的分类系数:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7823255
igraph 的分类系数完全不同(在我看来它不是在读取权重)。
我在这里做错了什么,还是这个功能不适用于加权网络?
igraph::assortativity_nominal(g, types = V(g)$membership, directed = FALSE)
#> [1] -0.09090909
由 reprex package (v2.0.0)
于 2021-09-17 创建
这需要一段时间才能解决。有一些错误:
- 我不应该从我的邻接矩阵中删除下三角形
- 从图中提取 adjmat 时我应该使用“sparse = TRUE”
- 我应该在计算中使用节点强度,即节点的边权重之和
- 并且对于加权网络,m等于边权重的总和,而不是边的数量!
我在某些地方修改了代码的其他小方面,但这是正确的版本。希望
这对其他人有帮助。
创建图表后提取:
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = TRUE, attr = "weight") %>% as.matrix() # use sparse = TRUE
# assign community or vertex type
community <- ifelse(as.numeric(V(g)$name) <= netSize/2, 1, 2)
V(g)$membership <- community
# plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
# vertex.color = V(g)$membership)
### Modularity calculations
创建要使用的克罗内克增量值矩阵
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(community))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
创建模块化矩阵
# (m <- E(g) %>% length()) # m is the number of edges in binary networks
(m <- sum(E(g)$weight)) # m is the total sum of edge weights in weighted networks
#> [1] 93.55
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- adjmat[i,j] - strength(g)[i] * strength(g)[j] / (2*m)
}
}
## now finish off the modularity calculation:
(myQ <- 1 / (2*m) * sum(modmat * kdmat))
#> [1] 0.3850534
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
# now the values match
接下来我想从 Newman 计算 Q max
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (sum(adjmat[i,]) * sum(adjmat[j,]) / (2 * m) * kdmat[i,j])
}
}
(qmax <- (1/(2 * m)) * ((2 * m) - sum(kmat)))
#> [1] 0.4999652
这是同配系数:
myQ/qmax # using my q value
#> [1] 0.7701604
igQ/qmax # using igraphs q value
#> [1] 0.7701604
这是来自 assortnet 的分类系数:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7701604
现在他们匹配了!!!
由 reprex package (v2.0.1)
于 2021-09-22 创建
我正在尝试了解如何计算纽曼模块化、分类系数、 并与各种包的值进行比较。
suppressMessages(library(dplyr))
suppressMessages(library(igraph))
suppressMessages(library(assortnet))
这里我模拟一个网络:
set.seed(12345)
netSize <- 20 # number of nodes
ids <- 1:netSize # names
# create an edgelist
df <- data.frame("from" = ids, "to" = ids)
eL <- df %>% # edited because I realized I needed to get rid of duplicate edges after using expand.grid
expand.grid() %>%
filter(from != to) %>%
mutate(key = paste0(pmin(as.character(from), as.character(to)), pmax(as.character(to), as.character(from)), sep = ""))%>%
filter(duplicated(key)) %>%
dplyr::select(-3) %>%
mutate(fromC = ifelse(from <= ceiling(netSize/2), "A", "B"),
toC = ifelse(to <= ceiling(netSize/2), "A", "B"))
# index for assigning weights based on membership
same <- which(eL[,3] == eL[,4])
diff <- which(eL[,3] != eL[,4])
eL[same, "weight"] <- rbeta(length(same), 10, 1) %>% round(2)
eL[diff, "weight"] <- rbeta(length(diff), 1, 10) %>% round(2)
# create a graph
g = graph_from_data_frame(eL[,c(as.character(1):as.character(2))], directed = FALSE) # not sure if numeric edge names poses a problem or not
E(g)$weight <- eL$weight
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = FALSE, attr = "weight") %>% as.matrix()
adjmat[lower.tri(adjmat)] <- 0
V(g)$name
#> [1] "1" "11" "2" "3" "4" "5" "6" "7" "8" "9" "10" "12" "13" "14" "15"
#> [16] "16" "17" "18" "19" "20"
community <- ifelse(as.numeric(V(g)$name) <= 10, 1, 2)
V(g)$membership <- community
plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
vertex.color = V(g)$membership)
创建要使用的克罗内克增量值矩阵
degrees <- degree(g)
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(degrees))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
创建模块化矩阵
m <- E(g) %>% length()
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- (adjmat[i,j] - degrees[i] * degrees[j] / (2 * m)) * kdmat[i,j]
}
}
确实看起来我计算错了纽曼 Q,但我看不出我的错误
(myQ <- 1/(2 * m) * sum(modmat))
#> [1] -0.2909091
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
接下来我要计算 Newman 的 Q max
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (degrees[i] * degrees[j] / (2 * m) * kdmat[i,j])
}
}
# not sure if this is correct or not
(qmax <- 1/(2 * m) * ((2 * m) - sum(kmat)))
#> [1] 0.5
如果我正确计算 q,这应该是协同系数:
myQ/qmax
#> [1] -0.5818182
这是来自 assortnet 的分类系数:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7823255
igraph 的分类系数完全不同(在我看来它不是在读取权重)。 我在这里做错了什么,还是这个功能不适用于加权网络?
igraph::assortativity_nominal(g, types = V(g)$membership, directed = FALSE)
#> [1] -0.09090909
由 reprex package (v2.0.0)
于 2021-09-17 创建这需要一段时间才能解决。有一些错误:
- 我不应该从我的邻接矩阵中删除下三角形
- 从图中提取 adjmat 时我应该使用“sparse = TRUE”
- 我应该在计算中使用节点强度,即节点的边权重之和
- 并且对于加权网络,m等于边权重的总和,而不是边的数量!
我在某些地方修改了代码的其他小方面,但这是正确的版本。希望 这对其他人有帮助。
创建图表后提取:
# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = TRUE, attr = "weight") %>% as.matrix() # use sparse = TRUE
# assign community or vertex type
community <- ifelse(as.numeric(V(g)$name) <= netSize/2, 1, 2)
V(g)$membership <- community
# plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
# vertex.color = V(g)$membership)
### Modularity calculations
创建要使用的克罗内克增量值矩阵
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
nrow = length(community))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name
创建模块化矩阵
# (m <- E(g) %>% length()) # m is the number of edges in binary networks
(m <- sum(E(g)$weight)) # m is the total sum of edge weights in weighted networks
#> [1] 93.55
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
for(j in 1:ncol(adjmat)){
modmat[i,j] <- adjmat[i,j] - strength(g)[i] * strength(g)[j] / (2*m)
}
}
## now finish off the modularity calculation:
(myQ <- 1 / (2*m) * sum(modmat * kdmat))
#> [1] 0.3850534
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
# now the values match
接下来我想从 Newman 计算 Q max
kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
for(j in 1:ncol(kmat)){
kmat[i,j] <- (sum(adjmat[i,]) * sum(adjmat[j,]) / (2 * m) * kdmat[i,j])
}
}
(qmax <- (1/(2 * m)) * ((2 * m) - sum(kmat)))
#> [1] 0.4999652
这是同配系数:
myQ/qmax # using my q value
#> [1] 0.7701604
igQ/qmax # using igraphs q value
#> [1] 0.7701604
这是来自 assortnet 的分类系数:
assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r
#> [1] 0.7701604
现在他们匹配了!!!
由 reprex package (v2.0.1)
于 2021-09-22 创建