计算 R 'by hand' 中加权网络的纽曼 Q、Q max 和分类系数,并使用 igraph 和 assortnet

calculating Newman's Q, Q max, and assortativity coefficients for weighted networks in R 'by hand' and using igraph and assortnet

我正在尝试了解如何计算纽曼模块化、分类系数、 并与各种包的值进行比较。

suppressMessages(library(dplyr))
suppressMessages(library(igraph))
suppressMessages(library(assortnet))

这里我模拟一个网络:

set.seed(12345)
netSize <- 20 # number of nodes
ids <- 1:netSize # names

# create an edgelist
df <- data.frame("from" = ids, "to" = ids)
eL <- df %>% # edited because I realized I needed to get rid of duplicate edges after using expand.grid       
  expand.grid() %>%
  filter(from != to) %>%
  mutate(key = paste0(pmin(as.character(from), as.character(to)), pmax(as.character(to), as.character(from)), sep = ""))%>%
  filter(duplicated(key)) %>%
  dplyr::select(-3) %>%
  mutate(fromC = ifelse(from <= ceiling(netSize/2), "A", "B"),
         toC = ifelse(to <= ceiling(netSize/2), "A", "B"))
# index for assigning weights based on membership
same <- which(eL[,3] == eL[,4])
diff <- which(eL[,3] != eL[,4])

eL[same, "weight"] <- rbeta(length(same), 10, 1) %>% round(2)
eL[diff, "weight"] <- rbeta(length(diff), 1, 10) %>% round(2)

# create a graph
g = graph_from_data_frame(eL[,c(as.character(1):as.character(2))], directed = FALSE) # not sure if numeric edge names poses a problem or not
E(g)$weight <- eL$weight

# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = FALSE, attr = "weight") %>% as.matrix()
adjmat[lower.tri(adjmat)] <- 0

V(g)$name 
#>  [1] "1"  "11" "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10" "12" "13" "14" "15"
#> [16] "16" "17" "18" "19" "20"
community <- ifelse(as.numeric(V(g)$name) <= 10, 1, 2)
V(g)$membership <- community
plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
     vertex.color = V(g)$membership)

创建要使用的克罗内克增量值矩阵

degrees <- degree(g)
kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
                nrow = length(degrees))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name

创建模块化矩阵

m <- E(g) %>% length()
modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
  for(j in 1:ncol(adjmat)){
    modmat[i,j] <- (adjmat[i,j] - degrees[i] * degrees[j] / (2 * m)) * kdmat[i,j]
  }
}

确实看起来我计算错了纽曼 Q,但我看不出我的错误

(myQ <- 1/(2 * m) * sum(modmat))
#> [1] -0.2909091
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534

接下来我要计算 Newman 的 Q max

kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
  for(j in 1:ncol(kmat)){
    kmat[i,j] <- (degrees[i] * degrees[j] / (2 * m) * kdmat[i,j])
    
  }
}
# not sure if this is correct or not
(qmax <- 1/(2 * m) * ((2 * m) - sum(kmat))) 
#> [1] 0.5

如果我正确计算 q,这应该是协同系数:

myQ/qmax 
#> [1] -0.5818182

这是来自 assortnet 的分类系数:

assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r 
#> [1] 0.7823255

igraph 的分类系数完全不同(在我看来它不是在读取权重)。 我在这里做错了什么,还是这个功能不适用于加权网络?

igraph::assortativity_nominal(g, types = V(g)$membership, directed = FALSE)
#> [1] -0.09090909

reprex package (v2.0.0)

于 2021-09-17 创建

这需要一段时间才能解决。有一些错误:

  • 我不应该从我的邻接矩阵中删除下三角形
  • 从图中提取 adjmat 时我应该使用“sparse = TRUE”
  • 我应该在计算中使用节点强度,即节点的边权重之和
  • 并且对于加权网络,m等于边权重的总和,而不是边的数量!

我在某些地方修改了代码的其他小方面,但这是正确的版本。希望 这对其他人有帮助。

创建图表后提取:


# get the adjacency matrix
adjmat <- get.adjacency(g, sparse = TRUE, attr = "weight") %>% as.matrix() # use sparse = TRUE

# assign community or vertex type
community <- ifelse(as.numeric(V(g)$name) <= netSize/2, 1, 2)
V(g)$membership <- community
# plot(g, layout = layout.fruchterman.reingold(g), edge.width = E(g)$weight*2,
#      vertex.color = V(g)$membership)


### Modularity calculations

创建要使用的克罗内克增量值矩阵

kdmat <- matrix(kronecker(community, community, FUN = function(x,y){ifelse(x==y, 1, 0)}),
                nrow = length(community))
rownames(kdmat) <- colnames(kdmat) <- V(g)$name

创建模块化矩阵

# (m <- E(g) %>% length()) # m is the number of edges in binary networks
(m <- sum(E(g)$weight)) # m is the total sum of edge weights in weighted networks
#> [1] 93.55

modmat <- adjmat # create a matrix with same dimensions
modmat[,] <- 0 # empty it
for(i in 1:nrow(adjmat)){
  for(j in 1:ncol(adjmat)){
    modmat[i,j] <- adjmat[i,j] - strength(g)[i] * strength(g)[j] / (2*m) 
  }
}

## now finish off the modularity calculation:
(myQ <- 1 / (2*m) * sum(modmat * kdmat))
#> [1] 0.3850534
(igQ <- igraph::modularity(g, membership = community, weights = E(g)$weight))
#> [1] 0.3850534
# now the values match

接下来我想从 Newman 计算 Q max

kmat <- modmat
kmat[,] <- 0
for(i in 1:nrow(kmat)){
  for(j in 1:ncol(kmat)){
    kmat[i,j] <- (sum(adjmat[i,]) * sum(adjmat[j,]) / (2 * m) * kdmat[i,j])
  }
}

(qmax <- (1/(2 * m)) * ((2 * m) - sum(kmat))) 
#> [1] 0.4999652

这是同配系数:

myQ/qmax # using my q value 
#> [1] 0.7701604
igQ/qmax # using igraphs q value
#> [1] 0.7701604

这是来自 assortnet 的分类系数:

assortnet::assortment.discrete(adjmat, types = as.factor(community), weighted = T)$r 
#> [1] 0.7701604

现在他们匹配了!!!

reprex package (v2.0.1)

于 2021-09-22 创建