在 TidyGraph 中计算 Everett-Valente 经纪分数

Computing the Everett-Valente Brokerage Score in TidyGraph

我想计算定向网络中每个节点的 Everett-Valente 经纪分数(Everett 和 Valente 2016)。该分数基于中介中心性。本质上,这控制了网络规模。代理控制 information/resource 流的能力受网络大小 and/or 连接冗余的调节。对于无向图,Everett - Valente Brokerage Score 计算如下:

  1. 计算节点介数中心性。
  2. 将每个节点的计算介数中心性加倍,并将 (n - 1) 添加到每个非悬垂条目。
  3. 将每个非零分数除以节点的度数。

我计划使用 if_else 语句来处理非挂件和零分,例如

g <- g %>%
activate(nodes) %>%
mutate(betweenness = centrality_betweenness(),
       ev_brokerage = if_else(..if_else(..)..))

我不知道如何执行ev_brokerage(条件语句)。为了将此扩展到定向案例,Everett 和 Valente (2016) 提供了以下规则:

对于电动车内经纪业务:

  1. 计算 v 的节点介数中心性
  2. 如果节点介数中心性 = 0 添加 j,其中 j = 可以到达 v 的顶点数。
  3. 将每个非零和除以v的入度

对于 EV 外经纪业务:

  1. 计算 v 的节点介数中心性
  2. 如果节点介数中心性 = 0 添加 k,其中 k = v 可以到达的顶点数。
  3. 将每个非零和除以v的出度

v 的 EV 经纪 = in-EV 和 out-EV 的平均值。

如果有人可以帮助我处理 mutate() 语句,我将不胜感激。我想知道如何在定向情况下计算出 j 和 k,并在无向情况下计算出非悬垂节点。

如果您只是将它变成一个计算 igraph 对象分数的独立函数,那么推理(和概括)起来会简单得多。然后它可以适应 tidygraph 友好的东西。

suppressPackageStartupMessages(library(tidygraph))
if_else <- dplyr::if_else
case_when <- dplyr::case_when
map2_dbl <- purrr::map2_dbl

使用无向图非常简单,因为您不需要嵌套任何控制流。

create_notable("Zachary") %>% 
  mutate(pendant = centrality_degree() == 1,               # is a node a pendant? 
         btwn = centrality_betweenness()) %>%              # raw betweenness
  mutate(ev_step1 = if_else(pendant,                        # if it's a pendant...
                            btwn * 2,                          # double betweenness...
                            btwn * 2 + (graph_order() - 1)),   # else double it AND subtract n (nodes) - 1
         ev_brok = if_else(ev_step1 == 0,                   # if it's 0...
                           ev_step1,                        # leave it as is...
                           ev_step1 / centrality_degree())  # else divide it by raw degree
         ) %>% 
  select(ev_brok, btwn, pendant)

#> # A tbl_graph: 34 nodes and 78 edges
#> #
#> # An undirected simple graph with 1 component
#> #
#> # Node Data: 34 x 3 (active)
#>   ev_brok    btwn pendant
#>     <dbl>   <dbl> <lgl>  
#> 1   30.9  231.    FALSE  
#> 2   10.00  28.5   FALSE  
#> 3   18.5   75.9   FALSE  
#> 4    7.60   6.29  FALSE  
#> 5   11.2    0.333 FALSE  
#> 6   16.2   15.8   FALSE  
#> # ... with 28 more rows
#> #
#> # Edge Data: 78 x 2
#>    from    to
#>   <int> <int>
#> 1     1     2
#> 2     1     3
#> 3     1     4
#> # ... with 75 more rows

这是一个有向图示例...

(g <- matrix(c(1, 2,
              1, 3,
              3, 4, 
              4, 1,
              2, 5,
              5, 6,   # 6 is pendant with in-tie
              7, 2,   # 7 is pendant with out-ie
              4, 8,   # 8 is pendant with in-tie
              9, 10, 
              10, 11,
              11, 12, # 12 is a pendant with in-tie
              11, 13,
              9, 13),
            ncol = 2, byrow = TRUE) %>% 
  igraph::graph_from_edgelist()) %>% plot()

与其将 ifelse() 彼此嵌套,不如将它们用 dplyr::case_when() 包裹起来(但它仍然应该放在可以测试和验证的适当函数中)。

(
res <- g %>%
  as_tbl_graph() %>% 
  mutate(btwn = centrality_betweenness(),
         in_reach = local_size(order = graph_order(), mode = "in") - 1, # reach being max. ego graph order - 1 for ego
         out_reach = local_size(order = graph_order(), mode = "out") - 1,
         in_deg = centrality_degree(mode = "in"),
         out_deg = centrality_degree(mode = "out")) %>% 
  mutate(ev_in = case_when(
    btwn == 0 ~ if_else(btwn + in_reach == 0,       # if btwn is 0 and if btwn + in_reach is 0
                       btwn + in_reach,             # then btwn + in_reach (0)
                       (btwn + in_reach) / in_deg), # else add btwn and in_reach, then divide by in_deg
    btwn != 0 ~ btwn / in_deg
    )) %>% 
  mutate(ev_out = case_when(
    btwn == 0 ~ if_else(btwn + out_reach == 0, 
                        btwn + out_reach, 
                        (btwn + out_reach) / out_deg),
    btwn != 0 ~ btwn / out_deg
    )) %>% 
    mutate(ev_brok = map2_dbl(ev_in, ev_out, ~ mean(c(.x, .y)))) %>% 
  select(ev_brok, starts_with("ev_"), btwn, everything())
)
#> # A tbl_graph: 13 nodes and 13 edges
#> #
#> # A directed simple graph with 2 components
#> #
#> # Node Data: 13 x 8 (active)
#>   ev_brok ev_in ev_out  btwn in_reach out_reach in_deg out_deg
#>     <dbl> <dbl>  <dbl> <dbl>    <dbl>     <dbl>  <dbl>   <dbl>
#> 1    5.25     7    3.5     7        2         6      1       2
#> 2    6        4    8       8        4         2      2       1
#> 3    2        2    2       2        2         6      1       1
#> 4    4.5      6    3       6        2         6      1       2
#> 5    5        5    5       5        5         1      1       1
#> 6    3        6    0       0        6         0      1       0
#> # ... with 7 more rows
#> #
#> # Edge Data: 13 x 2
#>    from    to
#>   <int> <int>
#> 1     1     2
#> 2     1     3
#> 3     3     4
#> # ... with 10 more rows

这里是完整的 table 检查数学:

res %>% as_tibble()

#> # A tibble: 13 x 8
#>    ev_brok ev_in ev_out  btwn in_reach out_reach in_deg out_deg
#>      <dbl> <dbl>  <dbl> <dbl>    <dbl>     <dbl>  <dbl>   <dbl>
#>  1    5.25   7      3.5     7        2         6      1       2
#>  2    6      4      8       8        4         2      2       1
#>  3    2      2      2       2        2         6      1       1
#>  4    4.5    6      3       6        2         6      1       2
#>  5    5      5      5       5        5         1      1       1
#>  6    3      6      0       0        6         0      1       0
#>  7    1.5    0      3       0        0         3      0       1
#>  8    1.5    3      0       0        3         0      1       0
#>  9    1      0      2       0        0         4      0       2
#> 10    2      2      2       2        1         3      1       1
#> 11    2.25   3      1.5     3        2         2      1       2
#> 12    1.5    3      0       0        3         0      1       0
#> 13    0.75   1.5    0       0        3         0      2       0

在对照 Everett 和 Valente (2016) 中提出的 campnet 示例进行检查后,定向网络的 EV 经纪分数可以计算如下:

g <- g %>%
  activate(nodes) %>%
  # compute in-degree, out-degree, and betweenness centrality 
  mutate(betweenness = centrality_betweenness(),
         in_degree = centrality_degree(mode = "in"),
         out_degree = centrality_degree(mode = "out"),
         in_reach = local_size(order = graph_order(), mode = "in") - 1,
         out_reach = local_size(order = graph_order(), mode = "out") - 1) %>%
  # compute everett-valente brokerage score
  mutate(ev_in = if_else(betweenness != 0, betweenness + in_reach, betweenness),
         ev_in = if_else(ev_in != 0, ev_in / in_degree, ev_in),
         ev_out = if_else(betweenness != 0, betweenness + out_reach, betweenness),
         ev_out = if_else(ev_out != 0, ev_out / out_degree, ev_out),
         ev_brokerage = (ev_in + ev_out) / 2) 

使用 Granovetter (1973) 在 Everett 和 Valente (2016) 中提出的假设无向网络,EV 经纪分数可以计算如下:

edgelist <- data.frame(from = c(1,1,1,2,2,2,3,3,3,3,4,4,4,4,5,5,5,6,6,6,7,7,8,8,8,8,9,
                                     9,10,10,10,11,11,11,11,11,12,12,12,13,13,13,13,14,14,
                                     14,14,15,15,15,16,16,17,17,17,18,18,18,18,19,19,20,20,
                                     20,20,20,21,21,22,22,22,23,23,23,24,24,24,25,25,25,25),
                            to = c(2,3,24,1,3,4,1,2,4,5,2,3,5,6,3,4,6,5,5,7,6,8,9,10,11,
                                   14,8,10,9,8,11,10,8,12,14,13,11,14,13,11,12,14,15,8,11,
                                   12,13,13,16,17,15,17,15,16,18,17,19,20,21,18,20,19,18,
                                   21,25,22,18,20,20,25,23,24,25,22,1,25,23,24,23,22,20))

g <- igraph::graph_from_edgelist(as.matrix(edgelist), directed = F) %>% simplify()

g <- as_tbl_graph(g) %>%
  activate(nodes) %>%
  # compute brokerage
  mutate(betweenness = centrality_betweenness(),
        degree = centrality_degree(),
        ev_condition = if_else(betweenness != 0, betweenness * 2 + graph_order() - 1, betweenness),
     ev_brokerage = if_else(ev_condition != 0, ev_condition / degree, ev_condition))

data <- g %>% as.tibble()

我没有按照 Everett 和 Valente (2016) 规范化 EV 经纪评分。