计算数千个小型网络的网络密度

Calculate network density for thousands of small networks

我有一个文本文件,其中包含数千个小型无向网络的边列表,每个网络大约有 500 到 5,000 条边。我想为每个网络计算一些基本的网络统计量(例如密度、模块度、网络直径等)。我知道如何对单个图计算这些统计量,但不确定如何把这一过程重复 20,000 多次。

你们以前做过类似的事情吗?下面是我的数据示例。我从一个包含三列的数据框开始:Group、node1、node2。例如,如何使用 igraph 计算每个组的网络密度?

# Example edge list: one row per undirected edge. `Group` identifies the
# network the edge belongs to; `node1` and `node2` are the edge's endpoints.
example <- data.frame(Group = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1),
                      # NOTE(review): the values for `node1` are missing here (bare `c`, no
                      # argument list, no trailing comma), so this call does not parse as
                      # written. Restore the node1 ids from the original data before running.
                      node1 = c
                      node2 = c(4150157,35132297,35802950,35822464,35971341,35977775,36724507,42658186,1342343023,13844310298,2942627924,2942653049,2942754180,7044274712,416815,1445929,4150157,33520419,33581908,34830906,35111568,43196961,1842681710,116621,1448848,1560392,3699562,29403184,34189928,35828823,36431606,37010998,37131488,42517640,345167839,11944262684,85006312408,618994,3430467,4150157,33829033,35535398,35760050,35893676,36026987,36163353,36322140,36330673,36431606,37153803,38448018,38674693,40028733,43210669,142215389,345171462,842325391,1242310442,1642273983,1842618408,1842735040,18444365571,18444368411,2342469450,2942529362,84942435207,33582733,34699937,34740905,35095498,37170843,37429426,37459916,1442276323,2542490252,336374,2373672,25785717,28377471,28381751,31275173,32210712,34774129,34831978,35264681,35339388,36192248,36270794,37189105,37470315,38547411,41702403,345753997,1142268042,12044251185,50749133132,57249093692,25785717,28091086,28825723,33622713,33991814,34621727,45449121012,336374,11187798,24280236,26239045,26516669,28091086,28783350,192917,1030529,32621080,42737988,1842779467,2853307,11760748,29359483,30681096,32620905,35156133,343831931,642306276,15844399352,15844415649,21544468072,21544480101,85010812746,1527454,27927261,30081829,30576834,32614970,32643182,35943888,37298440,39782265,2942653090,5544314341,85010812746,29150,1173576,32606366,36643771,37098560,37198509,37884702,42378607,242663463,3042545311,1344097,28204729,703370,781318,990010,1655009,30492538,32180421,34555959,34670902,35063206,35600561,35642940,37203284,37217019,37367130,14044260538,22044449563,4344598211,137774,213342,28199339,30218359,31276138,34129336,37062788,38121492,342666235,2542450463,9444241558))
# Compute the edge density of every group's network with igraph.
#
# Reads:  `example`, a data frame with columns Group, node1, node2
#         (one row per undirected edge).
# Writes: `groups`    - group ids in order of first appearance
#         `densities` - numeric vector of densities, aligned with `groups`
library(igraph)

groups <- unique(example$Group)

# vapply() builds the result in one pass instead of growing it with append(),
# guarantees one numeric value per group, and returns numeric(0) rather than
# failing when there are no groups (the `1:length(groups)` pitfall).
densities <- vapply(groups, function(group_id) {
  # which() drops NA comparisons, so rows with a missing Group are skipped.
  edge_rows <- which(example$Group == group_id)
  edges <- example[edge_rows, c("node1", "node2"), drop = FALSE]

  # make_graph() expects a flat vector v1, v2, v1, v2, ...; transposing the
  # two-column matrix before flattening interleaves the endpoints that way.
  # The ids are converted to character so igraph treats them as vertex names
  # instead of (very large) vertex indices.
  endpoints <- as.character(c(t(as.matrix(edges))))

  # edge_density() is the current name of the deprecated graph.density().
  edge_density(make_graph(endpoints, directed = FALSE))
}, numeric(1), USE.NAMES = FALSE)


> data.frame(group = groups, density = densities)
  group    density
1     0 0.02563226
2     2 0.05714286
3     3 0.04429679
4     1 0.05681818

如果只需要计算密度,则不一定需要 igraph 包,可以直接根据边列表手动计算(密度 = 边数 ÷ 可能的节点对数 n(n-1)/2):

library("dplyr")

# Density per group straight from the edge list, without building graphs:
# observed edges divided by the number of possible undirected node pairs.
example %>%
  group_by(Group) %>%
  summarise(
    # Distinct nodes appearing at either end of an edge in this group.
    n_nodes = n_distinct(union(node1, node2)),
    # One row per edge, so the row count is the edge count.
    n_edges = n(),
    density = n_edges / (n_nodes * (n_nodes - 1) / 2)
  ) %>%
  select(Group, density)

# A tibble: 4 x 2
  Group density
  <dbl>   <dbl>
1     0  0.0256
2     1  0.0568
3     2  0.0571
4     3  0.0443

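如果您还想计算其他网络指标,一种选择是按组拆分数据,将每个子集转换为 igraph 对象,然后对每个对象应用执行网络分析的函数,例如:`lapply(split(example[, c("node1", "node2")], example$Group), function(d) igraph::graph_from_data_frame(d, directed = FALSE))`。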