如何使用 r 中的 igraph 分别计算不同时期的网络度量?
How can I calculate network measures separately for different periods using igraph in r?
这是我的交易数据:
data:
id from_id to_id amount date_trx
<fctr> <fctr> <fctr> <dbl> <date>
0 7468 5695 700.0 2005-01-04
1 6213 9379 11832.0 2005-01-08
2 7517 8170 1000.0 2005-01-10
3 6143 9845 4276.0 2005-01-12
4 6254 9640 200.0 2005-01-14
5 6669 5815 200.0 2005-01-20
6 6934 8583 49752.0 2005-01-24
7 9240 8314 19961.0 2005-01-26
8 6374 8865 1000.0 2005-01-30
9 6143 6530 13.4 2005-01-31
...
我构建了一个网络,其中节点(帐户)from_id
和 to_id
之间形成了边,边的权重由它们的交易量决定。然后我计算了网络的度量,如度中心性、中介中心性、接近中心性等
即:
relations <- data.frame(from = data$from_id,
to = data$to_id)
network <- graph_from_data_frame(relations, directed = T)
E(network)$weight <- data$amount
V(network)$degree <- degree(network, normalized=TRUE)
V(network)$betweenness <- betweenness(network, normalized=TRUE)
V(network)$closeness <- closeness(network, normalized=TRUE)
但现在我想定期计算这些措施。例如,我想按周(从第一个交易日期开始)划分我的数据,并计算每个帐户对应周的网络度量。
data$week <- unsplit(tapply(data$date_trx, data$from_id, function(x) (as.numeric(x-min(data$trx_date)) %/% 7)+1),data$from_id)
select(data, from_id, to_id, date_trx, week, amount) %>% arrange(date_trx)
from_id to_id date_trx week amount
<fctr> <fctr> <date> <dbl> <dbl>
6644 6934 2005-01-01 1 700
6753 8456 2005-01-01 1 600
9242 9333 2005-01-01 1 1000
9843 9115 2005-01-01 1 900
7075 6510 2005-01-02 1 400
8685 7207 2005-01-02 1 1100
... ... ... ... ...
9866 6697 2010-12-31 313 95.8
9866 5992 2010-12-31 313 139.1
9866 5797 2010-12-31 313 72.1
9866 9736 2010-12-31 313 278.9
9868 8644 2010-12-31 313 242.8
9869 8399 2010-12-31 313 372.2
由于我将数据划分为每周周期,现在我需要分别形成每周的账户网络,以便我可以计算每周周期的账户网络度量。我怎样才能一次做到 313 周?
一种可能性是根据周拆分数据,将每个周转换为 igraph 对象,然后使用 lapply 将中心性和度一次添加到所有图形。我的初始 data.frame 被命名为 d(见下文):
library(igraph)
head(d)
from_id to_id weight date_trx
1 D I 8 1999-09-12
2 E H 10 1999-10-20
3 A G 10 1999-09-10
4 C G 13 1999-04-15
5 E J 9 1999-06-26
6 B F 15 1999-04-30
首先获取周数:
d$week <- strftime(d$date_trx, format = "%V")
现在按周拆分:
dd <- split(d, d$week )
将每周转变为 igraph
dd <- lapply(dd, function(x) graph_from_data_frame(x, directed = T))
编写一个函数来执行您要执行的所有操作,然后将其应用于每个图形:
my.funct <- function(x) {
V(x)$degree <- degree(x, normalized=TRUE)
V(x)$betweenness <- betweenness(x, normalized=TRUE)
V(x)$closeness <- closeness(x, normalized=TRUE)
return(x)
}
dd <- lapply(dd, my.funct)
例如,第一周:
dd[[1]]
IGRAPH f515e52 DN-- 4 2 --
+ attr: name (v/c), degree (v/n), betweenness (v/n), closeness (v/n), weigth (e/n), date_trx
| (e/n), week (e/c)
+ edges from f515e52 (vertex names):
[1] B->F C->G
get.vertex.attribute(dd[[1]])
$name
[1] "B" "C" "F" "G"
$degree
[1] 0.3333333 0.3333333 0.3333333 0.3333333
$betweenness
[1] 0 0 0 0
$closeness
[1] 0.3333333 0.3333333 0.2500000 0.2500000
get.edge.attribute(dd[[1]])
$weight
[1] 9 7
$date_trx
[1] 10595 10601
$week
[1] "01" "01"
然后您可以检索所有周的所有中心性和度数:
ddd <- lapply(dd, function(x) igraph::as_data_frame(x, what = "vertices") )
# keep in mind that `split` names the objects in the list according to
# the value it used to split, therefore the name of the data.frames in
# the list is the name of the week.
library(dplyr)
ddd <- bind_rows(ddd, .id="week")
head(ddd)
week name degree betweenness closeness
1 01 E 1.4444444 0 0.2000000
2 01 D 1.5555556 0 0.1666667
3 01 B 0.7777778 0 0.2000000
4 01 A 1.0000000 0 0.2000000
5 01 C 0.7777778 0 0.1666667
6 01 F 1.0000000 0 0.1000000
以防万一,您可以使用它合并回原始边列表。
本例中使用的数据:
set.seed(123)
d <- data.frame(from_id = sample(LETTERS[1:5], 2000, replace = T),
to_id = sample(LETTERS[6:10], 2000, replace = T),
weight = rpois(2000, 10),
date_trx = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 2000, replace = T))
这是我的交易数据:
data:
id from_id to_id amount date_trx
<fctr> <fctr> <fctr> <dbl> <date>
0 7468 5695 700.0 2005-01-04
1 6213 9379 11832.0 2005-01-08
2 7517 8170 1000.0 2005-01-10
3 6143 9845 4276.0 2005-01-12
4 6254 9640 200.0 2005-01-14
5 6669 5815 200.0 2005-01-20
6 6934 8583 49752.0 2005-01-24
7 9240 8314 19961.0 2005-01-26
8 6374 8865 1000.0 2005-01-30
9 6143 6530 13.4 2005-01-31
...
我构建了一个网络,其中节点(帐户)from_id
和 to_id
之间形成了边,边的权重由它们的交易量决定。然后我计算了网络的度量,如度中心性、中介中心性、接近中心性等
即:
relations <- data.frame(from = data$from_id,
to = data$to_id)
network <- graph_from_data_frame(relations, directed = T)
E(network)$weight <- data$amount
V(network)$degree <- degree(network, normalized=TRUE)
V(network)$betweenness <- betweenness(network, normalized=TRUE)
V(network)$closeness <- closeness(network, normalized=TRUE)
但现在我想定期计算这些措施。例如,我想按周(从第一个交易日期开始)划分我的数据,并计算每个帐户对应周的网络度量。
data$week <- unsplit(tapply(data$date_trx, data$from_id, function(x) (as.numeric(x-min(data$trx_date)) %/% 7)+1),data$from_id)
select(data, from_id, to_id, date_trx, week, amount) %>% arrange(date_trx)
from_id to_id date_trx week amount
<fctr> <fctr> <date> <dbl> <dbl>
6644 6934 2005-01-01 1 700
6753 8456 2005-01-01 1 600
9242 9333 2005-01-01 1 1000
9843 9115 2005-01-01 1 900
7075 6510 2005-01-02 1 400
8685 7207 2005-01-02 1 1100
... ... ... ... ...
9866 6697 2010-12-31 313 95.8
9866 5992 2010-12-31 313 139.1
9866 5797 2010-12-31 313 72.1
9866 9736 2010-12-31 313 278.9
9868 8644 2010-12-31 313 242.8
9869 8399 2010-12-31 313 372.2
由于我将数据划分为每周周期,现在我需要分别形成每周的账户网络,以便我可以计算每周周期的账户网络度量。我怎样才能一次做到 313 周?
一种可能性是根据周拆分数据,将每个周转换为 igraph 对象,然后使用 lapply 将中心性和度一次添加到所有图形。我的初始 data.frame 被命名为 d(见下文):
library(igraph)
head(d)
from_id to_id weight date_trx
1 D I 8 1999-09-12
2 E H 10 1999-10-20
3 A G 10 1999-09-10
4 C G 13 1999-04-15
5 E J 9 1999-06-26
6 B F 15 1999-04-30
首先获取周数:
d$week <- strftime(d$date_trx, format = "%V")
现在按周拆分:
dd <- split(d, d$week )
将每周转变为 igraph
dd <- lapply(dd, function(x) graph_from_data_frame(x, directed = T))
编写一个函数来执行您要执行的所有操作,然后将其应用于每个图形:
my.funct <- function(x) {
V(x)$degree <- degree(x, normalized=TRUE)
V(x)$betweenness <- betweenness(x, normalized=TRUE)
V(x)$closeness <- closeness(x, normalized=TRUE)
return(x)
}
dd <- lapply(dd, my.funct)
例如,第一周:
dd[[1]]
IGRAPH f515e52 DN-- 4 2 --
+ attr: name (v/c), degree (v/n), betweenness (v/n), closeness (v/n), weigth (e/n), date_trx
| (e/n), week (e/c)
+ edges from f515e52 (vertex names):
[1] B->F C->G
get.vertex.attribute(dd[[1]])
$name
[1] "B" "C" "F" "G"
$degree
[1] 0.3333333 0.3333333 0.3333333 0.3333333
$betweenness
[1] 0 0 0 0
$closeness
[1] 0.3333333 0.3333333 0.2500000 0.2500000
get.edge.attribute(dd[[1]])
$weight
[1] 9 7
$date_trx
[1] 10595 10601
$week
[1] "01" "01"
然后您可以检索所有周的所有中心性和度数:
ddd <- lapply(dd, function(x) igraph::as_data_frame(x, what = "vertices") )
# keep in mind that `split` names the objects in the list according to
# the value it used to split, therefore the name of the data.frames in
# the list is the name of the week.
library(dplyr)
ddd <- bind_rows(ddd, .id="week")
head(ddd)
week name degree betweenness closeness
1 01 E 1.4444444 0 0.2000000
2 01 D 1.5555556 0 0.1666667
3 01 B 0.7777778 0 0.2000000
4 01 A 1.0000000 0 0.2000000
5 01 C 0.7777778 0 0.1666667
6 01 F 1.0000000 0 0.1000000
以防万一,您可以使用它合并回原始边列表。
本例中使用的数据:
set.seed(123)
d <- data.frame(from_id = sample(LETTERS[1:5], 2000, replace = T),
to_id = sample(LETTERS[6:10], 2000, replace = T),
weight = rpois(2000, 10),
date_trx = sample(seq(as.Date('1999/01/01'), as.Date('2000/01/01'), by="day"), 2000, replace = T))