在 R 中计算 ego(自我)与 alter(他者)共同拥有的关系数量
Get number of ties ego and alter have in common in R
我有一个关于青少年友谊的有向网络数据集。我想生成一个边列表,其中包含 ego(自我)与 alter(他者)的共同朋友数量(即 ego 和 alter 都提名为朋友的人)。以下是一些示例数据:
有数据:
id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6
想要数据:
id alter num_common
1 3 2
1 5 0
1 9 0
2 3 1
2 5 0
3 2 1
3 5 0
3 9 0
3 6 0
这是一个可能但不是很简单的解决方案:
# Count, for every directed edge (id -> alter), how many friends both ends
# nominate. Strategy: enumerate all unordered node pairs, count the alters
# each pair has in common, then attach those counts to the original edges.
library(dplyr)
library(tidyr)

# Example data: each row is a directed nomination from `id` to `alter`.
df <- data.table::fread("id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6")

# All unordered pairs of nodes, one matrix column per pair. The columns are
# named explicitly because calling as_tibble() on a matrix without column
# names is deprecated and emits a warning.
pair_matrix <- utils::combn(unique(c(df$id, df$alter)), 2)
colnames(pair_matrix) <- paste0("V", seq_len(ncol(pair_matrix)))

# All pairs in long form: `name` is the pair ID, `value` is a member node
# (two rows per pair).
pairs_v <- pair_matrix %>%
  dplyr::as_tibble() %>%
  tidyr::pivot_longer(cols = dplyr::everything()) %>%
  dplyr::arrange(name)

# Number of common friends per pair ID: an alter counted more than once
# within a pair was nominated by both members. Nodes that nominate nobody
# contribute an NA alter, which is dropped.
# NOTE(review): every node occurs in several pairs and may nominate several
# alters, so this join is many-to-many by design; recent dplyr versions may
# warn about that -- confirm and set `relationship` if needed.
pairs_comp <- pairs_v %>%
  dplyr::left_join(df, by = c("value" = "id")) %>%
  dplyr::count(name, alter) %>%
  dplyr::filter(n > 1, !is.na(alter)) %>%
  dplyr::count(name)

# All pairs in wide form: one row per pair ID, members in columns `1` and `2`.
# The grouping is only needed to number the members, so it is dropped again.
pairs_h <- pairs_v %>%
  dplyr::group_by(name) %>%
  dplyr::mutate(G_ID = dplyr::row_number()) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(names_from = G_ID, values_from = value)

# An edge can match a pair in either orientation (id/alter = 1/2 or 2/1), so
# the pair lookup and the count lookup are each done once per orientation.
# The first available count wins; edges without common friends get 0.
df %>%
  dplyr::left_join(pairs_h, by = c("id" = "1", "alter" = "2")) %>%
  dplyr::left_join(pairs_comp, by = "name") %>%
  dplyr::left_join(pairs_h, by = c("id" = "2", "alter" = "1")) %>%
  dplyr::left_join(pairs_comp, by = c("name.y" = "name")) %>%
  dplyr::mutate(
    num_common = dplyr::coalesce(as.numeric(n.x), as.numeric(n.y), 0)
  ) %>%
  dplyr::select(id, alter, num_common)
id alter num_common
1: 1 3 2
2: 1 5 0
3: 1 9 0
4: 2 3 1
5: 2 5 0
6: 3 2 1
7: 3 5 0
8: 3 9 0
9: 3 6 0
另一种解决方案是将边列表转换为邻接矩阵(使用 igraph 包),再将其与自身的转置相乘,以计算共享邻居的数量:
# Example edge list: each row is a directed nomination from `id` to `alter`.
el <- read.table(text= " id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6", header = TRUE)

# Build the directed graph. With a numeric edge list the values are used as
# vertex indices. Functions are namespaced so the snippet runs without
# attaching igraph first.
g <- igraph::graph_from_edgelist(as.matrix(el), directed = TRUE)

# Dense adjacency matrix: m[i, j] is non-zero when i nominates j.
# as_adjacency_matrix() replaces the deprecated get.adjacency().
m <- igraph::as_adjacency_matrix(g, sparse = FALSE)

# m2[i, j] is the number of vertices that both i and j nominate.
m2 <- m %*% t(m)
然后将生成的矩阵转换回边列表并将其与原始数据集合并:
# Flatten the matrix into long form: one row per cell, with the row and
# column positions in `Var1` / `Var2` and the cell content in `value`.
el2 <- reshape2::melt(m2)
# Attach the counts to the original edges; only pairs that are present in
# the edge list are kept.
dplyr::left_join(x = el, y = el2, by = c(id = "Var1", alter = "Var2"))
id alter value
1 1 3 2
2 1 5 0
3 1 9 0
4 2 3 1
5 2 5 0
6 3 2 1
7 3 5 0
8 3 9 0
9 3 6 0
若要统计有多少人同时提名了 ego 和 alter(即二者被同一个人提名为朋友的次数),请使用 t(m) %*% m 而不是 m %*% t(m) 来反转关系的方向。若要忽略方向,请将 graph_from_edgelist 函数的 directed 参数设置为 FALSE。
我有一个关于青少年友谊的有向网络数据集。我想生成一个边列表,其中包含 ego(自我)与 alter(他者)的共同朋友数量(即 ego 和 alter 都提名为朋友的人)。以下是一些示例数据:
有数据:
id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6
想要数据:
id alter num_common
1 3 2
1 5 0
1 9 0
2 3 1
2 5 0
3 2 1
3 5 0
3 9 0
3 6 0
这是一个可能但不是很简单的解决方案:
# Count, for every directed edge (id -> alter), how many friends both ends
# nominate. Strategy: enumerate all unordered node pairs, count the alters
# each pair has in common, then attach those counts to the original edges.
library(dplyr)
library(tidyr)

# Example data: each row is a directed nomination from `id` to `alter`.
df <- data.table::fread("id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6")

# All unordered pairs of nodes, one matrix column per pair. The columns are
# named explicitly because calling as_tibble() on a matrix without column
# names is deprecated and emits a warning.
pair_matrix <- utils::combn(unique(c(df$id, df$alter)), 2)
colnames(pair_matrix) <- paste0("V", seq_len(ncol(pair_matrix)))

# All pairs in long form: `name` is the pair ID, `value` is a member node
# (two rows per pair).
pairs_v <- pair_matrix %>%
  dplyr::as_tibble() %>%
  tidyr::pivot_longer(cols = dplyr::everything()) %>%
  dplyr::arrange(name)

# Number of common friends per pair ID: an alter counted more than once
# within a pair was nominated by both members. Nodes that nominate nobody
# contribute an NA alter, which is dropped.
# NOTE(review): every node occurs in several pairs and may nominate several
# alters, so this join is many-to-many by design; recent dplyr versions may
# warn about that -- confirm and set `relationship` if needed.
pairs_comp <- pairs_v %>%
  dplyr::left_join(df, by = c("value" = "id")) %>%
  dplyr::count(name, alter) %>%
  dplyr::filter(n > 1, !is.na(alter)) %>%
  dplyr::count(name)

# All pairs in wide form: one row per pair ID, members in columns `1` and `2`.
# The grouping is only needed to number the members, so it is dropped again.
pairs_h <- pairs_v %>%
  dplyr::group_by(name) %>%
  dplyr::mutate(G_ID = dplyr::row_number()) %>%
  dplyr::ungroup() %>%
  tidyr::pivot_wider(names_from = G_ID, values_from = value)

# An edge can match a pair in either orientation (id/alter = 1/2 or 2/1), so
# the pair lookup and the count lookup are each done once per orientation.
# The first available count wins; edges without common friends get 0.
df %>%
  dplyr::left_join(pairs_h, by = c("id" = "1", "alter" = "2")) %>%
  dplyr::left_join(pairs_comp, by = "name") %>%
  dplyr::left_join(pairs_h, by = c("id" = "2", "alter" = "1")) %>%
  dplyr::left_join(pairs_comp, by = c("name.y" = "name")) %>%
  dplyr::mutate(
    num_common = dplyr::coalesce(as.numeric(n.x), as.numeric(n.y), 0)
  ) %>%
  dplyr::select(id, alter, num_common)
id alter num_common
1: 1 3 2
2: 1 5 0
3: 1 9 0
4: 2 3 1
5: 2 5 0
6: 3 2 1
7: 3 5 0
8: 3 9 0
9: 3 6 0
另一种解决方案是将边列表转换为邻接矩阵(使用 igraph 包),再将其与自身的转置相乘,以计算共享邻居的数量:
# Example edge list: each row is a directed nomination from `id` to `alter`.
el <- read.table(text= " id alter
1 3
1 5
1 9
2 3
2 5
3 2
3 5
3 9
3 6", header = TRUE)

# Build the directed graph. With a numeric edge list the values are used as
# vertex indices. Functions are namespaced so the snippet runs without
# attaching igraph first.
g <- igraph::graph_from_edgelist(as.matrix(el), directed = TRUE)

# Dense adjacency matrix: m[i, j] is non-zero when i nominates j.
# as_adjacency_matrix() replaces the deprecated get.adjacency().
m <- igraph::as_adjacency_matrix(g, sparse = FALSE)

# m2[i, j] is the number of vertices that both i and j nominate.
m2 <- m %*% t(m)
然后将生成的矩阵转换回边列表并将其与原始数据集合并:
# Flatten the matrix into long form: one row per cell, with the row and
# column positions in `Var1` / `Var2` and the cell content in `value`.
el2 <- reshape2::melt(m2)
# Attach the counts to the original edges; only pairs that are present in
# the edge list are kept.
dplyr::left_join(x = el, y = el2, by = c(id = "Var1", alter = "Var2"))
id alter value
1 1 3 2
2 1 5 0
3 1 9 0
4 2 3 1
5 2 5 0
6 3 2 1
7 3 5 0
8 3 9 0
9 3 6 0
若要统计有多少人同时提名了 ego 和 alter(即二者被同一个人提名为朋友的次数),请使用 t(m) %*% m 而不是 m %*% t(m) 来反转关系的方向。若要忽略方向,请将 graph_from_edgelist 函数的 directed 参数设置为 FALSE。