如何在 R 中同时对三个字段进行网络分析
How to do network analysis on three fields simultaneously in R
如何在 R 中同时对三个字段进行网络分析。下面是示例数据以及最后一列中的 desired output
。
df <- data.frame(
stringsAsFactors = FALSE,
id_1 = c("ABC","ABC","BCD",
"CDE","DEF","EFG","GHI","HIJ","IJK","JKL",
"GHI","KLM","LMN","MNO","NOP"),
id_2 = c("1A","2A","3A",
"1A","4A","5A","6A","8A","9A","10A","7A",
"12A","13A","14A","15A"),
id_3 = c("Z3","Z2","Z1",
"Z4","Z1","Z5","Z5","Z6","Z7","Z8","Z6","Z8",
"Z9","Z9","Z1"),
Name = c("Whosebug1",
"Whosebug2","Whosebug3","Whosebug4",
"Whosebug5","Whosebug6",
"Whosebug7","Whosebug8","Whosebug9",
"Whosebug10","Whosebug11","Whosebug12",
"Whosebug13","Whosebug14","Whosebug15"),
desired_output = c(1L,1L,2L,1L,2L,
3L,3L,3L,4L,5L,3L,5L,6L,6L,2L)
)
df
#> id_1 id_2 id_3 Name desired_output
#> 1 ABC 1A Z3 Whosebug1 1
#> 2 ABC 2A Z2 Whosebug2 1
#> 3 BCD 3A Z1 Whosebug3 2
#> 4 CDE 1A Z4 Whosebug4 1
#> 5 DEF 4A Z1 Whosebug5 2
#> 6 EFG 5A Z5 Whosebug6 3
#> 7 GHI 6A Z5 Whosebug7 3
#> 8 HIJ 8A Z6 Whosebug8 3
#> 9 IJK 9A Z7 Whosebug9 4
#> 10 JKL 10A Z8 Whosebug10 5
#> 11 GHI 7A Z6 Whosebug11 3
#> 12 KLM 12A Z8 Whosebug12 5
#> 13 LMN 13A Z9 Whosebug13 6
#> 14 MNO 14A Z9 Whosebug14 6
#> 15 NOP 15A Z1 Whosebug15 2
实际上我可以按照我自己的回答 中的描述使用 igraph
同时对 2 个字段执行网络分析,但我无法对 2 个字段执行此操作。
请帮忙。
我目前的方法(2次迭代),我觉得可以优化。
library(igraph)
library(tidyverse)
graph.data.frame(df) %>%
components() %>%
pluck(membership) %>%
stack() %>%
set_names(c('GRP', 'id_1')) %>%
right_join(df %>% mutate(id_1 = as.factor(id_1)), by = c('id_1')) %>%
select(GRP, id_3) %>%
graph.data.frame() %>%
components() %>%
pluck(membership) %>%
stack() %>%
set_names(c('GRP', 'id_3')) %>%
right_join(df %>% mutate(id_3 = as.factor(id_3)), by = c('id_3'))
#> GRP id_3 id_1 id_2 Name desired_output
#> 1 1 Z3 ABC 1A Whosebug1 1
#> 2 1 Z2 ABC 2A Whosebug2 1
#> 3 2 Z1 BCD 3A Whosebug3 2
#> 4 2 Z1 DEF 4A Whosebug5 2
#> 5 2 Z1 NOP 15A Whosebug15 2
#> 6 1 Z4 CDE 1A Whosebug4 1
#> 7 3 Z5 EFG 5A Whosebug6 3
#> 8 3 Z5 GHI 6A Whosebug7 3
#> 9 3 Z6 HIJ 8A Whosebug8 3
#> 10 3 Z6 GHI 7A Whosebug11 3
#> 11 4 Z7 IJK 9A Whosebug9 4
#> 12 5 Z8 JKL 10A Whosebug10 5
#> 13 5 Z8 KLM 12A Whosebug12 5
#> 14 6 Z9 LMN 13A Whosebug13 6
#> 15 6 Z9 MNO 14A Whosebug14 6
由 reprex package (v2.0.1)
于 2021-11-15 创建
创建由 id 列和行号定义的顶点之间的所有连接的列表(函数 f
)。最后你只对行之间的连接感兴趣。
f <- function(vec){
i <- last(vec)
vec <- head(vec, -1)
c(
seq_len(length(vec) - 1) %>% map(~vec[.x:(.x+1)]),
vec %>% map(~c(i, .x))
)
}
df$desired_output <- df %>%
select(matches("^id_[0-9]+$")) %>%
mutate(row = row_number()) %>%
pmap(~f(c(...))) %>%
flatten() %>%
reduce(rbind) %>%
igraph::graph_from_edgelist() %>%
components() %>%
membership() %>%
.[as.character(seq_len(nrow(df)))]
编辑
想象一下 id 之间的联系。您对行之间的连接感兴趣。为此,您需要为每一行添加顶点。这些顶点连接到该行中的所有 ID。
第 6 行示例:
6 EFG 5A Z5
我们对 id 之间的联系感兴趣(c
函数 f
中的第一部分:
[[1]]
[1] "EFG" "5A"
[[2]]
[1] "5A" "Z5"
以及行和 id 之间的连接(f
中 c
的第二部分):
[[1]]
[1] "6" "EFG"
[[2]]
[1] "6" "5A"
[[3]]
[1] "6" "Z5"
当您以这种方式创建图表时,您会得到:
并且您对连接了哪些行顶点感兴趣
注
您可以在为此结果创建图表时使用 directed = FALSE
,或者如果您对此感兴趣,可以在 components
中使用 mode = "strong"
。
更新解决方案
如果只得到所需的列,它可能是另一种选择:
library(dplyr)
library(purrr)
library(igraph)
df %>%
select(starts_with("id")) %>%
pmap_dfr(~ as.data.frame(t(combn(c(...), 2)))) %>%
graph_from_data_frame(directed = TRUE) %>%
components() %>%
groups() -> lst
df %>%
rowwise() %>%
mutate(grp = seq_len(length(lst))[map_lgl(lst, ~ id_1 %in% .x)])
# A tibble: 15 x 5
# Rowwise:
id_1 id_2 id_3 Name grp
<chr> <chr> <chr> <chr> <int>
1 ABC 1A Z3 Whosebug1 1
2 ABC 2A Z2 Whosebug2 1
3 BCD 3A Z1 Whosebug3 2
4 CDE 1A Z4 Whosebug4 1
5 DEF 4A Z1 Whosebug5 2
6 EFG 5A Z5 Whosebug6 3
7 GHI 6A Z5 Whosebug7 3
8 HIJ 8A Z6 Whosebug8 3
9 IJK 9A Z7 Whosebug9 4
10 JKL 10A Z8 Whosebug10 5
11 GHI 7A Z6 Whosebug11 3
12 KLM 12A Z8 Whosebug12 5
13 LMN 13A Z9 Whosebug13 6
14 MNO 14A Z9 Whosebug14 6
15 NOP 15A Z1 Whosebug15 2
为了让你只是绘制它:
library(dplyr)
library(purrr)
library(igraph)
df %>%
mutate(id = row_number()) %>%
select(starts_with("id"), id) %>%
pmap_dfr(~ as.data.frame(t(combn(c(...), 2)))) %>%
graph_from_data_frame(directed = TRUE) %>%
plot()
您可以试试下面的代码
transform(
df,
GRP = membership(
components(
graph_from_data_frame(
reshape(
df,
direction = "long",
idvar = c("id_1", "Name"),
varying = 2:3,
v.names = "to"
)[c("id_1", "to")]
)
)
)[id_1]
)
这给出了
id_1 id_2 id_3 Name GRP
1 ABC 1A Z3 Whosebug1 1
2 ABC 2A Z2 Whosebug2 1
3 BCD 3A Z1 Whosebug3 2
4 CDE 1A Z4 Whosebug4 1
5 DEF 4A Z1 Whosebug5 2
6 EFG 5A Z5 Whosebug6 3
7 GHI 6A Z5 Whosebug7 3
8 HIJ 8A Z6 Whosebug8 3
9 IJK 9A Z7 Whosebug9 4
10 JKL 10A Z8 Whosebug10 5
11 GHI 7A Z6 Whosebug11 3
12 KLM 12A Z8 Whosebug12 5
13 LMN 13A Z9 Whosebug13 6
14 MNO 14A Z9 Whosebug14 6
15 NOP 15A Z1 Whosebug15 2
数据
> dput(df)
structure(list(id_1 = c("ABC", "ABC", "BCD", "CDE", "DEF", "EFG",
"GHI", "HIJ", "IJK", "JKL", "GHI", "KLM", "LMN", "MNO", "NOP"
), id_2 = c("1A", "2A", "3A", "1A", "4A", "5A", "6A", "8A", "9A",
"10A", "7A", "12A", "13A", "14A", "15A"), id_3 = c("Z3", "Z2",
"Z1", "Z4", "Z1", "Z5", "Z5", "Z6", "Z7", "Z8", "Z6", "Z8", "Z9",
"Z9", "Z1"), Name = c("Whosebug1", "Whosebug2", "Whosebug3",
"Whosebug4", "Whosebug5", "Whosebug6", "Whosebug7",
"Whosebug8", "Whosebug9", "Whosebug10", "Whosebug11",
"Whosebug12", "Whosebug13", "Whosebug14", "Whosebug15"
)), row.names = c(NA, -15L), class = "data.frame")
如何在 R 中同时对三个字段进行网络分析。下面是示例数据以及最后一列中的 desired output
。
df <- data.frame(
stringsAsFactors = FALSE,
id_1 = c("ABC","ABC","BCD",
"CDE","DEF","EFG","GHI","HIJ","IJK","JKL",
"GHI","KLM","LMN","MNO","NOP"),
id_2 = c("1A","2A","3A",
"1A","4A","5A","6A","8A","9A","10A","7A",
"12A","13A","14A","15A"),
id_3 = c("Z3","Z2","Z1",
"Z4","Z1","Z5","Z5","Z6","Z7","Z8","Z6","Z8",
"Z9","Z9","Z1"),
Name = c("Whosebug1",
"Whosebug2","Whosebug3","Whosebug4",
"Whosebug5","Whosebug6",
"Whosebug7","Whosebug8","Whosebug9",
"Whosebug10","Whosebug11","Whosebug12",
"Whosebug13","Whosebug14","Whosebug15"),
desired_output = c(1L,1L,2L,1L,2L,
3L,3L,3L,4L,5L,3L,5L,6L,6L,2L)
)
df
#> id_1 id_2 id_3 Name desired_output
#> 1 ABC 1A Z3 Whosebug1 1
#> 2 ABC 2A Z2 Whosebug2 1
#> 3 BCD 3A Z1 Whosebug3 2
#> 4 CDE 1A Z4 Whosebug4 1
#> 5 DEF 4A Z1 Whosebug5 2
#> 6 EFG 5A Z5 Whosebug6 3
#> 7 GHI 6A Z5 Whosebug7 3
#> 8 HIJ 8A Z6 Whosebug8 3
#> 9 IJK 9A Z7 Whosebug9 4
#> 10 JKL 10A Z8 Whosebug10 5
#> 11 GHI 7A Z6 Whosebug11 3
#> 12 KLM 12A Z8 Whosebug12 5
#> 13 LMN 13A Z9 Whosebug13 6
#> 14 MNO 14A Z9 Whosebug14 6
#> 15 NOP 15A Z1 Whosebug15 2
实际上我可以按照我自己的回答 igraph
同时对 2 个字段执行网络分析,但我无法对 2 个字段执行此操作。
请帮忙。
我目前的方法(2次迭代),我觉得可以优化。
library(igraph)
library(tidyverse)
graph.data.frame(df) %>%
components() %>%
pluck(membership) %>%
stack() %>%
set_names(c('GRP', 'id_1')) %>%
right_join(df %>% mutate(id_1 = as.factor(id_1)), by = c('id_1')) %>%
select(GRP, id_3) %>%
graph.data.frame() %>%
components() %>%
pluck(membership) %>%
stack() %>%
set_names(c('GRP', 'id_3')) %>%
right_join(df %>% mutate(id_3 = as.factor(id_3)), by = c('id_3'))
#> GRP id_3 id_1 id_2 Name desired_output
#> 1 1 Z3 ABC 1A Whosebug1 1
#> 2 1 Z2 ABC 2A Whosebug2 1
#> 3 2 Z1 BCD 3A Whosebug3 2
#> 4 2 Z1 DEF 4A Whosebug5 2
#> 5 2 Z1 NOP 15A Whosebug15 2
#> 6 1 Z4 CDE 1A Whosebug4 1
#> 7 3 Z5 EFG 5A Whosebug6 3
#> 8 3 Z5 GHI 6A Whosebug7 3
#> 9 3 Z6 HIJ 8A Whosebug8 3
#> 10 3 Z6 GHI 7A Whosebug11 3
#> 11 4 Z7 IJK 9A Whosebug9 4
#> 12 5 Z8 JKL 10A Whosebug10 5
#> 13 5 Z8 KLM 12A Whosebug12 5
#> 14 6 Z9 LMN 13A Whosebug13 6
#> 15 6 Z9 MNO 14A Whosebug14 6
由 reprex package (v2.0.1)
于 2021-11-15 创建创建由 id 列和行号定义的顶点之间的所有连接的列表(函数 f
)。最后你只对行之间的连接感兴趣。
f <- function(vec){
i <- last(vec)
vec <- head(vec, -1)
c(
seq_len(length(vec) - 1) %>% map(~vec[.x:(.x+1)]),
vec %>% map(~c(i, .x))
)
}
df$desired_output <- df %>%
select(matches("^id_[0-9]+$")) %>%
mutate(row = row_number()) %>%
pmap(~f(c(...))) %>%
flatten() %>%
reduce(rbind) %>%
igraph::graph_from_edgelist() %>%
components() %>%
membership() %>%
.[as.character(seq_len(nrow(df)))]
编辑
想象一下 id 之间的联系。您对行之间的连接感兴趣。为此,您需要为每一行添加顶点。这些顶点连接到该行中的所有 ID。
第 6 行示例:
6 EFG 5A Z5
我们对 id 之间的联系感兴趣(c
函数 f
中的第一部分:
[[1]]
[1] "EFG" "5A"
[[2]]
[1] "5A" "Z5"
以及行和 id 之间的连接(f
中 c
的第二部分):
[[1]]
[1] "6" "EFG"
[[2]]
[1] "6" "5A"
[[3]]
[1] "6" "Z5"
当您以这种方式创建图表时,您会得到:
并且您对连接了哪些行顶点感兴趣
注
您可以在为此结果创建图表时使用 directed = FALSE
,或者如果您对此感兴趣,可以在 components
中使用 mode = "strong"
。
更新解决方案 如果只得到所需的列,它可能是另一种选择:
library(dplyr)
library(purrr)
library(igraph)
df %>%
select(starts_with("id")) %>%
pmap_dfr(~ as.data.frame(t(combn(c(...), 2)))) %>%
graph_from_data_frame(directed = TRUE) %>%
components() %>%
groups() -> lst
df %>%
rowwise() %>%
mutate(grp = seq_len(length(lst))[map_lgl(lst, ~ id_1 %in% .x)])
# A tibble: 15 x 5
# Rowwise:
id_1 id_2 id_3 Name grp
<chr> <chr> <chr> <chr> <int>
1 ABC 1A Z3 Whosebug1 1
2 ABC 2A Z2 Whosebug2 1
3 BCD 3A Z1 Whosebug3 2
4 CDE 1A Z4 Whosebug4 1
5 DEF 4A Z1 Whosebug5 2
6 EFG 5A Z5 Whosebug6 3
7 GHI 6A Z5 Whosebug7 3
8 HIJ 8A Z6 Whosebug8 3
9 IJK 9A Z7 Whosebug9 4
10 JKL 10A Z8 Whosebug10 5
11 GHI 7A Z6 Whosebug11 3
12 KLM 12A Z8 Whosebug12 5
13 LMN 13A Z9 Whosebug13 6
14 MNO 14A Z9 Whosebug14 6
15 NOP 15A Z1 Whosebug15 2
为了让你只是绘制它:
library(dplyr)
library(purrr)
library(igraph)
df %>%
mutate(id = row_number()) %>%
select(starts_with("id"), id) %>%
pmap_dfr(~ as.data.frame(t(combn(c(...), 2)))) %>%
graph_from_data_frame(directed = TRUE) %>%
plot()
您可以试试下面的代码
transform(
df,
GRP = membership(
components(
graph_from_data_frame(
reshape(
df,
direction = "long",
idvar = c("id_1", "Name"),
varying = 2:3,
v.names = "to"
)[c("id_1", "to")]
)
)
)[id_1]
)
这给出了
id_1 id_2 id_3 Name GRP
1 ABC 1A Z3 Whosebug1 1
2 ABC 2A Z2 Whosebug2 1
3 BCD 3A Z1 Whosebug3 2
4 CDE 1A Z4 Whosebug4 1
5 DEF 4A Z1 Whosebug5 2
6 EFG 5A Z5 Whosebug6 3
7 GHI 6A Z5 Whosebug7 3
8 HIJ 8A Z6 Whosebug8 3
9 IJK 9A Z7 Whosebug9 4
10 JKL 10A Z8 Whosebug10 5
11 GHI 7A Z6 Whosebug11 3
12 KLM 12A Z8 Whosebug12 5
13 LMN 13A Z9 Whosebug13 6
14 MNO 14A Z9 Whosebug14 6
15 NOP 15A Z1 Whosebug15 2
数据
> dput(df)
structure(list(id_1 = c("ABC", "ABC", "BCD", "CDE", "DEF", "EFG",
"GHI", "HIJ", "IJK", "JKL", "GHI", "KLM", "LMN", "MNO", "NOP"
), id_2 = c("1A", "2A", "3A", "1A", "4A", "5A", "6A", "8A", "9A",
"10A", "7A", "12A", "13A", "14A", "15A"), id_3 = c("Z3", "Z2",
"Z1", "Z4", "Z1", "Z5", "Z5", "Z6", "Z7", "Z8", "Z6", "Z8", "Z9",
"Z9", "Z1"), Name = c("Whosebug1", "Whosebug2", "Whosebug3",
"Whosebug4", "Whosebug5", "Whosebug6", "Whosebug7",
"Whosebug8", "Whosebug9", "Whosebug10", "Whosebug11",
"Whosebug12", "Whosebug13", "Whosebug14", "Whosebug15"
)), row.names = c(NA, -15L), class = "data.frame")