从 Tidy Dataframes 创建节点和边 Dataframes
Creating Nodes and Edges Dataframes from Tidy Dataframes
我有一个具有这种结构的数据框:
df <- data.frame(var1 = c(1,1,1,2,2,3,3,3,3),
cat1 = c("A","B","D","B","C","D","E","B","A"))`
> df
var1 cat1
1 1 A
2 1 B
3 1 D
4 2 B
5 2 C
6 3 D
7 3 E
8 3 B
9 3 A
我希望从中创建节点和边数据帧,以便我可以使用 VisNetwork 绘制网络图。此网络将显示 number/strength 不同 cat1 值之间的连接,按 var1 值分组。
我已对节点数据框进行排序:
nodes <- data.frame(id = unique(df$cat1))
> nodes
id
1 A
2 B
3 D
4 C
5 E
我需要帮助的是如何按以下方式处理 df:
对于 df 中 var1 的每个不同值,计算与 var1 的值共有的节点组,以给出最终看起来像下面的边数据帧。请注意,我并不关心沿边缘的流动方向。我只需要它们连接即可。
> edges
from to value
1 A B 2
2 A D 2
3 A E 1
4 B C 1
5 B D 2
6 B E 1
7 D E 1
感谢期待,
内维尔
更新:我发现了 一个类似的问题,并修改了该代码以提供,这接近我想要的,但还不完全...
> df %>% group_by(var1) %>%
filter(n()>=2) %>% group_by(var1) %>%
do(data.frame(t(combn(.$cat1, 2,function(x) sort(x))),
stringsAsFactors=FALSE))
# A tibble: 10 x 3
# Groups: var1 [3]
var1 X1 X2
<dbl> <chr> <chr>
1 1. A B
2 1. A D
3 1. B D
4 2. B C
5 3. D E
6 3. B D
7 3. A D
8 3. B E
9 3. A E
10 3. A B
不知道是否已经有合适的函数来完成这个任务。这是执行此操作的详细过程。有了这个,您应该能够定义自己的功能。希望对您有所帮助!
# create an adjacency matrix
mat <- table(df)
mat <- t(mat) %*% mat
as.table(mat) # look at your adjacency matrix
# since the network is not directed, we can consider only the (strictly) upper triangular matrix
mat[lower.tri(mat, diag = TRUE)] <- 0
as.table(mat) # look at the new adjacency matrix
library(dplyr)
edges <- as.data.frame(as.table(mat))
edges <- filter(edges, Freq != 0)
colnames(edges) <- c("from", "to", "value")
edges <- arrange(edges, from)
edges # output
# from to value
#1 A B 2
#2 A D 2
#3 A E 1
#4 B C 1
#5 B D 2
#6 B E 1
#7 D E 1
还有其他几种方法...
在基础 R 中...
values <- unique(df$var1[duplicated(df$var1)])
do.call(rbind,
lapply(values, function(i) {
nodes <- as.character(df$cat1[df$var1 == i])
edges <- combn(nodes, 2)
data.frame(from = edges[1, ],
to = edges[2, ],
value = i,
stringsAsFactors = F)
})
)
在 tidyverse 中...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
filter(n() >= 2) %>%
mutate(cat1 = as.character(cat1)) %>%
summarise(edges = list(data.frame(t(combn(cat1, 2)), stringsAsFactors = F))) %>%
unnest(edges) %>%
select(from = X1, to = X2, value = var1)
在 tidyverse 中使用 tidyr::complete
...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
mutate(cat1 = as.character(cat1)) %>%
mutate(i.cat1 = cat1) %>%
complete(cat1, i.cat1) %>%
filter(cat1 < i.cat1) %>%
select(from = cat1, to = i.cat1, value = var1)
在 tidyverse 中使用 tidyr::expand
...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
mutate(cat1 = as.character(cat1)) %>%
expand(cat1, to = cat1) %>%
filter(cat1 < to) %>%
select(from = cat1, to, value = var1)
我有一个具有这种结构的数据框:
df <- data.frame(var1 = c(1,1,1,2,2,3,3,3,3),
cat1 = c("A","B","D","B","C","D","E","B","A"))`
> df
var1 cat1
1 1 A
2 1 B
3 1 D
4 2 B
5 2 C
6 3 D
7 3 E
8 3 B
9 3 A
我希望从中创建节点和边数据帧,以便我可以使用 VisNetwork 绘制网络图。此网络将显示 number/strength 不同 cat1 值之间的连接,按 var1 值分组。
我已对节点数据框进行排序:
nodes <- data.frame(id = unique(df$cat1))
> nodes
id
1 A
2 B
3 D
4 C
5 E
我需要帮助的是如何按以下方式处理 df: 对于 df 中 var1 的每个不同值,计算与 var1 的值共有的节点组,以给出最终看起来像下面的边数据帧。请注意,我并不关心沿边缘的流动方向。我只需要它们连接即可。
> edges
from to value
1 A B 2
2 A D 2
3 A E 1
4 B C 1
5 B D 2
6 B E 1
7 D E 1
感谢期待, 内维尔
更新:我发现了
> df %>% group_by(var1) %>%
filter(n()>=2) %>% group_by(var1) %>%
do(data.frame(t(combn(.$cat1, 2,function(x) sort(x))),
stringsAsFactors=FALSE))
# A tibble: 10 x 3
# Groups: var1 [3]
var1 X1 X2
<dbl> <chr> <chr>
1 1. A B
2 1. A D
3 1. B D
4 2. B C
5 3. D E
6 3. B D
7 3. A D
8 3. B E
9 3. A E
10 3. A B
不知道是否已经有合适的函数来完成这个任务。这是执行此操作的详细过程。有了这个,您应该能够定义自己的功能。希望对您有所帮助!
# create an adjacency matrix
mat <- table(df)
mat <- t(mat) %*% mat
as.table(mat) # look at your adjacency matrix
# since the network is not directed, we can consider only the (strictly) upper triangular matrix
mat[lower.tri(mat, diag = TRUE)] <- 0
as.table(mat) # look at the new adjacency matrix
library(dplyr)
edges <- as.data.frame(as.table(mat))
edges <- filter(edges, Freq != 0)
colnames(edges) <- c("from", "to", "value")
edges <- arrange(edges, from)
edges # output
# from to value
#1 A B 2
#2 A D 2
#3 A E 1
#4 B C 1
#5 B D 2
#6 B E 1
#7 D E 1
还有其他几种方法...
在基础 R 中...
values <- unique(df$var1[duplicated(df$var1)])
do.call(rbind,
lapply(values, function(i) {
nodes <- as.character(df$cat1[df$var1 == i])
edges <- combn(nodes, 2)
data.frame(from = edges[1, ],
to = edges[2, ],
value = i,
stringsAsFactors = F)
})
)
在 tidyverse 中...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
filter(n() >= 2) %>%
mutate(cat1 = as.character(cat1)) %>%
summarise(edges = list(data.frame(t(combn(cat1, 2)), stringsAsFactors = F))) %>%
unnest(edges) %>%
select(from = X1, to = X2, value = var1)
在 tidyverse 中使用 tidyr::complete
...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
mutate(cat1 = as.character(cat1)) %>%
mutate(i.cat1 = cat1) %>%
complete(cat1, i.cat1) %>%
filter(cat1 < i.cat1) %>%
select(from = cat1, to = i.cat1, value = var1)
在 tidyverse 中使用 tidyr::expand
...
library(dplyr)
library(tidyr)
df %>%
group_by(var1) %>%
mutate(cat1 = as.character(cat1)) %>%
expand(cat1, to = cat1) %>%
filter(cat1 < to) %>%
select(from = cat1, to, value = var1)