从 Tidy Dataframes 创建节点和边 Dataframes

Creating Nodes and Edges Dataframes from Tidy Dataframes

我有一个具有这种结构的数据框:

# Example data: each row records that node `cat1` occurs in group `var1`.
df <- data.frame(
  var1 = c(1, 1, 1, 2, 2, 3, 3, 3, 3),
  cat1 = c("A", "B", "D", "B", "C", "D", "E", "B", "A")
)

> df
  var1 cat1
1    1    A
2    1    B
3    1    D
4    2    B
5    2    C
6    3    D
7    3    E
8    3    B
9    3    A

我希望从中创建节点和边数据框,以便使用 visNetwork 绘制网络图。该网络将显示不同 cat1 值之间连接的数量(强度),其中连接关系按 var1 的取值分组确定。

节点数据框我已经处理好了:

# Node table: one row per distinct cat1 value, in order of first appearance
nodes <- data.frame(id = unique(df[["cat1"]]))
> nodes
  id
1  A
2  B
3  D
4  C
5  E

我需要帮助的是如何按以下方式处理 df: 对于 df 中 var1 的每个不同取值,找出在该取值下同时出现的节点,并统计每一对节点共同出现的次数,最终得到如下所示的边数据框。请注意,我并不关心边的方向,只需要知道节点之间相连即可。

> edges
  from to value
1    A  B     2
2    A  D     2
3    A  E     1
4    B  C     1
5    B  D     2
6    B  E     1
7    D  E     1

提前致谢, 内维尔

更新:我发现了一个类似的问题,并修改了其中的代码,得到了下面的结果。这已经接近我想要的,但还不完全是...

    # Attempt from the question: keep only var1 groups with at least two rows,
    # then, per group, enumerate every 2-combination of cat1 (each pair passed
    # through sort()) and return the pairs as rows with columns X1 and X2.
    # The pairs are listed per group but not yet counted across groups.
    # NOTE(review): the second group_by(var1) looks redundant, since filter()
    # keeps the existing grouping.
    > df %>% group_by(var1) %>%
             filter(n()>=2) %>% group_by(var1) %>%
             do(data.frame(t(combn(.$cat1, 2,function(x) sort(x))), 
                           stringsAsFactors=FALSE))

# A tibble: 10 x 3
# Groups:   var1 [3]
    var1 X1    X2   
   <dbl> <chr> <chr>
 1    1. A     B    
 2    1. A     D    
 3    1. B     D    
 4    2. B     C    
 5    3. D     E    
 6    3. B     D    
 7    3. A     D    
 8    3. B     E    
 9    3. A     E    
10    3. A     B  

我不确定是否已有现成的函数可以完成这个任务。下面是实现该操作的详细步骤。有了这些步骤,您应该能够据此定义自己的函数。希望对您有所帮助!

library(dplyr)

# Cross-tabulate df (var1 groups in rows, cat1 nodes in columns), then
# multiply the transpose by the table itself: entry [i, j] of the product
# counts the var1 groups in which nodes i and j appear together.
mat <- table(df)
mat <- t(mat) %*% mat
as.table(mat) # look at your adjacency matrix

# The network is undirected, so only the strictly upper triangle is needed;
# zero out the diagonal and everything below it.
mat[lower.tri(mat, diag = TRUE)] <- 0
as.table(mat) # look at the new adjacency matrix

# Flatten the matrix into long form and drop the node pairs that never co-occur
edges <- as.table(mat) %>%
  as.data.frame() %>%
  filter(Freq != 0)
colnames(edges) <- c("from", "to", "value")
edges <- arrange(edges, from)
edges # output

#  from to value
#1    A  B     2
#2    A  D     2
#3    A  E     1
#4    B  C     1
#5    B  D     2
#6    B  E     1
#7    D  E     1

还有其他几种方法...

在基础 R 中...

# var1 values that occur more than once, i.e. groups that can form a pair
values <- unique(df$var1[duplicated(df$var1)])

# Build one data frame of node pairs per group and stack them row-wise.
# `value` holds the var1 group each pair came from.
do.call(
  rbind,
  lapply(values, function(i) {
    # Sort first so every undirected pair is written in one canonical
    # (from < to) order, regardless of row order within the group.
    nodes <- sort(as.character(df$cat1[df$var1 == i]))
    pairs <- combn(nodes, 2) # 2 x k matrix, one column per pair
    data.frame(
      from = pairs[1, ],
      to = pairs[2, ],
      value = i,
      stringsAsFactors = FALSE
    )
  })
)

在 tidyverse 中...

library(dplyr)
library(tidyr)

# For every var1 group with at least two rows, list each pair of cat1 values
# once. Sorting before combn() writes each undirected pair in one canonical
# (from < to) order; `value` holds the var1 group the pair came from.
df %>%
  group_by(var1) %>%
  filter(n() >= 2) %>%
  mutate(cat1 = as.character(cat1)) %>%
  summarise(
    edges = list(
      data.frame(t(combn(sort(cat1), 2)), stringsAsFactors = FALSE)
    )
  ) %>%
  unnest(edges) %>%
  select(from = X1, to = X2, value = var1)

在 tidyverse 中使用 tidyr::complete...

library(dplyr)
library(tidyr)

# Within each var1 group, duplicate cat1 into a second column and let
# complete() fill in every combination of the two; keeping only rows with
# cat1 < to leaves each undirected pair exactly once. `value` holds the var1
# group the pair came from.
df %>%
  group_by(var1) %>%
  mutate(
    cat1 = as.character(cat1),
    to = cat1
  ) %>%
  complete(cat1, to) %>%
  filter(cat1 < to) %>%
  ungroup() %>% # do not leak the grouping into downstream code
  select(from = cat1, to, value = var1)

在 tidyverse 中使用 tidyr::expand...

library(dplyr)
library(tidyr)

# Within each var1 group, expand() generates every combination of cat1 with
# itself; keeping only rows with cat1 < to leaves each undirected pair exactly
# once. `value` holds the var1 group the pair came from.
df %>%
  group_by(var1) %>%
  mutate(cat1 = as.character(cat1)) %>%
  expand(cat1, to = cat1) %>%
  filter(cat1 < to) %>%
  ungroup() %>% # do not leak the grouping into downstream code
  select(from = cat1, to, value = var1)