统计矩阵的行与列组合在数据框中出现次数的矩阵

Matrix that counts how many times the combination of a row and column of that matrix are present in a dataframe

我对 R 还是很陌生,所以请多多包涵 :)

我需要创建一个矩阵来计算该矩阵的行和列的组合在数据框中出现的次数。

由于我的描述可能比较含糊,我在下面给出了一个例子。实际上,我的数据集将在矩阵中包含更多的水果,在数据框中包含更多的果汁,所以我正在寻找一种有效的方法来解决这个问题。

# Stack Overflow example
# Zero-filled 5 x 5 matrix with the same fruit names on rows and columns ----
newMatrix <- matrix(
  0,
  nrow = 5,
  ncol = 5,
  dimnames = rep(list(c("Apple", "Pear", "Orange", "Mango", "Banana")), 2)
)

# One row per juice with up to three fruits; NA marks an unused slot ----
newDf <- data.frame(
  Juice = c("Juice 1", "Juice 2", "Juice 3", "Juice 4", "Juice 5"),
  `Fruit 1` = c("Banana", "Banana", "Orange", "Pear", "Apple"),
  `Fruit 2` = c("Pear", "Orange", "Pear", "Apple", "Pear"),
  `Fruit 3` = c("Orange", "Mango", NA, NA, NA),
  check.names = FALSE # keep the space in the "Fruit n" column names
)

我想创建一个遍历 newMatrix 中每个元素的 for 循环,如果列和行的组合出现在 newDf 的一行中,则添加 +1。
所以本质上,有多少果汁是苹果和梨的组合,有多少果汁是苹果和芒果的组合,等等。

输出应如下所示:

       Apple Pear Orange Mango Banana
Apple      0    2      0     0      0
Pear       2    0      2     0      1
Orange     0    2      0     1      2
Mango      0    0      1     0      1
Banana     0    1      2     1      0

我开始尝试创建一个 for 循环,但我卡在了 if 部分:

# Asker's unfinished attempt: visit every cell (i, j) of the count matrix and
# add 1 to it whenever the (still missing) condition holds.
# NOTE(review): the example setup defines `newMatrix`, not `adj_matrix` --
# presumably they are meant to be the same matrix; confirm before running.
for (i in 1:nrow(adj_matrix)){
  for (j in 1:ncol(adj_matrix)) {
    # `???` is the asker's placeholder for the test they could not write, so
    # this snippet does not parse as-is.
    if (???)
      adj_matrix[i,j] <- adj_matrix[i,j] + 1
  }
}

有人可以帮我解决这个问题吗?将不胜感激!

使用基础 R(base R),您可以先列出每种果汁中水果的两两组合,然后使用 igraph 获得邻接矩阵:

library(igraph)

# Build every unordered pair of fruits that share a juice (one column per
# pair), then let igraph count the pairs as edges of an undirected graph.
# Missing fruits are dropped *before* pairing, so a row with fewer than two
# fruits contributes no columns instead of a ragged vector that `cbind()`
# would have to recycle.
m <- do.call(
  cbind,
  apply(
    newDf[-1], 1,
    \(x) {
      fruits <- x[!is.na(x)]
      if (length(fruits) >= 2) combn(fruits, m = 2) else NULL
    },
    simplify = FALSE # spelled out: `F` is an ordinary, reassignable variable
  )
)
# Each row of t(m) is one fruit pair, i.e. one edge.
g <- graph_from_data_frame(t(m), directed = FALSE)
# `as_adjacency_matrix()` is the current name of the deprecated
# `get.adjacency()`; cell (a, b) is the number of juices containing a and b.
as_adjacency_matrix(g, sparse = FALSE)

       Banana Pear Orange Apple Mango
Banana      0    1      2     0     1
Pear        1    0      2     2     0
Orange      2    2      0     0     1
Apple       0    2      0     0     0
Mango       1    0      1     0     0

这种写法可能有点绕,但您也可以将 igraph 与 tidyverse 包一起使用:

library(igraph)
library(tidyverse)

# Count, for every pair of fruits, how many juices contain both.
newDf %>%
  # One row per (Juice, fruit slot); the fruit ends up in `value`.
  pivot_longer(-Juice) %>%
  group_by(Juice) %>%
  # Per juice, list all fruit pairs as one string "a-b-c-d-...".
  summarise(
    new = ifelse(
      n() > 1,
      paste(combn(na.omit(value), 2), collapse = "-"),
      value
    )
  ) %>%
  # Split on every second "-" so each row holds exactly one pair "a-b".
  # The backslash must be doubled: "\K" alone is an unrecognized escape and
  # makes the R parser reject the string.
  separate_rows(new, sep = "(?:[^-]*(?:-[^-]*){1})\\K-") %>%
  separate(new, into = c("X1", "X2")) %>%
  select(-Juice) %>%
  # Each (X1, X2) row is one undirected edge.
  graph_from_data_frame(directed = FALSE) %>%
  # `as_adjacency_matrix()` is the current name of the deprecated
  # `get.adjacency()`.
  as_adjacency_matrix(sparse = FALSE)

       Banana Pear Orange Apple Mango
Banana      0    1      2     0     1
Pear        1    0      2     2     0
Orange      2    2      0     0     1
Apple       0    2      0     0     0
Mango       1    0      1     0     0

for循环可以这样写

## Every pair of column indices among columns 2 to 4 of newDf;
## each column of `cb` is one pair.
cb <- combn(2:4, 2)

## Zero-filled square count matrix with the fruit names on both dimensions
v <- c("Apple", "Pear", "Orange", "Mango", "Banana")
adj_matrix <- matrix(0, length(v), length(v), dimnames = list(v, v))

## For every row of newDf and every column pair, count the pair both ways
for (row_id in seq_len(nrow(newDf))) {
  for (pair_id in seq_len(ncol(cb))) {
    pair <- unlist(newDf[row_id, cb[, pair_id]])
    ## A pair with a missing fruit is not a combination; skip it
    if (sum(!is.na(pair)) != 2) {
      next
    }
    first <- pair[1]
    second <- pair[2]
    ## Update both (first, second) and (second, first) to keep the matrix
    ## symmetric
    adj_matrix[first, second] <- adj_matrix[first, second] + 1
    adj_matrix[second, first] <- adj_matrix[second, first] + 1
  }
}

adj_matrix
#        Apple Pear Orange Mango Banana
# Apple      0    2      0     0      0
# Pear       2    0      2     0      1
# Orange     0    2      0     1      2
# Mango      0    0      1     0      1
# Banana     0    1      2     1      0

数据:

# Sample data: one row per juice with up to three fruits (NA = unused slot).
newDf <- data.frame(
  Juice = c("Juice 1", "Juice 2", "Juice 3", "Juice 4", "Juice 5"),
  `Fruit 1` = c("Banana", "Banana", "Orange", "Pear", "Apple"),
  `Fruit 2` = c("Pear", "Orange", "Pear", "Apple", "Pear"),
  `Fruit 3` = c("Orange", "Mango", NA, NA, NA),
  check.names = FALSE,      # keep the space in the "Fruit n" column names
  stringsAsFactors = FALSE  # columns stay character vectors
)