R - 比较两个分层列表之间的公共元素的循环

Question

一段时间以来，我一直在尝试构建一个矩阵，该矩阵由两个分层列表之间的共同元素计数填充。

这是一些虚拟数据：

# Dummy data: one entry per (site, group, element) observation.
site <- rep(c("A", "B"), times = c(9, 6))
group <- rep(c("A1", "A2", "A3", "B1", "B2"), times = c(2, 3, 4, 2, 4))
element <- c(
  "red", "orange",                       # A1
  "blue", "black", "white",              # A2
  "black", "cream", "yellow", "purple",  # A3
  "red", "orange",                       # B1
  "blue", "white", "gray", "salmon"      # B2
)
# Column-bind the three character vectors into a 15 x 3 character matrix.
d <- cbind(site, group, element)

我创建了一个列表结构，因为每个列表中的元素数量不同，我认为应该这样处理。另外，我并不需要对所有可能的组合进行组间比较，只需要在不同站点之间进行比较。

# NOTE(review): `nodmod` is not defined in this snippet; presumably it is the
# asker's real data frame, with its `site` and `mod` columns playing the roles
# of `site` and `group` in the dummy data. Confirm before running.
# First-level list: split the data by site (drop = TRUE discards unused levels).
sitelist<-split(nodmod, list(nodmod$site),drop = TRUE)
# Second level: split each site's piece again by its `mod` column, so the
# result is a list of lists.
nestedlist <- lapply(sitelist, function(x) split(x, x[['mod']], drop = TRUE))

我的目的是创建一个 table 或矩阵，其中包含来自两个站点的组之间的共同元素数（我的原始数据有其他站点）。像这样：

    A1  A2  A3
B1  2   0   0
B2  0   2   0

这个问题的嵌套性质对我来说很有挑战性。我对列表不太熟悉，因为我过去大多是用数据框来解决问题的。我的尝试最终归结为下面这段代码。我觉得它已经很接近了，但我对循环的正确语法还掌握得不够。

# Attempt: fill a matrix with the number of common elements for every pair of
# (site A group, site B group).
# NOTE(review): `d` here presumably refers to the nested list (site -> group),
# not the character matrix built with cbind() in the dummy data; confirm.
t <- outer(1:length(d$A),
         1:length(d$B),
         FUN=function(i,j){
           # outer() calls FUN once with whole index vectors i and j and
           # expects one result per (i, j) pair.
           sapply(1:length(i),
                  FUN=function(x) 
                    # NOTE(review): `x` is never used; `[[i]]` and `[[j]]` get
                    # the full index vectors rather than i[x] and j[x].
                    length(intersect(d$A[[i]]$element, d$B[[j]]$element)) )
         })

如有任何帮助，我将不胜感激。如果类似的问题已经有人解答过，我深表歉意。我在网上搜索过，但没有找到，或者没能理解到足以把它套用到我自己的问题上。

Answer 1

library(tidyverse)

# example dataset: one entry per (site, group, element) observation
site <- c("A", "A", "A", "A", "A", "A", "A", "A", "A",
          "B", "B", "B", "B", "B", "B")
group <- c("A1", "A1", "A2", "A2", "A2", "A3", "A3", "A3", "A3",
           "B1", "B1", "B2", "B2", "B2", "B2")
element <- c("red", "orange", "blue", "black", "white",
             "black", "cream", "yellow", "purple",
             "red", "orange", "blue", "white", "gray", "salmon")

# save as dataframe (built directly, without an intermediate cbind() matrix)
d <- data.frame(site, group, element)

common_counts <-
  # get all combinations of site-A groups and site-B groups
  expand.grid(groupA = unique(d$group[d$site == "A"]),
              groupB = unique(d$group[d$site == "B"])) %>%
  # evaluate the next step once per row, i.e. per (groupA, groupB) pair
  rowwise() %>%
  # count common elements; intersect() works on sets, so duplicates within a
  # group are not counted twice
  mutate(counts = length(intersect(d$element[d$group == groupA],
                                   d$element[d$group == groupB]))) %>%
  # drop the row-wise grouping before reshaping
  ungroup() %>%
  # reshape to one column per site-A group (pivot_wider() supersedes spread())
  pivot_wider(names_from = groupA, values_from = counts) %>%
  as.data.frame() %>%
  column_to_rownames("groupB")

common_counts

#    A1 A2 A3
# B1  2  0  0
# B2  0  2  0

您可以使用将（自动）应用于每一行的向量化函数代替 rowwise，如下所示：

# Assumes the data frame `d` and library(tidyverse) from the previous snippet.

# Count the distinct elements shared by group `x` and group `y` of `d`.
# intersect() works on sets, so repeated elements are counted once.
CountCommonElements <- function(x, y) {
  length(intersect(d$element[d$group == x], d$element[d$group == y]))
}
# Vectorize() makes the scalar function apply element-wise to whole columns,
# so no rowwise() step is needed in the pipeline below.
CountCommonElements <- Vectorize(CountCommonElements)

# all combinations of site-A groups and site-B groups
expand.grid(groupA = unique(d$group[d$site == "A"]),
            groupB = unique(d$group[d$site == "B"])) %>%
  mutate(counts = CountCommonElements(groupA, groupB)) %>%
  # reshape to one column per site-A group (pivot_wider() supersedes spread())
  pivot_wider(names_from = groupA, values_from = counts) %>%
  as.data.frame() %>%
  column_to_rownames("groupB")

#    A1 A2 A3
# B1  2  0  0
# B2  0  2  0

Answer 2

考虑使用矩阵乘法 x %*% y（参见 ?matmult）：先创建一个辅助矩阵，其行为唯一的 element 值、列为唯一的 group 值，并在每个对应的单元格中赋值 1。然后将该矩阵的转置与其自身相乘，最后对行和列取子集：

# Assumes the `site`, `group` and `element` vectors from the question exist.

# EMPTY MATRIX: one row per unique element, one column per unique group
helper_mat <- matrix(0,
                     nrow = length(unique(element)),
                     ncol = length(unique(group)),
                     dimnames = list(unique(element), unique(group)))

# ASSIGN 1's AT SELECT LOCATIONS
# A two-column character matrix indexes cells by (row name, column name), so
# every observed (element, group) pair is set in one vectorised assignment.
helper_mat[cbind(element, group)] <- 1

helper_mat
#        A1 A2 A3 B1 B2
# red     1  0  0  1  0
# orange  1  0  0  1  0
# blue    0  1  0  0  1
# black   0  1  1  0  0
# white   0  1  0  0  1
# cream   0  0  1  0  0
# yellow  0  0  1  0  0
# purple  0  0  1  0  0
# gray    0  0  0  0  1
# salmon  0  0  0  0  1

# MATRIX MULTIPLICATION WITH SUBSET
# Cell (g1, g2) of the product is the number of elements present in both groups.
final_mat <- t(helper_mat) %*% helper_mat
# Keep site-B groups as rows and site-A groups as columns; selecting by `site`
# avoids relying on the letters that happen to appear in the group names.
final_mat <- final_mat[unique(group[site == "B"]),
                       unique(group[site == "A"]),
                       drop = FALSE]

final_mat
#    A1 A2 A3
# B1  2  0  0
# B2  0  2  0

感谢 @Lamia，下面是一个更短的版本：

# Assumes the `site`, `group` and `element` vectors from the question exist.

# table() counts occurrences; "> 0" turns the counts into 0/1 presence flags so
# an element repeated within a group is not counted more than once.
helper_mat <- 1L * (table(element, group) > 0)

# Cell (g1, g2) of the product is the number of elements present in both groups.
final_mat <- t(helper_mat) %*% helper_mat   # ALTERNATIVELY: crossprod(helper_mat)
# Keep site-B groups as rows and site-A groups as columns.
final_mat <- final_mat[unique(group[site == "B"]),
                       unique(group[site == "A"]),
                       drop = FALSE]

final_mat
#      group
# group A1 A2 A3
#    B1  2  0  0
#    B2  0  2  0

Answer 3

与 @Parfait 使用矩阵乘法的方法类似。您可能需要在数据生成这一步上做些调整，才能将其扩展到您的实际应用：

site <- c("A", "A", "A", "A", "A", "A", "A", "A", "A",
          "B", "B", "B", "B", "B", "B")
group <- c("A1", "A1", "A2", "A2", "A2", "A3", "A3", "A3", "A3",
           "B1", "B1", "B2", "B2", "B2", "B2")
element <- c("red", "orange", "blue", "black", "white",
             "black", "cream", "yellow", "purple",
             "red", "orange", "blue", "white", "gray", "salmon")

# `el` is a factor so both cross-tabulations below keep the full set of element
# levels as columns, which makes the two matrices conformable for %*%.
# `group` stays character so each table only contains its own site's groups.
d <- data.frame(group, el = as.factor(element), stringsAsFactors = FALSE)

# Split the rows by site rather than by hard-coded group labels, so the code
# keeps working when groups are added or renamed.
As <- d[site == "A", ]
Bs <- d[site == "B", ]

# group x element presence matrices: table() counts occurrences and "> 0"
# reduces them to 0/1, so B_mat %*% t(A_mat) gives the number of distinct
# common elements for every (B group, A group) pair.
A_mat <- 1L * (as.matrix(table(As)) > 0)
B_mat <- 1L * (as.matrix(table(Bs)) > 0)

结果：

> A_mat
         el
group black blue cream gray orange purple red salmon white yellow
   A1     0    0     0    0      1      0   1      0     0      0
   A2     1    1     0    0      0      0   0      0     1      0
   A3     1    0     1    0      0      1   0      0     0      1


> B_mat
         el
group black blue cream gray orange purple red salmon white yellow
   B1     0    0     0    0      1      0   1      0     0      0
   B2     0    1     0    1      0      0   0      1     1      0


> B_mat %*% t(A_mat)
     group
group A1 A2 A3
   B1  2  0  0
   B2  0  2  0

R - 比较两个分层列表之间的公共元素的循环

R - A loop comparing elements in common between two hierarchical lists

loops

r

list

sapply