R,dplyr:获取矩阵列表中重复单元格的数量

R, dplyr: Get number of repeated cells in list of matrices

编辑

添加一个 4x4 矩阵的例子来测试

矩阵:

> mat1
[,1] [,2] [,3] [,4]
[1,]    0    0    1    0
[2,]    0    1    0    1
[3,]    1    1    1    1
[4,]    0    1    0    1
> mat2
[,1] [,2] [,3] [,4]
[1,]    1    1    1    0
[2,]    1    1    1    1
[3,]    1    1    1    1
[4,]    0    1    0    0
> mat3
[,1] [,2] [,3] [,4]
[1,]    1    1    0    1
[2,]    1    0    0    1
[3,]    0    0    0    1
[4,]    1    0    1    0
> mat4
[,1] [,2] [,3] [,4]
[1,]    0    0    1    0
[2,]    0    0    0    0
[3,]    0    0    1    1
[4,]    0    0    1    1


# Step-by-step count of repeated cells per row for the cumulative sums
# M1 + M2, M1 + M2 + M3 and M1 + M2 + M3 + M4 of the 4x4 example matrices.
# Requires dplyr to be attached (`%>%`, full_join(), mutate(), select()).
sample_list <- list(mat1, mat2, mat3, mat4)

# Matrices 1-2 ----
# For 0/1 inputs, a summed cell holding k > 1 was filled in k matrices,
# i.e. it contributes k - 1 repeats.
m12     <- as.matrix(sample_list[[1]] + sample_list[[2]])
# Per-row total of the cells whose summed value exceeds 1.
sum_row <- data.frame(id = seq_len(nrow(m12)),
                      sum_row = rowSums(m12 * (m12 > 1)),
                      row.names = NULL)
# Per-row number of cells whose summed value exceeds 1.
num_rep <- data.frame(id = seq_len(nrow(m12)),
                      num_rep = rowSums(m12 > 1),
                      row.names = NULL)
# Total minus count = sum of (k - 1) over the repeated cells of each row.
# The join key is spelled out so the join never depends on which columns
# happen to share a name.
rep2    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep2 = sum_row - num_rep) %>%
           select(id, sum_rep2)


# Matrices 1-3 ----
m123    <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]])
sum_row <- data.frame(id = seq_len(nrow(m123)),
                      sum_row = rowSums(m123 * (m123 > 1)),
                      row.names = NULL)
num_rep <- data.frame(id = seq_len(nrow(m123)),
                      num_rep = rowSums(m123 > 1),
                      row.names = NULL)
rep3    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep3 = sum_row - num_rep) %>%
           select(id, sum_rep3)


# Matrices 1-4 ----
m1234   <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]] +
                     sample_list[[4]])
sum_row <- data.frame(id = seq_len(nrow(m1234)),
                      sum_row = rowSums(m1234 * (m1234 > 1)),
                      row.names = NULL)
num_rep <- data.frame(id = seq_len(nrow(m1234)),
                      num_rep = rowSums(m1234 > 1),
                      row.names = NULL)
rep4    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep4 = sum_row - num_rep) %>%
           select(id, sum_rep4)


# One row per matrix row (id), one column per cumulative step.
all_reps <- full_join(rep2, rep3, by = "id") %>%
            full_join(rep4, by = "id")

最终输出:

id sum_rep2 sum_rep3 sum_rep4
1        1        3        4
2        2        4        4
3        4        5        7
4        1        1        3

原帖

我有一个邻接矩阵列表。我想针对每个观测(即矩阵的每一行)统计单元格在多个矩阵中被重复填充的次数。我的目标是按顺序对矩阵累积地执行此操作:先是 M1 和 M2 的重复次数,然后是 M1、M2、M3,依此类推。

我正在尝试创建一个函数,使其能够处理列表中提供的任意数量的矩阵。下面是我目前一步一步手动执行的代码。

示例矩阵:

set.seed(0)

# Build one random 0/1 matrix with `n_nodes` rows and columns and set its
# diagonal to 0. Base R only, so the example runs before any package is
# loaded.
random_adjacency <- function(n_nodes = 10) {
  adj <- matrix(sample(0:1, n_nodes * n_nodes, replace = TRUE),
                nrow = n_nodes, ncol = n_nodes)
  diag(adj) <- 0
  adj
}

# The matrices are drawn one after another, so the seed set above fixes
# all four of them.
mat1 <- random_adjacency()
mat2 <- random_adjacency()
mat3 <- random_adjacency()
mat4 <- random_adjacency()

sample_list <- list(mat1, mat2, mat3, mat4)

我用过的代码:

这里我按行计算取值大于 1 的单元格之和,以及取值大于 1 的单元格个数。然后用行总和减去该单元格个数,得到重复的总次数。

# Step-by-step count of repeated cells per row for the cumulative sums
# M1 + M2, M1 + M2 + M3 and M1 + M2 + M3 + M4 of `sample_list`.
# Requires dplyr to be attached (`%>%`, full_join(), mutate(), select()).

# Matrices 1-2 ----
# For 0/1 inputs, a summed cell holding k > 1 was filled in k matrices,
# i.e. it contributes k - 1 repeats.
m12     <- as.matrix(sample_list[[1]] + sample_list[[2]])
# Per-row total of the cells whose summed value exceeds 1.
sum_row <- data.frame(id = seq_len(nrow(m12)),
                      sum_row = rowSums(m12 * (m12 > 1)),
                      row.names = NULL)
# Per-row number of cells whose summed value exceeds 1.
num_rep <- data.frame(id = seq_len(nrow(m12)),
                      num_rep = rowSums(m12 > 1),
                      row.names = NULL)
# Total minus count = sum of (k - 1) over the repeated cells of each row.
# The join key is spelled out so the join never depends on which columns
# happen to share a name.
rep2    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep2 = sum_row - num_rep) %>%
           select(id, sum_rep2)


# Matrices 1-3 ----
m123    <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]])
sum_row <- data.frame(id = seq_len(nrow(m123)),
                      sum_row = rowSums(m123 * (m123 > 1)),
                      row.names = NULL)
num_rep <- data.frame(id = seq_len(nrow(m123)),
                      num_rep = rowSums(m123 > 1),
                      row.names = NULL)
rep3    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep3 = sum_row - num_rep) %>%
           select(id, sum_rep3)



# Matrices 1-4 ----
m1234   <- as.matrix(sample_list[[1]] + sample_list[[2]] + sample_list[[3]] +
                     sample_list[[4]])
sum_row <- data.frame(id = seq_len(nrow(m1234)),
                      sum_row = rowSums(m1234 * (m1234 > 1)),
                      row.names = NULL)
num_rep <- data.frame(id = seq_len(nrow(m1234)),
                      num_rep = rowSums(m1234 > 1),
                      row.names = NULL)
rep4    <- full_join(sum_row, num_rep, by = "id") %>%
           mutate(sum_rep4 = sum_row - num_rep) %>%
           select(id, sum_rep4)


# One row per matrix row (id), one column per cumulative step.
all_reps <- full_join(rep2, rep3, by = "id") %>%
            full_join(rep4, by = "id")

最终输出 all_reps 为我提供了这个数据集(使用我在第一段代码中创建的随机矩阵):

  id sum_rep2 sum_rep3 sum_rep4
1   1        1        7       10
2   2        3        8       10
3   3        1        5       10
4   4        0        1        4
5   5        2        4        9
6   6        0        2        4
7   7        3        6       10
8   8        5        8       12
9   9        3        7       11
10 10        3        9       12

有没有办法使用循环或 apply 系列函数以更自动化的方式完成此操作,并且能够处理包含更多矩阵的列表?

矩阵:

sample_list[[1]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    1    0    1    1    0    0    1    1     1
 [2,]    0    0    0    1    1    1    0    1    1     0
 [3,]    1    0    0    1    1    0    1    1    1     0
 [4,]    0    0    0    0    0    1    1    0    0     1
 [5,]    0    0    0    1    0    1    1    0    0     1
 [6,]    1    0    0    0    1    0    1    0    0     0
 [7,]    0    1    0    0    1    0    0    1    1     0
 [8,]    0    1    1    1    1    1    1    0    0     0
 [9,]    0    1    0    0    1    1    0    0    0     1
[10,]    1    1    0    1    0    1    1    0    0     0

sample_list[[2]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    0    1    0    0    0    1    1    0     0
 [2,]    1    0    1    0    1    1    0    0    1     0
 [3,]    1    0    0    0    0    0    0    0    0     1
 [4,]    1    0    0    0    0    0    0    1    0     0
 [5,]    0    1    0    0    0    0    1    0    0     1
 [6,]    0    1    1    1    0    0    0    0    0     1
 [7,]    0    1    0    0    1    1    0    1    0     1
 [8,]    0    1    0    1    1    1    1    0    1     0
 [9,]    1    0    1    1    1    1    0    1    0     1
[10,]    0    1    1    1    0    1    0    1    1     0

sample_list[[3]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    0    0    1    1    1    1    1    1     1
 [2,]    1    0    1    0    1    0    1    1    1     0
 [3,]    1    0    0    0    1    1    0    1    1     0
 [4,]    0    0    1    0    0    0    0    0    0     1
 [5,]    1    1    1    0    0    0    0    1    1     1
 [6,]    1    0    1    0    0    0    0    0    1     0
 [7,]    1    0    0    1    1    0    0    0    1     1
 [8,]    0    0    0    1    0    1    0    0    1     1
 [9,]    0    1    0    1    1    0    0    1    0     0
[10,]    1    1    0    1    0    0    1    1    1     0

sample_list[[4]]
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
 [1,]    0    1    1    0    0    0    0    0    0     1
 [2,]    0    0    0    0    1    0    0    0    1     0
 [3,]    0    0    0    1    1    1    1    0    1     0
 [4,]    0    0    1    0    1    0    0    1    0     1
 [5,]    0    0    0    1    0    1    1    1    0     1
 [6,]    0    0    0    0    1    0    1    0    0     0
 [7,]    0    0    1    1    0    1    0    1    1     0
 [8,]    1    0    1    1    0    0    1    0    1     0
 [9,]    1    0    1    0    1    1    1    0    0     0
[10,]    1    0    1    1    1    0    0    0    0     0

我认为这可以满足您的需求:

library(tidyverse)

# Per-row repeat counts for every cumulative sum of the matrices in
# `sample_list`; the result is printed because it is not assigned.
sample_list %>%
  # Running element-wise totals: M1, M1 + M2, M1 + M2 + M3, ...
  accumulate(`+`) %>%
  # Drop the first total, which is M1 on its own.
  tail(n = -1) %>%
  # A summed cell holding k adds max(k - 1, 0) to its row's count.
  map(function(total) rowSums(pmax(total - 1, 0))) %>%
  # Column j covers the first j + 1 matrices, hence the "+ 1" in its name.
  bind_cols(
    .name_repair = function(nms) paste0("sum_rep", seq_along(nms) + 1)
  ) %>%
  rowid_to_column()

# A tibble: 4 x 4
  rowid sum_rep2 sum_rep3 sum_rep4
  <int>    <dbl>    <dbl>    <dbl>
1     1        0        0        0
2     2        0        1        2
3     3        1        3        4
4     4        0        0        2

示例数据:

set.seed(0)
n <- 4

# Four random n x n matrices of 0/1 values, each with its diagonal reset
# to 0. The matrices are drawn one at a time, in list order.
sample_list <- lapply(seq_len(4), function(i) {
  adj <- matrix(sample(0:1, n * n, replace = TRUE), nrow = n)
  diag(adj) <- 0
  adj
})