在原始 post "R - generate all possible pairwise combinations of binary vectors" 的基础上添加附加条件

Question

我的问题几乎已在以下 post 中完美解决。

但是，我有一个额外的条件要添加，这将使一些解决方案无效，我需要将它们删除。例如，考虑以下 6 对输出：

      [,1] [,2] [,3]
[1,]    1    0    0
[2,]    0    1    0  

[1,]    1    0    0
[2,]    0    0    1

[1,]    0    1    0
[2,]    1    0    0

[1,]    0    1    0
[2,]    0    0    1

[1,]    0    0    1
[2,]    1    0    0

[1,]    0    0    1
[2,]    0    1    0

在我的问题中，第 3、5 和 6 对需要被删除为无效。条件是，后面的向量不能比前一个向量早的位置有1。如果在第一个向量中，第 2 个位置有一个 1，那么在第二个向量中，1 可以在第 2 个或第 3 个位置，但不能在第一个。

是否可以在原 post 中 post 的解决方案中实现？由于我需要处理大量组合，是否有可能为此提供快速解决方案？

Answer 1

你可以在一个列表中得到所有这些独特的组合，在基数 R 中只有一行：

lapply(as.data.frame(combn(3, 2)), function(x) +rbind(1:3 == x[1], 1:3 == x[2]))
#> $V1
#>      [,1] [,2] [,3]
#> [1,]    1    0    0
#> [2,]    0    1    0
#> 
#> $V2
#>      [,1] [,2] [,3]
#> [1,]    1    0    0
#> [2,]    0    0    1
#> 
#> $V3
#>      [,1] [,2] [,3]
#> [1,]    0    1    0
#> [2,]    0    0    1

这适用于任何合理长度的向量。比如长度4：

lapply(as.data.frame(combn(4, 2)), function(x) +rbind(1:4 == x[1], 1:4 == x[2]))
#> $V1
#>      [,1] [,2] [,3] [,4]
#> [1,]    1    0    0    0
#> [2,]    0    1    0    0
#> 
#> $V2
#>      [,1] [,2] [,3] [,4]
#> [1,]    1    0    0    0
#> [2,]    0    0    1    0
#> 
#> $V3
#>      [,1] [,2] [,3] [,4]
#> [1,]    1    0    0    0
#> [2,]    0    0    0    1
#> 
#> $V4
#>      [,1] [,2] [,3] [,4]
#> [1,]    0    1    0    0
#> [2,]    0    0    1    0
#> 
#> $V5
#>      [,1] [,2] [,3] [,4]
#> [1,]    0    1    0    0
#> [2,]    0    0    0    1
#> 
#> $V6
#>      [,1] [,2] [,3] [,4]
#> [1,]    0    0    1    0
#> [2,]    0    0    0    1

编辑

任意长度的任意数量的向量的一般解决方案是：

get_unique <- function(n_vectors, length)
{
  df <- as.data.frame(combn(length, n_vectors))
  lapply(df, function(x) {
    +do.call(rbind, lapply(x, function(i) seq(length) == i))
  })
}

或者，如果允许重复：

get_unique <- function(n_vectors, length)
{
  df <- as.data.frame(cbind(combn(length, n_vectors), 
                            matrix(rep(seq(length), each = n_vectors), 
                                   ncol = length)))
  lapply(df, function(x) {
    +do.call(rbind, lapply(x, function(i) seq(length) == i))
  })
}

^{由 reprex package (v0.3.0)}

于 2020-12-12 创建

Answer 2

您可以用 1.

替换向量的第 n^th 个元素

FUN <- function(m, n, ...) {
  combn(n, m, function(i, ...) t(sapply(i, function(j, ...) `[<-`(rep(0, n), j, 1))), ...)
}
FUN(2, 3, simplify=FALSE)
# [[1]]
#       [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    1    0
# 
# [[2]]
#       [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    0    1
# 
# [[3]]
#      [,1] [,2] [,3]
# [1,]    0    1    0
# [2,]    0    0    1

圆点用于遍历可选的 simplify=FALSE 参数。如果你离开它，你会得到一个数组。不知道你喜欢什么，你可以设置一个默认。

FUN(2, 3)
# , , 1
# 
#      [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    1    0
# 
# , , 2
# 
#      [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    0    1
# 
# , , 3
# 
#      [,1] [,2] [,3]
# [1,]    0    1    0
# [2,]    0    0    1

这也适用于更多的行和列。

FUN(8, 10, simplify=FALSE)
# [[1]]
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,]    1    0    0    0    0    0    0    0    0     0
# [2,]    0    1    0    0    0    0    0    0    0     0
# [3,]    0    0    1    0    0    0    0    0    0     0
# [4,]    0    0    0    1    0    0    0    0    0     0
# [5,]    0    0    0    0    1    0    0    0    0     0
# [6,]    0    0    0    0    0    1    0    0    0     0
# [7,]    0    0    0    0    0    0    1    0    0     0
# [8,]    0    0    0    0    0    0    0    1    0     0
# 
# [[2]]
#      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
# [1,]    1    0    0    0    0    0    0    0    0     0
# [2,]    0    1    0    0    0    0    0    0    0     0
# [3,]    0    0    1    0    0    0    0    0    0     0
# [4,]    0    0    0    1    0    0    0    0    0     0
# [5,]    0    0    0    0    1    0    0    0    0     0
# [6,]    0    0    0    0    0    1    0    0    0     0
# [7,]    0    0    0    0    0    0    1    0    0     0
# [8,]    0    0    0    0    0    0    0    0    1     0
# ...

编辑 1

如果您希望将重复行作为有效矩阵，您可以使用 RcppAlgos::permuteGeneral 并检查 differences 是否都大于或等于零。

FUN2 <- function(m, n) {
  v <- RcppAlgos::permuteGeneral(n, m, rep=T)
  v <- as.data.frame(t(v[apply(v, 1, function(x) all(diff(x) >= 0)), ]))
  unname(lapply(v, function(j) t(sapply(j, function(k) `[<-`(rep(0, n), k, 1)))))
}
FUN2(2, 3)
# [[1]]
#      [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    1    0    0
# 
# [[2]]
#       [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    1    0
# 
# [[3]]
#       [,1] [,2] [,3]
# [1,]    1    0    0
# [2,]    0    0    1
# 
# [[4]]
#       [,1] [,2] [,3]
# [1,]    0    1    0
# [2,]    0    1    0
# 
# [[5]]
#       [,1] [,2] [,3]
# [1,]    0    1    0
# [2,]    0    0    1
# 
# [[6]]
#       [,1] [,2] [,3]
# [1,]    0    0    1
# [2,]    0    0    1

而且速度很快！

system.time(FUN2(5, 10))
# user  system elapsed 
# 1.31    0.00    1.40

注意， 还有一个 RcppAlgos::comboGeneral 函数类似于基础 combn 但可能更快。

编辑 2

我们可以使用 matrixStats::rowDiffs 使其更快。

FUN3 <- function(m, n) {
  v <- RcppAlgos::permuteGeneral(n, m, rep=T)
  v <- as.data.frame(t(v[apply(matrixStats::rowDiffs(v) >= 0, 1, all), ]))
  unname(lapply(v, function(j) t(sapply(j, function(k) `[<-`(rep(0, n), k, 1)))))
}
system.time(FUN3(6, 11))
# user  system elapsed 
# 3.80    0.03    3.96

在原始 post "R - generate all possible pairwise combinations of binary vectors" 的基础上添加附加条件

Adding an additional condition to original post "R - generate all possible pairwise combinations of binary vectors"

combinations

r

permutation

combinatorics

编辑 1

编辑 2