如何在 R 中优化 `expand.grid` 或 `combn` 的使用

Question

我有一个字符向量 v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")，我想把其中的元素组合起来，准备一份完整的实验设计。因此我想生成一个 data.frame：每一行是由 n 个元素构成的一种组合，并且行数尽可能多（即覆盖所有可能的组合）。

# the six values to combine
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# number of elements wanted in each row of the design
n <- 12

## TEST 1 : crashes R
# tmp has n columns, each holding the 6 values of v, so expand.grid() has to
# build the full cartesian product: 6^n = 6^12 = 2,176,782,336 rows
tmp <- data.frame(matrix(rep(v,n), ncol = n))
expand.grid(tmp)

## TEST 2 : 
# NOTE(review): nbslot is not defined in this snippet -- presumably it holds
# the same value as n above; confirm
# combn() is asked for choose(6 * nbslot, nbslot) columns; for 12 that is
# about 1.5e13, which no longer fits in an integer (hence the error below)
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) : 
#  invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
#  NAs introduced by coercion to integer range

这段代码在 n <- 8 时似乎可以运行，但在 n <- 12 时不行。该如何解决这个问题？

Answer 1

也许这对你有帮助。下面列出6个变量的所有组合（每个组合最少包含1个变量，最多包含6个），共63个：

# variables
vars <- c("piment", "aubergine", "carotte", "oignon", "chou", "pommeDeTerre")

# combinations by number of variables: element m of L is a matrix whose
# columns are the index sets of every m-element subset of vars
L <- lapply(seq_along(vars), FUN = function(m) {
  combn(x = seq_along(vars), m = m)
})

# total number of combinations (all non-empty subsets: 2^6 - 1)
(S <- sum(vapply(L, ncol, integer(1)))) # 63

# a data.frame of the combinations: each subset becomes a 0/1 indicator
# vector with a 1 at the positions of the variables it contains
L2 <- lapply(L, FUN = function(x) {
  apply(x, 2, function(y) {
    z <- rep(0, length(vars))
    z[y] <- 1
    z
  })
})

# subsets are columns after cbind(); transpose so that each row is one subset
df <- as.data.frame(t(do.call("cbind", L2)))
# label the indicator columns with the variable names; without this the
# columns would be called V1..V6 and not match the printed table
names(df) <- vars
df

#    piment aubergine carotte oignon chou pommeDeTerre
# 1       1         0       0      0    0            0
# 2       0         1       0      0    0            0
# 3       0         0       1      0    0            0
# 4       0         0       0      1    0            0
# 5       0         0       0      0    1            0
# 6       0         0       0      0    0            1
# 7       1         1       0      0    0            0
# 8       1         0       1      0    0            0
# 9       1         0       0      1    0            0
# 10      1         0       0      0    1            0
# 11      1         0       0      0    0            1
# 12      0         1       1      0    0            0
# 13      0         1       0      1    0            0
# 14      0         1       0      0    1            0
# 15      0         1       0      0    0            1
# 16      0         0       1      1    0            0
# 17      0         0       1      0    1            0
# 18      0         0       1      0    0            1
# 19      0         0       0      1    1            0
# 20      0         0       0      1    0            1
# 21      0         0       0      0    1            1
# 22      1         1       1      0    0            0
# 23      1         1       0      1    0            0
# 24      1         1       0      0    1            0
# 25      1         1       0      0    0            1
# 26      1         0       1      1    0            0
# 27      1         0       1      0    1            0
# 28      1         0       1      0    0            1
# 29      1         0       0      1    1            0
# 30      1         0       0      1    0            1
# 31      1         0       0      0    1            1
# 32      0         1       1      1    0            0
# 33      0         1       1      0    1            0
# 34      0         1       1      0    0            1
# 35      0         1       0      1    1            0
# 36      0         1       0      1    0            1
# 37      0         1       0      0    1            1
# 38      0         0       1      1    1            0
# 39      0         0       1      1    0            1
# 40      0         0       1      0    1            1
# 41      0         0       0      1    1            1
# 42      1         1       1      1    0            0
# 43      1         1       1      0    1            0
# 44      1         1       1      0    0            1
# 45      1         1       0      1    1            0
# 46      1         1       0      1    0            1
# 47      1         1       0      0    1            1
# 48      1         0       1      1    1            0
# 49      1         0       1      1    0            1
# 50      1         0       1      0    1            1
# 51      1         0       0      1    1            1
# 52      0         1       1      1    1            0
# 53      0         1       1      1    0            1
# 54      0         1       1      0    1            1
# 55      0         1       0      1    1            1
# 56      0         0       1      1    1            1
# 57      1         1       1      1    1            0
# 58      1         1       1      1    0            1
# 59      1         1       1      0    1            1
# 60      1         1       0      1    1            1
# 61      1         0       1      1    1            1
# 62      0         1       1      1    1            1
# 63      1         1       1      1    1            1

Answer 2

您可以使用 gtools 包中的 combinations。

下面以 r=5 为例，r=12 时同样适用：

library(gtools)
# every way to pick 5 values from v when order is ignored and a value may be
# picked more than once; arguments are named so none depends on position
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
       [,1]           [,2]           [,3]           [,4]           [,5]          
  [1,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "aubergine"   
  [2,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "carotte"     
  [3,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "chou"        
  [4,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "oignon"      
  [5,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "piment"      
  [6,] "aubergine"    "aubergine"    "aubergine"    "aubergine"    "pommeDeTerre"
  [7,] "aubergine"    "aubergine"    "aubergine"    "carotte"      "carotte"     
...

Answer 3

根据您对行数和 n 的描述，我做了一个假设：您需要的是有放回的抽样，也就是说每个元素都可以出现不止一次。我还假设您永远不会用到 n 为 12 时的全部数十亿种组合（6^12 ≈ 21.8 亿）。下面说明我的函数的作用。

它会返回一组随机样本，并且所有样本互不相同。

#' Draw distinct random samples (with replacement) from a set of options
#'
#' Repeatedly draws `samples` values from `options` with replacement and keeps
#' a draw only if it is not already in the result, until `rows` distinct draws
#' have been collected.
#'
#' @param options Vector of values to sample from (handed to `sample()`).
#' @param build List of draws to start from; defaults to an empty list. It is
#'   returned unchanged when it already holds at least `rows` elements.
#' @param samples Number of values in each draw.
#' @param rows Number of distinct draws wanted.
#' @return A list of draws, each a vector of length `samples`; it has `rows`
#'   elements unless `build` already held more.
mysamples <- function(options, build = list(), samples, rows) {
  if (length(build) >= rows) {
    return(build)
  }

  # sample() treats a single finite number >= 1 as the range 1:options
  is_range <- length(options) == 1L && is.numeric(options) &&
    is.finite(options) && options >= 1
  pool_size <- if (is_range) floor(options) else length(options)

  # Conservative upper bound on the number of distinct draws that can ever
  # be collected; asking for more could never finish, so fail early.
  max_rows <- length(unique(build)) + pool_size^samples
  if (rows > max_rows) {
    stop(
      "cannot build ", rows, " distinct samples: at most ", max_rows,
      " are possible",
      call. = FALSE
    )
  }

  # Loop instead of recursing once per draw, so that a large `rows` cannot
  # exhaust the expression-nesting or C stack limits.
  while (length(build) < rows) {
    build[[length(build) + 1L]] <- sample(options, samples, replace = TRUE)
    # drop the new draw again if it duplicates an earlier one
    build <- unique(build)
  }
  build
}

# the six values to sample from
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")

# ask for 1000 distinct draws of length 50 rather than enumerating every
# possibility first (the original note quotes 11441304000 combinations;
# NOTE(review): ordered draws with replacement would number 6^50 -- confirm)
mysamples(options = v, samples = 50, rows = 1000)

# smaller call (10 draws of length 5) so that the full result can be shown below
mysamples(options = v, samples = 5, rows = 10)

[[1]]
[1] "carotte"      "aubergine"    "piment"       "pommeDeTerre" "pommeDeTerre"

[[2]]
[1] "carotte"   "oignon"    "aubergine" "chou"      "oignon"   

[[3]]
[1] "piment"       "carotte"      "chou"         "pommeDeTerre" "carotte"     

[[4]]
[1] "oignon"       "oignon"       "aubergine"    "carotte"      "pommeDeTerre"

[[5]]
[1] "oignon"    "chou"      "piment"    "aubergine" "piment"   

[[6]]
[1] "chou"      "aubergine" "chou"      "aubergine" "oignon"   

[[7]]
[1] "chou"      "aubergine" "carotte"   "carotte"   "carotte"  

[[8]]
[1] "aubergine" "aubergine" "carotte"   "carotte"   "oignon"   

[[9]]
[1] "carotte"   "carotte"   "carotte"   "carotte"   "aubergine"

[[10]]
[1] "piment"    "aubergine" "aubergine" "chou"      "oignon" 


# two draws of length 50 each
mysamples(options = v, samples = 50, rows = 2)

[[1]]
 [1] "pommeDeTerre" "carotte"      "aubergine"    "aubergine"    "pommeDeTerre" "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "chou"         "chou"         "carotte"      "pommeDeTerre"
[14] "piment"       "carotte"      "oignon"       "piment"       "chou"         "chou"         "pommeDeTerre" "piment"       "oignon"       "carotte"      "aubergine"    "pommeDeTerre" "piment"      
[27] "aubergine"    "pommeDeTerre" "chou"         "pommeDeTerre" "pommeDeTerre" "carotte"      "oignon"       "piment"       "oignon"       "piment"       "chou"         "pommeDeTerre" "carotte"     
[40] "carotte"      "oignon"       "chou"         "oignon"       "pommeDeTerre" "chou"         "oignon"       "oignon"       "oignon"       "carotte"      "chou"        

[[2]]
 [1] "aubergine"    "piment"       "oignon"       "piment"       "oignon"       "oignon"       "piment"       "chou"         "chou"         "carotte"      "chou"         "pommeDeTerre" "piment"      
[14] "chou"         "chou"         "piment"       "aubergine"    "pommeDeTerre" "chou"         "aubergine"    "chou"         "piment"       "carotte"      "pommeDeTerre" "chou"         "pommeDeTerre"
[27] "oignon"       "pommeDeTerre" "piment"       "oignon"       "piment"       "oignon"       "carotte"      "oignon"       "pommeDeTerre" "oignon"       "piment"       "piment"       "carotte"     
[40] "piment"       "aubergine"    "chou"         "oignon"       "oignon"       "pommeDeTerre" "oignon"       "oignon"       "aubergine"    "piment"       "aubergine"

如何在 R 中优化 `expand.grid` 或 `combn` 的使用

how to optimize the use of `expand.grid` or `combn` in R

r

combn