如何在 R 中优化 `expand.grid` 或 `combn` 的使用
how to optimize the use of `expand.grid` or `combn` in R
我有一个字符向量 v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
,我想将它们组合起来准备一个完整的实验设计。所以我想生成一个 data.frame
,其中每一行是由 n
个元素构成的一种组合,并且行数尽可能多(即列出所有可能的组合)
# Candidate values and the number of slots (elements per row) in the design.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
n <- 12
## TEST 1: crashes R
# tmp has n columns, each holding the 6 values of v, so expand.grid() tries to
# build every row combination: 6^12 = 2,176,782,336 rows.
tmp <- data.frame(matrix(rep(v,n), ncol = n))
expand.grid(tmp)
## TEST 2: fails with the error shown below
# NOTE(review): `nbslot` is not defined in this snippet; presumably it is the
# same value as `n` -- confirm.
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) :
# invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
# NAs introduced by coercion to integer range
这段代码在 n <- 8 时似乎可以运行
,但在 n <- 12 时不行
。我该如何解决这个问题?
也许这对你有帮助。下面列出了 6 个变量的所有组合(每个组合最少包含 1 个变量,最多包含 6 个),共 63 个:
# variables
vars <- c("piment", "aubergine", "carotte", "oignon", "chou", "pommeDeTerre")
# combinations by number of variables: element m of L is a matrix whose
# columns are the index sets of every m-element subset of vars
L <- lapply(seq_along(vars), FUN = function(m) {
  combn(x = seq_along(vars), m = m)
})
# total number of combinations
(S <- sum(vapply(L, ncol, integer(1)))) # 63
# a data.frame of the combinations: one 0/1 indicator row per subset
L2 <- lapply(L, FUN = function(x) {
  apply(x, 2, function(y) {
    z <- rep(0, length(vars))
    z[y] <- 1 # flag the variables that belong to this subset
    z
  })
})
df <- as.data.frame(t(do.call("cbind", L2)))
# label the columns; without this they are V1..V6 rather than the variable
# names shown in the printed output
names(df) <- vars
df
# piment aubergine carotte oignon chou pommeDeTerre
# 1 1 0 0 0 0 0
# 2 0 1 0 0 0 0
# 3 0 0 1 0 0 0
# 4 0 0 0 1 0 0
# 5 0 0 0 0 1 0
# 6 0 0 0 0 0 1
# 7 1 1 0 0 0 0
# 8 1 0 1 0 0 0
# 9 1 0 0 1 0 0
# 10 1 0 0 0 1 0
# 11 1 0 0 0 0 1
# 12 0 1 1 0 0 0
# 13 0 1 0 1 0 0
# 14 0 1 0 0 1 0
# 15 0 1 0 0 0 1
# 16 0 0 1 1 0 0
# 17 0 0 1 0 1 0
# 18 0 0 1 0 0 1
# 19 0 0 0 1 1 0
# 20 0 0 0 1 0 1
# 21 0 0 0 0 1 1
# 22 1 1 1 0 0 0
# 23 1 1 0 1 0 0
# 24 1 1 0 0 1 0
# 25 1 1 0 0 0 1
# 26 1 0 1 1 0 0
# 27 1 0 1 0 1 0
# 28 1 0 1 0 0 1
# 29 1 0 0 1 1 0
# 30 1 0 0 1 0 1
# 31 1 0 0 0 1 1
# 32 0 1 1 1 0 0
# 33 0 1 1 0 1 0
# 34 0 1 1 0 0 1
# 35 0 1 0 1 1 0
# 36 0 1 0 1 0 1
# 37 0 1 0 0 1 1
# 38 0 0 1 1 1 0
# 39 0 0 1 1 0 1
# 40 0 0 1 0 1 1
# 41 0 0 0 1 1 1
# 42 1 1 1 1 0 0
# 43 1 1 1 0 1 0
# 44 1 1 1 0 0 1
# 45 1 1 0 1 1 0
# 46 1 1 0 1 0 1
# 47 1 1 0 0 1 1
# 48 1 0 1 1 1 0
# 49 1 0 1 1 0 1
# 50 1 0 1 0 1 1
# 51 1 0 0 1 1 1
# 52 0 1 1 1 1 0
# 53 0 1 1 1 0 1
# 54 0 1 1 0 1 1
# 55 0 1 0 1 1 1
# 56 0 0 1 1 1 1
# 57 1 1 1 1 1 0
# 58 1 1 1 1 0 1
# 59 1 1 1 0 1 1
# 60 1 1 0 1 1 1
# 61 1 0 1 1 1 1
# 62 0 1 1 1 1 1
# 63 1 1 1 1 1 1
您可以使用 gtools
包中的 combinations
。
下面以 r=5
为例,该方法同样适用于 r=12
:
library(gtools)
# Enumerate every unordered selection of 5 items from v, with repeats allowed.
# Arguments are named so the source vector cannot be mistaken for r.
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
[,1] [,2] [,3] [,4] [,5]
[1,] "aubergine" "aubergine" "aubergine" "aubergine" "aubergine"
[2,] "aubergine" "aubergine" "aubergine" "aubergine" "carotte"
[3,] "aubergine" "aubergine" "aubergine" "aubergine" "chou"
[4,] "aubergine" "aubergine" "aubergine" "aubergine" "oignon"
[5,] "aubergine" "aubergine" "aubergine" "aubergine" "piment"
[6,] "aubergine" "aubergine" "aubergine" "aubergine" "pommeDeTerre"
[7,] "aubergine" "aubergine" "aubergine" "carotte" "carotte"
...
根据您对行和 n 的描述,我做了一个假设:您需要的是有放回(允许重复)的抽样,即每个元素都可以出现不止一次。我还假设您实际上不会用到 n 为 12 时的全部数十亿种组合。下面说明我的函数的作用。
它会返回一组随机样本,并且所有样本互不相同。
#' Draw distinct random samples (with replacement) from a set of options
#'
#' Repeatedly draws `samples` values from `options` with replacement and
#' collects the draws in a list, discarding exact duplicates, until the list
#' holds `rows` entries.
#'
#' @param options Vector of values to draw from.
#' @param build List of draws collected so far; returned unchanged when it
#'   already holds at least `rows` entries.
#' @param samples Number of values in each draw.
#' @param rows Number of distinct draws to collect.
#' @return A list of vectors, each of length `samples`.
mysamples <- function(options, build = list(), samples, rows) {
  # Only length(unique(options))^samples distinct draws exist. Asking for more
  # than can ever be reached would never terminate, so fail fast instead.
  reachable <- length(unique(build)) + length(unique(options))^samples
  if (length(build) < rows && rows > reachable) {
    stop(
      "cannot build ", rows, " distinct samples: at most ", reachable,
      " are possible",
      call. = FALSE
    )
  }
  # Iterate rather than recurse so that a large `rows` cannot exhaust the
  # call stack.
  while (length(build) < rows) {
    # Index explicitly: sample(x, ...) would treat a single number x as 1:x.
    draw <- options[sample.int(length(options), samples, replace = TRUE)]
    build[[length(build) + 1]] <- draw
    build <- unique(build)
  }
  build
}
# candidate values to draw from
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# note that trying all combinations first would be 11441304000 combinations!
# NOTE(review): 11441304000 equals 50*49*48*47*46*45; the number of distinct
# length-50 draws with replacement from 6 values is 6^50 (about 8.1e38) --
# either way, far too many to enumerate.
# 1000 distinct random draws of 50 values each
mysamples(options = v, samples = 50, rows = 1000)
# smaller sample allowing to show all results below
mysamples(options = v, samples = 5, rows = 10)
[[1]]
[1] "carotte" "aubergine" "piment" "pommeDeTerre" "pommeDeTerre"
[[2]]
[1] "carotte" "oignon" "aubergine" "chou" "oignon"
[[3]]
[1] "piment" "carotte" "chou" "pommeDeTerre" "carotte"
[[4]]
[1] "oignon" "oignon" "aubergine" "carotte" "pommeDeTerre"
[[5]]
[1] "oignon" "chou" "piment" "aubergine" "piment"
[[6]]
[1] "chou" "aubergine" "chou" "aubergine" "oignon"
[[7]]
[1] "chou" "aubergine" "carotte" "carotte" "carotte"
[[8]]
[1] "aubergine" "aubergine" "carotte" "carotte" "oignon"
[[9]]
[1] "carotte" "carotte" "carotte" "carotte" "aubergine"
[[10]]
[1] "piment" "aubergine" "aubergine" "chou" "oignon"
mysamples(options = v, samples = 50, rows = 2)
[[1]]
[1] "pommeDeTerre" "carotte" "aubergine" "aubergine" "pommeDeTerre" "oignon" "carotte" "aubergine" "pommeDeTerre" "chou" "chou" "carotte" "pommeDeTerre"
[14] "piment" "carotte" "oignon" "piment" "chou" "chou" "pommeDeTerre" "piment" "oignon" "carotte" "aubergine" "pommeDeTerre" "piment"
[27] "aubergine" "pommeDeTerre" "chou" "pommeDeTerre" "pommeDeTerre" "carotte" "oignon" "piment" "oignon" "piment" "chou" "pommeDeTerre" "carotte"
[40] "carotte" "oignon" "chou" "oignon" "pommeDeTerre" "chou" "oignon" "oignon" "oignon" "carotte" "chou"
[[2]]
[1] "aubergine" "piment" "oignon" "piment" "oignon" "oignon" "piment" "chou" "chou" "carotte" "chou" "pommeDeTerre" "piment"
[14] "chou" "chou" "piment" "aubergine" "pommeDeTerre" "chou" "aubergine" "chou" "piment" "carotte" "pommeDeTerre" "chou" "pommeDeTerre"
[27] "oignon" "pommeDeTerre" "piment" "oignon" "piment" "oignon" "carotte" "oignon" "pommeDeTerre" "oignon" "piment" "piment" "carotte"
[40] "piment" "aubergine" "chou" "oignon" "oignon" "pommeDeTerre" "oignon" "oignon" "aubergine" "piment" "aubergine"
我有一个字符向量 v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
,我想将它们组合起来准备一个完整的实验设计。所以我想生成一个 data.frame
,其中每一行是由 n
个元素构成的一种组合,并且行数尽可能多(即列出所有可能的组合)
# Candidate values and the number of slots (elements per row) in the design.
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
n <- 12
## TEST 1: crashes R
# tmp has n columns, each holding the 6 values of v, so expand.grid() tries to
# build every row combination: 6^12 = 2,176,782,336 rows.
tmp <- data.frame(matrix(rep(v,n), ncol = n))
expand.grid(tmp)
## TEST 2: fails with the error shown below
# NOTE(review): `nbslot` is not defined in this snippet; presumably it is the
# same value as `n` -- confirm.
temp = t(combn(rep(v,nbslot), nbslot))
#Error in matrix(r, nrow = len.r, ncol = count) :
# invalid 'ncol' value (too large or NA)
#In addition: Warning message:
#In combn(rep(v, nbslot), nbslot) :
# NAs introduced by coercion to integer range
这段代码在 n <- 8 时似乎可以运行
,但在 n <- 12 时不行
。我该如何解决这个问题?
也许这对你有帮助。下面列出了 6 个变量的所有组合(每个组合最少包含 1 个变量,最多包含 6 个),共 63 个:
# variables
vars <- c("piment", "aubergine", "carotte", "oignon", "chou", "pommeDeTerre")
# combinations by number of variables: element m of L is a matrix whose
# columns are the index sets of every m-element subset of vars
L <- lapply(seq_along(vars), FUN = function(m) {
  combn(x = seq_along(vars), m = m)
})
# total number of combinations
(S <- sum(vapply(L, ncol, integer(1)))) # 63
# a data.frame of the combinations: one 0/1 indicator row per subset
L2 <- lapply(L, FUN = function(x) {
  apply(x, 2, function(y) {
    z <- rep(0, length(vars))
    z[y] <- 1 # flag the variables that belong to this subset
    z
  })
})
df <- as.data.frame(t(do.call("cbind", L2)))
# label the columns; without this they are V1..V6 rather than the variable
# names shown in the printed output
names(df) <- vars
df
# piment aubergine carotte oignon chou pommeDeTerre
# 1 1 0 0 0 0 0
# 2 0 1 0 0 0 0
# 3 0 0 1 0 0 0
# 4 0 0 0 1 0 0
# 5 0 0 0 0 1 0
# 6 0 0 0 0 0 1
# 7 1 1 0 0 0 0
# 8 1 0 1 0 0 0
# 9 1 0 0 1 0 0
# 10 1 0 0 0 1 0
# 11 1 0 0 0 0 1
# 12 0 1 1 0 0 0
# 13 0 1 0 1 0 0
# 14 0 1 0 0 1 0
# 15 0 1 0 0 0 1
# 16 0 0 1 1 0 0
# 17 0 0 1 0 1 0
# 18 0 0 1 0 0 1
# 19 0 0 0 1 1 0
# 20 0 0 0 1 0 1
# 21 0 0 0 0 1 1
# 22 1 1 1 0 0 0
# 23 1 1 0 1 0 0
# 24 1 1 0 0 1 0
# 25 1 1 0 0 0 1
# 26 1 0 1 1 0 0
# 27 1 0 1 0 1 0
# 28 1 0 1 0 0 1
# 29 1 0 0 1 1 0
# 30 1 0 0 1 0 1
# 31 1 0 0 0 1 1
# 32 0 1 1 1 0 0
# 33 0 1 1 0 1 0
# 34 0 1 1 0 0 1
# 35 0 1 0 1 1 0
# 36 0 1 0 1 0 1
# 37 0 1 0 0 1 1
# 38 0 0 1 1 1 0
# 39 0 0 1 1 0 1
# 40 0 0 1 0 1 1
# 41 0 0 0 1 1 1
# 42 1 1 1 1 0 0
# 43 1 1 1 0 1 0
# 44 1 1 1 0 0 1
# 45 1 1 0 1 1 0
# 46 1 1 0 1 0 1
# 47 1 1 0 0 1 1
# 48 1 0 1 1 1 0
# 49 1 0 1 1 0 1
# 50 1 0 1 0 1 1
# 51 1 0 0 1 1 1
# 52 0 1 1 1 1 0
# 53 0 1 1 1 0 1
# 54 0 1 1 0 1 1
# 55 0 1 0 1 1 1
# 56 0 0 1 1 1 1
# 57 1 1 1 1 1 0
# 58 1 1 1 1 0 1
# 59 1 1 1 0 1 1
# 60 1 1 0 1 1 1
# 61 1 0 1 1 1 1
# 62 0 1 1 1 1 1
# 63 1 1 1 1 1 1
您可以使用 gtools
包中的 combinations
。
下面以 r=5
为例,该方法同样适用于 r=12
:
library(gtools)
# Enumerate every unordered selection of 5 items from v, with repeats allowed.
# Arguments are named so the source vector cannot be mistaken for r.
combinations(n = length(v), r = 5, v = v, repeats.allowed = TRUE)
[,1] [,2] [,3] [,4] [,5]
[1,] "aubergine" "aubergine" "aubergine" "aubergine" "aubergine"
[2,] "aubergine" "aubergine" "aubergine" "aubergine" "carotte"
[3,] "aubergine" "aubergine" "aubergine" "aubergine" "chou"
[4,] "aubergine" "aubergine" "aubergine" "aubergine" "oignon"
[5,] "aubergine" "aubergine" "aubergine" "aubergine" "piment"
[6,] "aubergine" "aubergine" "aubergine" "aubergine" "pommeDeTerre"
[7,] "aubergine" "aubergine" "aubergine" "carotte" "carotte"
...
根据您对行和 n 的描述,我做了一个假设:您需要的是有放回(允许重复)的抽样,即每个元素都可以出现不止一次。我还假设您实际上不会用到 n 为 12 时的全部数十亿种组合。下面说明我的函数的作用。
它会返回一组随机样本,并且所有样本互不相同。
#' Draw distinct random samples (with replacement) from a set of options
#'
#' Repeatedly draws `samples` values from `options` with replacement and
#' collects the draws in a list, discarding exact duplicates, until the list
#' holds `rows` entries.
#'
#' @param options Vector of values to draw from.
#' @param build List of draws collected so far; returned unchanged when it
#'   already holds at least `rows` entries.
#' @param samples Number of values in each draw.
#' @param rows Number of distinct draws to collect.
#' @return A list of vectors, each of length `samples`.
mysamples <- function(options, build = list(), samples, rows) {
  # Only length(unique(options))^samples distinct draws exist. Asking for more
  # than can ever be reached would never terminate, so fail fast instead.
  reachable <- length(unique(build)) + length(unique(options))^samples
  if (length(build) < rows && rows > reachable) {
    stop(
      "cannot build ", rows, " distinct samples: at most ", reachable,
      " are possible",
      call. = FALSE
    )
  }
  # Iterate rather than recurse so that a large `rows` cannot exhaust the
  # call stack.
  while (length(build) < rows) {
    # Index explicitly: sample(x, ...) would treat a single number x as 1:x.
    draw <- options[sample.int(length(options), samples, replace = TRUE)]
    build[[length(build) + 1]] <- draw
    build <- unique(build)
  }
  build
}
# candidate values to draw from
v <- c("piment","aubergine","carotte","oignon","chou","pommeDeTerre")
# note that trying all combinations first would be 11441304000 combinations!
# NOTE(review): 11441304000 equals 50*49*48*47*46*45; the number of distinct
# length-50 draws with replacement from 6 values is 6^50 (about 8.1e38) --
# either way, far too many to enumerate.
# 1000 distinct random draws of 50 values each
mysamples(options = v, samples = 50, rows = 1000)
# smaller sample allowing to show all results below
mysamples(options = v, samples = 5, rows = 10)
[[1]]
[1] "carotte" "aubergine" "piment" "pommeDeTerre" "pommeDeTerre"
[[2]]
[1] "carotte" "oignon" "aubergine" "chou" "oignon"
[[3]]
[1] "piment" "carotte" "chou" "pommeDeTerre" "carotte"
[[4]]
[1] "oignon" "oignon" "aubergine" "carotte" "pommeDeTerre"
[[5]]
[1] "oignon" "chou" "piment" "aubergine" "piment"
[[6]]
[1] "chou" "aubergine" "chou" "aubergine" "oignon"
[[7]]
[1] "chou" "aubergine" "carotte" "carotte" "carotte"
[[8]]
[1] "aubergine" "aubergine" "carotte" "carotte" "oignon"
[[9]]
[1] "carotte" "carotte" "carotte" "carotte" "aubergine"
[[10]]
[1] "piment" "aubergine" "aubergine" "chou" "oignon"
mysamples(options = v, samples = 50, rows = 2)
[[1]]
[1] "pommeDeTerre" "carotte" "aubergine" "aubergine" "pommeDeTerre" "oignon" "carotte" "aubergine" "pommeDeTerre" "chou" "chou" "carotte" "pommeDeTerre"
[14] "piment" "carotte" "oignon" "piment" "chou" "chou" "pommeDeTerre" "piment" "oignon" "carotte" "aubergine" "pommeDeTerre" "piment"
[27] "aubergine" "pommeDeTerre" "chou" "pommeDeTerre" "pommeDeTerre" "carotte" "oignon" "piment" "oignon" "piment" "chou" "pommeDeTerre" "carotte"
[40] "carotte" "oignon" "chou" "oignon" "pommeDeTerre" "chou" "oignon" "oignon" "oignon" "carotte" "chou"
[[2]]
[1] "aubergine" "piment" "oignon" "piment" "oignon" "oignon" "piment" "chou" "chou" "carotte" "chou" "pommeDeTerre" "piment"
[14] "chou" "chou" "piment" "aubergine" "pommeDeTerre" "chou" "aubergine" "chou" "piment" "carotte" "pommeDeTerre" "chou" "pommeDeTerre"
[27] "oignon" "pommeDeTerre" "piment" "oignon" "piment" "oignon" "carotte" "oignon" "pommeDeTerre" "oignon" "piment" "piment" "carotte"
[40] "piment" "aubergine" "chou" "oignon" "oignon" "pommeDeTerre" "oignon" "oignon" "aubergine" "piment" "aubergine"