从 for (i in range) 向 mclapply() 传递一个参数
Passing mclapply() a parameter from for (i in range)
我正在尝试这样做:
nmf.sub <- function(n){
sub.data.matrix <- data.matrix[, (index[n, ])] ## the index is a permutation of the original matrix at a 0.8 resampling proportion (doesn't really matter)
temp.result <- nmf(sub.data.matrix, rank = 2, seed = 12345) ## want to change 2 to i
return(temp.result)
}
class.list <- list()
for (i in nmf.rank){ ## nmf.rank is 2:4
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations, function(n) nmf.sub(n)) ## resamp.iterations is 10, nmf.sub is defined above
}
但是我不想让 temp.result 的 nmf 中的 rank = 2,而是让 rank = i
知道如何将那个参数传递给它吗?只是通过 mclapply 作为 function(n, i) 传递它是行不通的。
您似乎有两个循环:一个用于 nmf.rank
中的 i,一个用于 1:resamp.iterations
中的 n。因此,您需要将 i
和 n
都传递给 nmf.sub
,例如比如:
nmf.sub <- function(n, i){
## the index is a permutation of the original matrix at a 0.8
## resampling proportion (doesn't really matter)
sub.data.matrix <- data.matrix[, (index[n, ])]
## want to change 2 to i
temp.result <- nmf(sub.data.matrix, rank = i, seed = 12345)
return(temp.result)
}
resamp.iterations <- 10
nmf.rank <- 2:4
res <- lapply(nmf.rank, function(i){
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations,
function(n) nmf.sub(n,i))
})
## then you can flatten/reshape res
关于您对效率的评论(如下):大部分数值计算是在 nmf() 函数中执行的,因此循环设置正确,因为每个 process/core 都得到一个数值计算密集的工作。但是,为了加快计算速度,您可以考虑使用先前计算的结果,而不是种子 12345(除非由于与您的问题相关的某些原因,必须使用后一个种子)。在以下示例中,我的执行时间减少了 30-40%:
library(NMF)
RNGkind("L'Ecuyer-CMRG") ## always use this when using mclapply()
nr <- 19
nc <- 2e2
set.seed(123)
data.matrix <- matrix(rexp(nc*nr),nr,nc)
resamp.iterations <- 10
nmf.rank <- 2:4
index <- t(sapply(1:resamp.iterations, function(n) sample.int(nc,nc*0.8)))
nmf.sub <- function(n, i){
sub.data.matrix <- data.matrix[ ,index[n, ]]
temp.result <- nmf(sub.data.matrix, rank = i, seed = 12345)
return(temp.result)
}
## version 1
system.time({
res <- lapply(nmf.rank, function(i){
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations,
function(n) nmf.sub(n,i))
})
})
## version 2: swap internal and external loops
system.time({
res <-
mclapply(mc.cores=16, 1:resamp.iterations, function(n){
res2 <- nmf(data.matrix[ ,index[n, ]], rank=2, seed = 12345)
res3 <- nmf(data.matrix[ ,index[n, ]], rank=3, seed = 12345)
res4 <- nmf(data.matrix[ ,index[n, ]], rank=4, seed = 12345)
list(res2,res3,res4)
})
})
## version 3: use previous calculation as starting point
## ==> 30-40% reduction in computing time
system.time({
res <-
mclapply(mc.cores=16, 1:resamp.iterations, function(n){
res2 <- nmf(data.matrix[ ,index[n, ]], rank=2, seed = 12345)
res3 <- nmf(data.matrix[ ,index[n, ]], rank=3, seed = res2)
res4 <- nmf(data.matrix[ ,index[n, ]], rank=4, seed = res3)
list(res2,res3,res4)
})
})
我正在尝试这样做:
nmf.sub <- function(n){
sub.data.matrix <- data.matrix[, (index[n, ])] ## the index is a permutation of the original matrix at a 0.8 resampling proportion (doesn't really matter)
temp.result <- nmf(sub.data.matrix, rank = 2, seed = 12345) ## want to change 2 to i
return(temp.result)
}
class.list <- list()
for (i in nmf.rank){ ## nmf.rank is 2:4
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations, function(n) nmf.sub(n)) ## resamp.iterations is 10, nmf.sub is defined above
}
但是我不想让 temp.result 的 nmf 中的 rank = 2,而是让 rank = i
知道如何将那个参数传递给它吗?只是通过 mclapply 作为 function(n, i) 传递它是行不通的。
您似乎有两个循环:一个用于 nmf.rank
中的 i,一个用于 1:resamp.iterations
中的 n。因此,您需要将 i
和 n
都传递给 nmf.sub
,例如比如:
nmf.sub <- function(n, i){
## the index is a permutation of the original matrix at a 0.8
## resampling proportion (doesn't really matter)
sub.data.matrix <- data.matrix[, (index[n, ])]
## want to change 2 to i
temp.result <- nmf(sub.data.matrix, rank = i, seed = 12345)
return(temp.result)
}
resamp.iterations <- 10
nmf.rank <- 2:4
res <- lapply(nmf.rank, function(i){
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations,
function(n) nmf.sub(n,i))
})
## then you can flatten/reshape res
关于您对效率的评论(如下):大部分数值计算是在 nmf() 函数中执行的,因此循环设置正确,因为每个 process/core 都得到一个数值计算密集的工作。但是,为了加快计算速度,您可以考虑使用先前计算的结果,而不是种子 12345(除非由于与您的问题相关的某些原因,必须使用后一个种子)。在以下示例中,我的执行时间减少了 30-40%:
library(NMF)
RNGkind("L'Ecuyer-CMRG") ## always use this when using mclapply()
nr <- 19
nc <- 2e2
set.seed(123)
data.matrix <- matrix(rexp(nc*nr),nr,nc)
resamp.iterations <- 10
nmf.rank <- 2:4
index <- t(sapply(1:resamp.iterations, function(n) sample.int(nc,nc*0.8)))
nmf.sub <- function(n, i){
sub.data.matrix <- data.matrix[ ,index[n, ]]
temp.result <- nmf(sub.data.matrix, rank = i, seed = 12345)
return(temp.result)
}
## version 1
system.time({
res <- lapply(nmf.rank, function(i){
results.list <- mclapply(mc.cores = 16, 1:resamp.iterations,
function(n) nmf.sub(n,i))
})
})
## version 2: swap internal and external loops
system.time({
res <-
mclapply(mc.cores=16, 1:resamp.iterations, function(n){
res2 <- nmf(data.matrix[ ,index[n, ]], rank=2, seed = 12345)
res3 <- nmf(data.matrix[ ,index[n, ]], rank=3, seed = 12345)
res4 <- nmf(data.matrix[ ,index[n, ]], rank=4, seed = 12345)
list(res2,res3,res4)
})
})
## version 3: use previous calculation as starting point
## ==> 30-40% reduction in computing time
system.time({
res <-
mclapply(mc.cores=16, 1:resamp.iterations, function(n){
res2 <- nmf(data.matrix[ ,index[n, ]], rank=2, seed = 12345)
res3 <- nmf(data.matrix[ ,index[n, ]], rank=3, seed = res2)
res4 <- nmf(data.matrix[ ,index[n, ]], rank=4, seed = res3)
list(res2,res3,res4)
})
})