R:如何在 R 3.2.1 中使用格子并行化多面板绘图?
R: How to Parallelize multi-panel plotting with lattice in R 3.2.1?
我是 R 编程的新手,想知道如何 运行 在 12 个网格对象 上并行 plot
lattice
包。
基本上,经过很多预处理步骤后,我有以下命令:
plot(adhd_plot, split = c(1,1,4,3)) #plot adhd trellis object at 1,1 in a grid of 4 by 3 i.e 4 COLUMNS x 3 ROWS
plot(bpd_plot, split = c(2,1,4,3), newpage = F) #plot bpd trellis object in 2nd Column in a grid of 4colx3row
plot(bmi_plot, split = c(3,1,4,3), newpage = F)
plot(dbp_plot, split = c(4,1,4,3), newpage = F)
plot(height_plot, split = c(1,2,4,3), newpage = F)
plot(hdl_plot, split = c(2,2,4,3), newpage = F)
plot(ldl_plot, split = c(3,2,4,3), newpage = F)
plot(ra_plot, split = c(4,2,4,3), newpage = F)
plot(sbp_plot, split = c(1,3,4,3), newpage = F)
plot(scz_plot, split = c(2,3,4,3), newpage = F)
plot(tc_plot, split = c(3,3,4,3), newpage = F)
plot(tg_plot, split = c(4,3,4,3), newpage = F)
问题是,虽然上述命令有效,但它们在 Mac OSX 上需要很长时间 (>4hrs) 才能生成如下图以下 :
由于我的 Mac 有 8 个核心,我认为我应该尝试将 plot 命令拆分到不同的核心以加快绘图速度。
在搜索其他并行化问题后,我找到了 doParallel
包并认为我可以在其中实现 parLapply
函数,如下所示:
library(doParallel)
detectCores()
cl <- makeCluster(6) #6 out of 8 cores
registerdoParallel(cl)
parLapply(cl, list_of_all_trellis_objects, plot)
但是,我不确定如何使用上述 parLapply
命令中的 split
参数将绘图放置在网格上的不同位置。
我一定需要12个地块分开放置,不叠加,怎么办?
感谢您完成我的查询,期待您的提示和解决方案。
如评论中所述,无法并行写入绘图设备。
一些加快单个地块绘制的解决方法:
减少QQ剧情点数,见:
应用这些技巧可以更快地加载数据:
http://cbio.ensmp.fr/~thocking/reading-large-text-files-into-R.html
您可以尝试 draw/save 并行绘制多个图(其中每个图都使用第 1 点和第 2 点中的方法),但写入磁盘可能会导致严重的瓶颈。
编辑:
这里是绘制快速qq图的粗略代码:
https://github.com/vforget/fastqq
代码如下:
find_conf_intervals = function(row){
i = row[1]
len = row[2]
if (i < 10000 | i %% 100 == 0){
return(c(-log10(qbeta(0.95,i,len-i+1)), -log10(qbeta(0.05,i,len-i+1))))
} else { # Speed up
return(c(NA,NA))
}
}
confidence.intervals <- function(e){
xspace = 0.078
print("1")
ci = apply(cbind( 1:length(e), rep(length(e),length(e))), MARGIN=1, FUN=find_conf_intervals)
print("2")
bks = append(seq(10000,length(e),100),length(e)+1)
print("3")
for (i in 1:(length(bks)-1)){
ci[1, bks[i]:(bks[i+1]-1)] = ci[1, bks[i]]
ci[2, bks[i]:(bks[i+1]-1)] = ci[2, bks[i]]
}
colnames(ci) = names(e)
## Extrapolate to make plotting prettier (doesn't affect intepretation at data points)
slopes = c((ci[1,1] - ci[1,2]) / (e[1] - e[2]), (ci[2,1] - ci[2,2]) / (e[1] - e[2]))
print("4")
extrap_x = append(e[1]+xspace,e) ## extrapolate slightly for plotting purposes only
extrap_y = cbind( c(ci[1,1] + slopes[1]*xspace, ci[2,1] + slopes[2]*xspace), ci)
print("5")
polygon(c(extrap_x, rev(extrap_x)), c(extrap_y[1,], rev(extrap_y[2,])),
col = "grey81", border = "grey81")
}
quant.subsample <- function(y, m=100, e=1) {
## m: size of a systematic sample
## e: number of extreme values at either end to use
x <- sort(y)
n <- length(x)
quants <- (1 + sin(1:m / (m+1) * pi - pi/2))/2
sort(c(x[1:e], quantile(x, probs=quants), x[(n+1-e):n]))
## Returns m + 2*e sorted values from the EDF of y
}
get.points <- function(pv) {
suppressWarnings(as.numeric(pv))
names(d) = names(pv)
d = d[!is.na(d)]
d = d[d>0 & d<1]
d = d[order(d,decreasing=F)]
y = -log10(d)
x = -log10( ppoints(length(d) ))
m <- 0.001 * length(x)
e <- floor(0.0005 * length(x))
return(list(x=quant.subsample(x, m, e), y=quant.subsample(y, m, e)))
}
fqq <- function(x, y, ...) {
plot(0,
col=FALSE,
xlim=range(x),
ylim=range(y),
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p))),
...)
abline(0,1,col=2)
points(x,y, ...)
}
args <- commandArgs(trailingOnly = TRUE)
pv.f = args[1]
qq.f = args[2]
nrows = as.numeric(args[3])
message(Sys.time())
message("READING")
d <- read.table(pv.f, header=TRUE, sep=" ", nrows=nrows, colClasses=c("numeric"))
message(Sys.time())
message("LAMBDA")
chisq <- qchisq(1-d$P_VAL,1)
lambda = median(chisq)/qchisq(0.5,1)
message(Sys.time())
message("PLOTING")
p <- get.points(d$P_VAL)
png(file=qq.f)
fqq(p$x, p$y, main=paste(pv.f, lambda, sep="\n"), cex.axis=1.5, cex.lab=1.5)
dev.off()
message(Sys.time())
我是 R 编程的新手,想知道如何 运行 在 12 个网格对象 上并行 plot
lattice
包。
基本上,经过很多预处理步骤后,我有以下命令:
plot(adhd_plot, split = c(1,1,4,3)) #plot adhd trellis object at 1,1 in a grid of 4 by 3 i.e 4 COLUMNS x 3 ROWS
plot(bpd_plot, split = c(2,1,4,3), newpage = F) #plot bpd trellis object in 2nd Column in a grid of 4colx3row
plot(bmi_plot, split = c(3,1,4,3), newpage = F)
plot(dbp_plot, split = c(4,1,4,3), newpage = F)
plot(height_plot, split = c(1,2,4,3), newpage = F)
plot(hdl_plot, split = c(2,2,4,3), newpage = F)
plot(ldl_plot, split = c(3,2,4,3), newpage = F)
plot(ra_plot, split = c(4,2,4,3), newpage = F)
plot(sbp_plot, split = c(1,3,4,3), newpage = F)
plot(scz_plot, split = c(2,3,4,3), newpage = F)
plot(tc_plot, split = c(3,3,4,3), newpage = F)
plot(tg_plot, split = c(4,3,4,3), newpage = F)
问题是,虽然上述命令有效,但它们在 Mac OSX 上需要很长时间 (>4hrs) 才能生成如下图以下 :
在搜索其他并行化问题后,我找到了 doParallel
包并认为我可以在其中实现 parLapply
函数,如下所示:
library(doParallel)
detectCores()
cl <- makeCluster(6) #6 out of 8 cores
registerdoParallel(cl)
parLapply(cl, list_of_all_trellis_objects, plot)
但是,我不确定如何使用上述 parLapply
命令中的 split
参数将绘图放置在网格上的不同位置。
我一定需要12个地块分开放置,不叠加,怎么办?
感谢您完成我的查询,期待您的提示和解决方案。
如评论中所述,无法并行写入绘图设备。
一些加快单个地块绘制的解决方法:
减少QQ剧情点数,见:
应用这些技巧可以更快地加载数据:
http://cbio.ensmp.fr/~thocking/reading-large-text-files-into-R.html
您可以尝试 draw/save 并行绘制多个图(其中每个图都使用第 1 点和第 2 点中的方法),但写入磁盘可能会导致严重的瓶颈。
编辑:
这里是绘制快速qq图的粗略代码:
https://github.com/vforget/fastqq
代码如下:
find_conf_intervals = function(row){
i = row[1]
len = row[2]
if (i < 10000 | i %% 100 == 0){
return(c(-log10(qbeta(0.95,i,len-i+1)), -log10(qbeta(0.05,i,len-i+1))))
} else { # Speed up
return(c(NA,NA))
}
}
confidence.intervals <- function(e){
xspace = 0.078
print("1")
ci = apply(cbind( 1:length(e), rep(length(e),length(e))), MARGIN=1, FUN=find_conf_intervals)
print("2")
bks = append(seq(10000,length(e),100),length(e)+1)
print("3")
for (i in 1:(length(bks)-1)){
ci[1, bks[i]:(bks[i+1]-1)] = ci[1, bks[i]]
ci[2, bks[i]:(bks[i+1]-1)] = ci[2, bks[i]]
}
colnames(ci) = names(e)
## Extrapolate to make plotting prettier (doesn't affect intepretation at data points)
slopes = c((ci[1,1] - ci[1,2]) / (e[1] - e[2]), (ci[2,1] - ci[2,2]) / (e[1] - e[2]))
print("4")
extrap_x = append(e[1]+xspace,e) ## extrapolate slightly for plotting purposes only
extrap_y = cbind( c(ci[1,1] + slopes[1]*xspace, ci[2,1] + slopes[2]*xspace), ci)
print("5")
polygon(c(extrap_x, rev(extrap_x)), c(extrap_y[1,], rev(extrap_y[2,])),
col = "grey81", border = "grey81")
}
quant.subsample <- function(y, m=100, e=1) {
## m: size of a systematic sample
## e: number of extreme values at either end to use
x <- sort(y)
n <- length(x)
quants <- (1 + sin(1:m / (m+1) * pi - pi/2))/2
sort(c(x[1:e], quantile(x, probs=quants), x[(n+1-e):n]))
## Returns m + 2*e sorted values from the EDF of y
}
get.points <- function(pv) {
suppressWarnings(as.numeric(pv))
names(d) = names(pv)
d = d[!is.na(d)]
d = d[d>0 & d<1]
d = d[order(d,decreasing=F)]
y = -log10(d)
x = -log10( ppoints(length(d) ))
m <- 0.001 * length(x)
e <- floor(0.0005 * length(x))
return(list(x=quant.subsample(x, m, e), y=quant.subsample(y, m, e)))
}
fqq <- function(x, y, ...) {
plot(0,
col=FALSE,
xlim=range(x),
ylim=range(y),
xlab=expression(Expected~~-log[10](italic(p))),
ylab=expression(Observed~~-log[10](italic(p))),
...)
abline(0,1,col=2)
points(x,y, ...)
}
args <- commandArgs(trailingOnly = TRUE)
pv.f = args[1]
qq.f = args[2]
nrows = as.numeric(args[3])
message(Sys.time())
message("READING")
d <- read.table(pv.f, header=TRUE, sep=" ", nrows=nrows, colClasses=c("numeric"))
message(Sys.time())
message("LAMBDA")
chisq <- qchisq(1-d$P_VAL,1)
lambda = median(chisq)/qchisq(0.5,1)
message(Sys.time())
message("PLOTING")
p <- get.points(d$P_VAL)
png(file=qq.f)
fqq(p$x, p$y, main=paste(pv.f, lambda, sep="\n"), cex.axis=1.5, cex.lab=1.5)
dev.off()
message(Sys.time())