如何并行化涉及栅格的嵌套 for 循环?
How to parallelize a nested for loop involving rasters?
我正在处理栅格数据,并尝试为不同位置的栅格堆栈中的每个栅格裁剪和屏蔽各种缓冲区。结果是栅格列表的列表。我得到了适用于一小部分数据的代码,但现在我正在对整个数据集进行尝试,但它的运行速度非常慢。参见示例代码:
# Example data ------------------------------------------------------------
#create example raster stack
r1 = raster(nrows=1000,ncol=1000,xmn=60,xmx=90,ymn=0,ymx=25)
rr = lapply(1:100, function(i) setValues(r1,runif(ncell(r1))))
rrstack=stack()
for (i in 1:length(rr)){
stacknext=rr[[i]]
rrstack=stack(rrstack,stacknext)
}
#create example shapefile list
lats=runif(26,min=0,max=25)
lons=runif(26,min=60,max=90)
exnames=paste0("city_",letters)
coords=data.frame(names=exnames,lats=lats,lons=lons)
coords_sf = st_as_sf(coords,coords=c("lons","lats"),crs=4326,dim ="XY")
circle1=st_buffer(coords_sf, 1E3)
circle100=st_buffer(coords_sf,1E5)
circle500=st_buffer(coords_sf,5E5)
circlist=list(circle1=circle1,circle100=circle100,circle500=circle500)
circlist_reproj=lapply(circlist,function(x) st_transform(x,crs(rrstack[[1]])))
start <- proc.time()
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
mystack <- stack()
for(k in 1:nrow(circlist_reproj[[1]])) {
for(j in 1:length(circlist_reproj)) {
for (i in 1:nlayers(rrstack)){
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
}
dellist[[j]] <- mystack
mystack <- stack()
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
basetime <- proc.time()-start
#time taken for computation
basetime
user system elapsed
940.173 84.366 1029.688
如您所见,对于一组比我拥有的小的数据,计算需要一段时间。我想尝试并行化处理,但无法弄清楚如何这样做。我现在有两个问题。首先,由于嵌套 for 循环的性质,我不确定应该将哪个 for
循环更改为 foreach
。根据 post,它看起来像是第一个,但我不确定它是否代表所有嵌套的 for 循环。当我进行第一个 for
循环时 foreach
然后我得到错误 Error in { : task 1 failed - "could not find function "nlayers""
然后我尝试在 foreach
调用中添加包参数导致嵌套的 for 循环看起来喜欢
foreach(k = 1:nrow(circlist_reproj[[1]], .packages='raster')) %dopar% {
for(j in 1:length(circlist_reproj)) {
for (i in 1:nlayers(rrstack)) {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
}
dellist[[j]] <- mystack
mystack <- stack()
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
然后给出错误
unused argument (.packages = "raster")
所以我不确定如何正确地将 .packages
参数应用于 foreach
函数。我在这里做错了什么?
编辑
根据@HenrikB 的评论,我查看了我的代码并重新编写了它。我现在有以下 foreach
循环。现在代码完成了,但它导致所有空值。
cores <- detectCores()
cl <- makeCluster(cores[1]-2) #not to overload your computer
registerDoParallel(cl)
start <- proc.time()
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
mystack <- stack()
foreach(k = 1:nrow(circlist_reproj[[1]])) %:%
foreach(j = 1:length(circlist_reproj))%:%
foreach (i = 1:nlayers(rrstack), .packages=c('raster','sf')) %dopar% {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
dellist[[j]] <- mystack
mystack <- stack()
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
partime <- proc.time()-start
在接受@Henrik 的评论并稍微修改我的代码后,我能够想出一个通过并行化解决问题的解决方案,但是它比基础解决方案慢。但那是另一个 post。这是解决方案:
cores <- detectCores()
cl <- makeCluster(cores[1]-2) #not to overload your computer
registerDoParallel(cl)
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
for(k in 1:nrow(circlist_reproj[[1]])) {
for(j in 1:length(circlist_reproj)) {
parrasterstack <- foreach(i=1:nlayers(rrstack),.packages=c('raster','sf')) %dopar% {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
raster::crop(maskraster,circlist_reproj[[j]][k,])
}
parrasterstack <- stack(parrasterstack)
dellist[[j]] <- parrasterstack
parrasterstack <- NULL
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
stopCluster(cl)
我正在处理栅格数据,并尝试为不同位置的栅格堆栈中的每个栅格裁剪和屏蔽各种缓冲区。结果是栅格列表的列表。我得到了适用于一小部分数据的代码,但现在我正在对整个数据集进行尝试,但它的运行速度非常慢。参见示例代码:
# Example data ------------------------------------------------------------
#create example raster stack
r1 = raster(nrows=1000,ncol=1000,xmn=60,xmx=90,ymn=0,ymx=25)
rr = lapply(1:100, function(i) setValues(r1,runif(ncell(r1))))
rrstack=stack()
for (i in 1:length(rr)){
stacknext=rr[[i]]
rrstack=stack(rrstack,stacknext)
}
#create example shapefile list
lats=runif(26,min=0,max=25)
lons=runif(26,min=60,max=90)
exnames=paste0("city_",letters)
coords=data.frame(names=exnames,lats=lats,lons=lons)
coords_sf = st_as_sf(coords,coords=c("lons","lats"),crs=4326,dim ="XY")
circle1=st_buffer(coords_sf, 1E3)
circle100=st_buffer(coords_sf,1E5)
circle500=st_buffer(coords_sf,5E5)
circlist=list(circle1=circle1,circle100=circle100,circle500=circle500)
circlist_reproj=lapply(circlist,function(x) st_transform(x,crs(rrstack[[1]])))
start <- proc.time()
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
mystack <- stack()
for(k in 1:nrow(circlist_reproj[[1]])) {
for(j in 1:length(circlist_reproj)) {
for (i in 1:nlayers(rrstack)){
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
}
dellist[[j]] <- mystack
mystack <- stack()
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
basetime <- proc.time()-start
#time taken for computation
basetime
user system elapsed
940.173 84.366 1029.688
如您所见,对于一组比我拥有的小的数据,计算需要一段时间。我想尝试并行化处理,但无法弄清楚如何这样做。我现在有两个问题。首先,由于嵌套 for 循环的性质,我不确定应该将哪个 for
循环更改为 foreach
。根据 for
循环时 foreach
然后我得到错误 Error in { : task 1 failed - "could not find function "nlayers""
然后我尝试在 foreach
调用中添加包参数导致嵌套的 for 循环看起来喜欢
foreach(k = 1:nrow(circlist_reproj[[1]], .packages='raster')) %dopar% {
for(j in 1:length(circlist_reproj)) {
for (i in 1:nlayers(rrstack)) {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
}
dellist[[j]] <- mystack
mystack <- stack()
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
然后给出错误
unused argument (.packages = "raster")
所以我不确定如何正确地将 .packages
参数应用于 foreach
函数。我在这里做错了什么?
编辑
根据@HenrikB 的评论,我查看了我的代码并重新编写了它。我现在有以下 foreach
循环。现在代码完成了,但它导致所有空值。
cores <- detectCores()
cl <- makeCluster(cores[1]-2) #not to overload your computer
registerDoParallel(cl)
start <- proc.time()
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
mystack <- stack()
foreach(k = 1:nrow(circlist_reproj[[1]])) %:%
foreach(j = 1:length(circlist_reproj))%:%
foreach (i = 1:nlayers(rrstack), .packages=c('raster','sf')) %dopar% {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
maskraster <- raster::crop(maskraster,circlist_reproj[[j]][k,])
mystack <- stack(mystack,maskraster)
dellist[[j]] <- mystack
mystack <- stack()
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
partime <- proc.time()-start
在接受@Henrik 的评论并稍微修改我的代码后,我能够想出一个通过并行化解决问题的解决方案,但是它比基础解决方案慢。但那是另一个 post。这是解决方案:
cores <- detectCores()
cl <- makeCluster(cores[1]-2) #not to overload your computer
registerDoParallel(cl)
citlist <- vector(mode='list',length=nrow(circlist_reproj[[1]]))
dellist <- vector(mode='list',length=length(circlist_reproj))
for(k in 1:nrow(circlist_reproj[[1]])) {
for(j in 1:length(circlist_reproj)) {
parrasterstack <- foreach(i=1:nlayers(rrstack),.packages=c('raster','sf')) %dopar% {
maskraster <- raster::mask(rrstack[[i]],circlist_reproj[[j]][k,])
raster::crop(maskraster,circlist_reproj[[j]][k,])
}
parrasterstack <- stack(parrasterstack)
dellist[[j]] <- parrasterstack
parrasterstack <- NULL
}
citlist[[k]] <- dellist
dellist <- vector(mode='list',length=length(circlist_reproj))
}
stopCluster(cl)