foreach 和 dopar 不创建对象,但返回 NULL

foreach and dopar not creating objects, but returning NULL

我正在使用 Maxent 进行一些空间分析。我有一个很长的脚本,其中包含我在列表中收集的许多输出。它适用于 rasterstack 中的 for 循环和低分辨率气候预测器(在我的核心 i5、6gb 笔记本中)。但是我需要使用一组高分辨率的栅格,所有的问题都来自这个问题。即使使用 16 核、32gb 的虚拟机,处理速度也非常慢,3 天后,内存不足,运行 在我的循环(有 92 种)中大约 50 圈后关闭。我正在尝试通过收集垃圾来清理内存并使用 doParallel 来改进此脚本。在新脚本完全 运行 低分辨率预测器后,我将尝试使用高分辨率预测器

因此,我将脚本更改为使用 foreach 而不是 for,并使用 %dopar%

但到目前为止,我得到的结果是:

 [[1]]
 NULL

 [[2]]
 NULL

 [[3]]
 NULL

 [[4]]
 NULL

我看到了另一个关于同一问题的问题,但是那个人需要的非常简单的解决方案并不适用于我。所以,非常欢迎任何提示

#install.packages("dismo")
library(dismo)
#install.packages("scales")
library(scales)
#install.packages("rgdal")
library(rgdal)
#install.packages("rgeos")
library(rgeos)
#install.packages("rJava")
library(rJava)
#install.packages("foreach")
library(foreach)
#install.packages("doParallel")
library(doParallel)

#Colors to use in the plots
MyRbw2<-c('#f4f4f4','#3288bd','#66c2a5','#e6f598','#fee08b','#f46d43','#9e0142')
colfunc_myrbw2<-colorRampPalette(MyRbw2)

#Create empty lists to recieve outputs
xm_list<-list()
xm_spc_list<-list()
e_spc_list<-list()
px_spc_list<-list()
tr_spc_list<-list()
spc_pol1<-list()
spc_pol5<-list()
tr<-list()


#Create empty data frame to recieve treshold values for each species
tr_df<-data.frame(matrix(NA, nrow=92, ncol=7))
tr_df[,1]<-as.character(tree_list)
names(tr_df)<- c('spp',"kappa","spec_sens","no_omission","prevalence","equal_sens_spec","sensitivity")


# Assigning objects to run Maxent
data_points <- tree_cd_points # this is a list with SpatialPoints for 92 species
data_list <- tree_list # list with the species names
counts_data<- counts_tree_cd # number of points for each species
predictors2<-predictors_low # rasterStack of Bioclim layers (climatic variables), low resolution

#Stablishing extent for Maxent predictions
xmin=-120; xmax=-35; ymin2=-40; ymax=35
limits2 <- c(xmin, xmax, ymin2, ymax)

# Making the cluster for doParallel
cores<-detectCores() # I have 16
cl <- makeCluster(cores[1]-1) #not to overload your computer
registerDoParallel(cl)

#Just to keep track of time
ptime1 <- proc.time()



pdf("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/treesp_maxent_20170823.pdf", 
    paper = "letter", height = 11, width=8,5, pointsize=12,pagecentre = TRUE)
#I have 92 species, but I'll run just the first 4 to test
foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar% {  #Runs only species with 5 or more points to avoid maxent problems

  if (counts_data$n[i]>4) { #If the species has more than 4 occurrence points, run maxent
    tryCatch({ #makes the loop go on despite errors


      #Sets train, test and total points for Maxent
      group <- kfold(x=data_points[[i]], 5)
      pres_train<- data_points[[i]][group != 1, ]
      pres_test <- data_points[[i]][group == 1, ]
      spoints<- data_points[[i]]

      #Sets background points for Maxent
      backg <- randomPoints(predictors2, n=20000, ext=limits2, extf = 1.25)
      colnames(backg) = c('lon', 'lat')
      group <- kfold(backg, 5)
      backg_train <- backg[group != 1, ]
      backg_test <- backg[group == 1, ]



      #The maxent itself (put the xm in the empty list that I created earlier to store all xms)
      xm_spc_list[[i]] <- maxent(x=predictors2, p=spoints, a=backg ,
                   factors='ecoreg',
                   args=c('visible=true',
                          'betamultiplier=1',
                          'randomtestpoints=20',
                          'randomseed=true',
                          'linear=true',
                          'quadratic=true',
                          'product=true',
                          'hinge=true',
                          'threads=4',
                          'responsecurves=true',
                          'jackknife=true',
                          'removeduplicates=false',
                          'extrapolate=true',
                          'pictures=true',
                          'cache=true',
                          'maximumiterations=5000',
                          'askoverwrite=false'),
                   path=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i]), overwrite=TRUE)


      par(mfrow=c(1,1),mar = c(2,2, 2, 2))
      plot(xm_spc_list[[i]], main=paste(data_list[i]))
      response(xm_spc_list[[i]])


      #Evaluating how good is the model and putting the evaluation values in a list
      e_spc_list[[i]] <- evaluate(pres_test, backg_test, xm_spc_list[[i]], predictors2) 



      #Predicting the climatic envelopes and Sending to a list os predictions
      px_spc_list[[i]] <- predict(predictors2, xm_spc_list[[i]], ext=limits2,  progress='text', 
                    filename=paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\s+', '_', data_list[i]),"_pred.grd"), overwrite=TRUE)



      tr_df[i,2:7]<-threshold(e_spc_list[[i]])
      tr[[i]]<-threshold(e_spc_list[[i]], 'spec_sens')


      #Pol 1 will be the regular polygon, default treshold
      spc_pol1[[i]] <- rasterToPolygons(px_spc_list[[i]]>tr[[i]],function(x) x == 1,dissolve=T)
      writeOGR(obj = spc_pol1[[i]], dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm1/",data_list[i]), driver = "ESRI Shapefile",
               layer = paste0(gsub('\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )




      #Pol 5 will be a 100km^2 circle around the occurrence points
      circ <- circles(spoints, d=5642,lonlat=TRUE)
      circ <- circ@polygons
      crs(circ)<-crs(wrld_cropped)
      circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

      #To write de polygon to a file, the function writeOGR needs an object SPDF, so...
      #Getting Polygon IDs
      circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
      #Making the IDs row names 
      row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
      # Make spatial polygon data frame
      circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

      #Save the polygon, finally
      writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i]), driver = "ESRI Shapefile",
               layer = paste0(gsub('\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE ) 

      spc_pol5[[i]]<-circ_SPDF

      #Now the plots 
      par(mfrow=c(2,3),mar = c(2,1, 1, 1))

      plot(px_spc_list[[i]], axes=FALSE, legend=TRUE, legend.shrink=1, col=colfunc_myrbw2(20), main=paste((data_list[i]),' - Maxent'))
      plot(wrld_cropped,add=TRUE, border='dark grey',axes=FALSE)
      points(data_points[[i]], pch=21,col="white", bg='hotpink', lwd=0.5, cex=0.7)

      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main='px>tr')  
      plot(spc_pol1[[i]] , main=paste((data_list[i]),' - Range'), add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8),axes=FALSE)
      points(data_points[[i]], pch="°",col="black",  cex=0.7)

      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=paste(data_list[i],"circles"))  
      plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
    }, error=function(e){cat("Warning message:",conditionMessage(e), "\n")})


    #But sometimes, even with >4 occurrence points, Maxent fails... 
    #So I'll make sure that if I have >4 points but maxent didn't work, I get the circles anyway
    f<-paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm/",data_list[i],"/",gsub('\s+', '_', data_list[i]),"_pred.grd", sep="")

    gc() #Just collecting garbage to speed up the process

    if (!file.exists(f)){ # then, if f (maxent output) doesn't exist, create the circles at least

      spoints<- data_points[[i]]

      circ <- circles(spoints, d=5642,lonlat=TRUE)
      circ <- circ@polygons
      crs(circ)<-crs(wrld_cropped)
      circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

      #To write de polygon to a file, the function writeOGR needs an object SPDF, so...
      #Getting Polygon IDs
      circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
      #Making the IDs row names 
      row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
      # Make spatial polygon data frame
      circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

      #Save the polygon, finally
      #dir.create(paste("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""))
      writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
               layer = paste0(gsub('\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )  

      spc_pol5[[i]]<-circ_SPDF


      plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])  
      plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
      #plot(spoints,pch=21,col="white", bg='hotpink', lwd=0.1, cex=0.5, add=TRUE)

    }


  } else  { #If the species does not have more than 4 points, 
            #do not run maxent, but create a circles polygon

    spoints<- data_points[[i]]

    #For the circle to have 100km2, d should be 5641.9 ... 
    circ <- circles(spoints, d=5642,lonlat=TRUE)
    circ <- circ@polygons
    crs(circ)<-crs(wrld_cropped)
    circ <- gIntersection(wrld_cropped, circ, byid = TRUE, drop_lower_td = TRUE)

    circ_df<- as.data.frame(sapply(slot(circ, "polygons"), function(x) slot(x, "ID")))
    row.names(circ_df) <- sapply(slot(circ, "polygons"), function(x) slot(x, "ID"))
    circ_SPDF <- SpatialPolygonsDataFrame(circ, data =circ_df)

    writeOGR(obj = circ_SPDF, dsn = paste0("C:/Users/thai/Desktop/Ecologicos/w2/SpDistModel/SEM9/spp/xm5/",data_list[i],sep=""), driver = "ESRI Shapefile",
             layer = paste0(gsub('\s+', '_', data_list[i]),"_pol"), overwrite_layer = TRUE )  

    par(mfrow=c(1,1),mar = c(2,2, 2, 2))
    plot(wrld_cropped,  border='dark grey', col="#f9f9f9",axes=FALSE, main=data_list[i])  
    plot(circ,  add=TRUE, col=alpha("green3",0.8),border=alpha("green3",0.8) )
    spc_pol5[[i]]<-circ_SPDF

    gc() #collecting garbage before a nuw run
  }

}
dev.off()
dev.off() #to close that pdf I started before the loop


ptime2<- proc.time() - ptime1 #just checking the time
ptime2

您可以调用 foreach 指定一个 "collector" 变量,例如:

results <- foreach(i=1:4, .packages=c("dismo","scales","rgdal","rgeos","rJava")) %dopar%

然后,在 foreach 循环结束之前,您可以将所有结果变量收集到一个公共列表中,return 它们:

out <- list(xm_spc_list= xm_spc_list,
            e_spc_list = e_spc_list, 
            px_spc_list = px_spc_list, 
            ...  =  ...,
            ...  =  ....)
return(out)
}

请注意,在 foreach 中,您可以避免使用 xm_spc_list[[i]] <- 等结构,因为 foreach 将通过 "binding" 列表(有序)列表中的结果为您处理。

要从 foreach 之后的 results 列表列表中检索 "single" 输出,您可以使用如下内容:

xm_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 1)))
e_spc_list <- data.table::rbindlist(do.call(c,lapply(results, "[", 2)))
....
....

HTH(虽然无法测试,但给出了手头的例子)