R 中光栅处理的内存问题

Memory issue with raster processing in R

我已经编写了一个脚本来自动处理我的栅格数据,但它在完成一半后就失败了。该循环在目录 unzipped 中查找包含 .tif 文件的子目录,并将那些带有一些底图的子目录堆叠在 /base_layers 中。它一直在失败:

Error in matrix(unlist(ini), ncol = 2, byrow = TRUE) : 'data' must be of a vector type, was 'NULL'

In writeBin(v, x@file@con, size = x@file@dsize) : problem writing to connection

Error in .local(.Object, ...) : GDAL Error 3: Free disk space available is 875151360 bytes, whereas 2473962400 are at least necessary. You can disable this check by defining the CHECK_DISK_FREE_SPACE configuration option to FALSE.

我想已修复,查看答案 运行 在具有 Ubuntu、24 GB 内存和 50 GB 存储空间和 4 个 vCPU 的 kubernetes pod 中以及在具有 2 个 vCPU 的 VM 实例 运行 Ubuntu 18.04 中, 8 GB 内存和 50 GB 存储空间。由于内存问题,pods 中途失败,VM 也没有完成。代码通常在到达掩模栅格阶段时失败,但仅在 运行 通过循环的几次迭代后才会失败(因此它在开始时一切正常)。

如果有人能指出此脚本中是否可能出现内存碎片,或者如何清除 OS 内存,我将永远感激不已!

脚本:

library(raster)
library(rgdal)
input = "/mnt/nfs/data/unzipped"
output = "/mnt/nfs/data/training" #path to where the data should go

#get paths to basemaps
DEM = raster("/mnt/nfs/data/base_layers/Clipped_filled_dem.tif")
flow_accum = raster("/mnt/nfs/data/base_layers/accumulation.tif")
slope = raster("/mnt/nfs/data/base_layers/Clipped_slope.tif")
aspect = raster("/mnt/nfs/data/base_layers/Clipped_filled_aspect.tif") 
ruggedness = raster("/mnt/nfs/data/base_layers/Clipped_TRI.tif")

#get directories that have data that needs to be processed
directory = list.dirs(path = input, recursive = FALSE)
for(direct in directory) {
    subdirect = list.dirs(path = direct,recursive = FALSE)
    for(sub in subdirect){

        files_for_raster <- list.files(path = sub, pattern = "*.tif$", full.names = TRUE)
        rasterstack = stack(files_for_raster)

        # name of datapull
        name = gsub(paste(direct,"/",sep=""),"",sub)
        print(c("working in",name))

        # crop DEM to the extent of the satellite image
        DEMcrop = crop(DEM,rasterstack) #extent can be a raster
        flow_accumcrop = crop(flow_accum,rasterstack)
        slopecrop = crop(slope,rasterstack)
        aspectcrop = crop(aspect,rasterstack)
        ruggednesscrop = crop(ruggedness,rasterstack)
        print(c("cropped"))
        print(object.size(DEMcrop))
        print(object.size(rasterstack))

        # resample rasters, this will take a bit
        DEMcrop = resample(DEMcrop,rasterstack) 
        flow_accumcrop = resample(flow_accumcrop,rasterstack)
        slopecrop = resample(slopecrop,rasterstack)
        aspectcrop = resample(aspectcrop,rasterstack)
        ruggednesscrop = resample(ruggednesscrop,rasterstack)
        print(c("resampled"))
        print(object.size(DEMcrop))
        print(object.size(rasterstack))

        # mask layers 
        DEMcrop = mask(DEMcrop,raster::subset(rasterstack,1)) 
        flow_accumcrop = mask(flow_accumcrop,raster::subset(rasterstack,1))
        slopecrop = mask(slopecrop,raster::subset(rasterstack,1))
        aspectcrop = mask(aspectcrop,raster::subset(rasterstack,1))
        ruggednesscrop = mask(ruggednesscrop,raster::subset(rasterstack,1))
        print(c("masked"))
        print(object.size(DEMcrop))
        print(object.size(rasterstack))

        # add baselayers to the raster stack
        finalstack = addLayer(rasterstack,DEMcrop,flow_accumcrop,slopecrop,aspectcrop,ruggednesscrop)
        print(names(finalstack))
        print(nlayers(finalstack))
        bands<-c("band1","band2","band3","band4","band5","band6","band7")
        type<-c("quality","sr_ndvi","DEM","flow_accum","slope","aspect","TRI")
        band_info<-data.frame(bands,type)

        print("finalstack")

        # create new output directory and save raster there
        output_subdirect = gsub(paste(input,"/",sep=""),"",sub)
        dir.create(file.path(output,output_subdirect), recursive = TRUE)
        Sys.chmod(file.path(output,output_subdirect), mode = "777", use_umask = FALSE)
        print("created directory")
        write = file.path(output,output_subdirect)
        writeRaster(finalstack, format="GTiff", filename=file.path(write,name,fsep="/"), options=c("INTERLEAVE=BAND","COMPRESS=NONE"), overwrite=TRUE)
        write.csv(band_info, file = paste(file.path(write,name,fsep="/"),".csv",sep=""))
        print("done processing")
        rm(rasterstack,DEMcrop,flow_accumcrop,slopecrop,aspectcrop,ruggednesscrop)
        gc()
        print(gc())
        system("sudo sysctl -w vm.drop_caches=3")
    }
}

# useful functions
#mystack = stack("path/to/multilayer.tif") # multilayer.tif is an existing raster stack
#band1 = subset(mystack,subset=1) # subsets bands from raster stack
#removeTmpFiles(h=0) # removes temp files, can be used after writing raster stackes to delete all temp raster files
#rasterbrick<-brick(rasterstack) #can make a raster brick from a raster stack

我添加了 print(object.size()) 以尝试查看我的对象在整个代码执行过程中是否在变大。

该文件正在将大型光栅写入 /tmp 文件夹中的磁盘,每次执行超过 20Gb。很快就把磁盘填满了。仍然担心为什么将光栅写入现有文件而不是清除它们。可以在脚本期间清除 tmp 目录。

这是对光栅包内存问题的很好解释。 https://discuss.ropensci.org/t/how-to-avoid-space-hogging-raster-tempfiles/864