优化 R 中的空间查询 - 森林中树冠分布的生物量

Optimizing spatial query in R - crown-distributed biomass in forest

我正在尝试计算森林图中树冠与方形网格单元重叠的面积。以下是一个可重现的例子:

# A. Define objects
    require(sp)
    require(raster)
    require(rgdal)
    require(rgeos)
    require(dismo)
    radius=25   # max search radius around 10 x 10 m cells
    res <- vector() # where to store results

    # Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
    set.seed(0) 
    survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))  
    coordinates(survey) <- ~x+y

    # Define 10 x 10 subplots
    grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10))) 
    survey$subplot <- over(survey,grid10)   

# B. Now find fraction of tree crown overlapping each subplot
    for (i in 1:100) {
        # Extract centroïd of each the ith cell
        centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
        corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))


        # Find trees in a max radius (define above)
        tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]


        # Define tree crown based on tree diameter
        tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter

        # Compute the distance from each tree to cell's borders
        pDist <- vector()
        for (k in 1:nrow(tem))  {
            pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
        }

        # Keeps only trees whose crown is lower than the above distance (=overlap)
        overlap.trees <- tem[which(pDist<=tem$crownr),]
        overlap.trees$crowna <-overlap.trees$crownr^2*pi  # compute crown area

        # Creat polygons from overlapping crowns
        c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
        crown <- polygons(c1)
        Crown <-    SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))

        # Create a fine grid points to retrieve the fraction of overlapping crowns
        max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search

        finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
        coordinates(finegrid) <- ~ x+y
        A <- extract(Crown,finegrid)
        Crown@data$ID <- seq(1,length(crown),1)
        B <- as.data.frame(table(A$poly.ID))
        if (nrow(B)>0) {
        B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
        B$overlap <- B$Freq/B$crown.area
        B$overlap[B$overlap>1] <- 1
        res[i] <- sum(B$overlap) } else {
        res[i] <- 0 }
    }

# C. Check the result
    res  # sum of crown fraction overlapping each cell (works fine)

此算法需要大约 3 分钟才能 运行 处理 100 个单元格。我有一个包含 35000 个单元格的大型数据集,因此 150*7=1050 分钟或 17.5 小时。 任何提示系and/or优化此算法??

在使用 profvis 包进行快速分析后,似乎只需更改几行即可进行一些改进。这不是一个详尽的优化,我相信还有更多的改进是可能的。

我变了

pDist <- vector()
for (k in 1:nrow(tem))  {
    pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}

pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem))  {
  pDist[k] <- gDistance(tem[k,], my.poly)
}

因为不需要每次都创建 SpatialPolygons 对象。如下面的分析图像所示(顶部已优化),这可能会很昂贵。

这是一些应该 运行 并行的代码。

# load only necessary package for code until parSapplyLB
# LB is load-balancing, which means it will distribute task to cores
# which are idle. This is great if jobs take an uneven amount of time
# to run.

library(parallel)
library(sp)

system.time({

  # prepare the cluster, default is PSOCK on windows but can be FORK form *nix
  cl <- makeCluster(4)
  # worker is just a new instance of fresh vanilla R so you need to load the 
  # necessary libraries to all the workers
  clusterEvalQ(cl = cl, library(sp))
  clusterEvalQ(cl = cl, library(raster))
  clusterEvalQ(cl = cl, library(rgdal))
  clusterEvalQ(cl = cl, library(rgeos))
  clusterEvalQ(cl = cl, library(dismo))

  radius <- 25   # max search radius around 10 x 10 m cells
  # res <- rep(NA, 100) # where to store results

  # Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
  set.seed(0) 
  survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))  
  coordinates(survey) <- ~x+y

  # Define 10 x 10 subplots
  grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10))) 
  survey$subplot <- over(survey,grid10)   

  # Export needed variables to workers
  clusterExport(cl = cl, varlist = c("survey", "radius"))

  # this function is your former for() loop, increase X = 1:100 to suit your needs

  res <- parSapplyLB(cl = cl, X = 1:100, FUN = function(i, survey) {
  # B. Now find fraction of tree crown overlapping each subplot
    # Extract centroïd of each the ith cell
    centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
    corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))

    # Find trees in a max radius (define above)
    tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]

    # Define tree crown based on tree diameter
    tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter

    # Compute the distance from each tree to cell's borders
    pDist <- vector()
    my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
    for (k in 1:nrow(tem))  {
      pDist[k] <- gDistance(tem[k,], my.poly)
    }

    # Keeps only trees whose crown is lower than the above distance (=overlap)
    overlap.trees <- tem[which(pDist<=tem$crownr),]
    overlap.trees$crowna <-overlap.trees$crownr^2*pi  # compute crown area

    # Creat polygons from overlapping crowns
    c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
    crown <- polygons(c1)
    Crown <-    SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))

    # Create a fine grid points to retrieve the fraction of overlapping crowns
    max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search

    finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
    coordinates(finegrid) <- ~ x+y
    A <- extract(Crown,finegrid)
    Crown@data$ID <- seq(1,length(crown),1)
    B <- as.data.frame(table(A$poly.ID))
    if (nrow(B)>0) {
      B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
      B$overlap <- B$Freq/B$crown.area
      B$overlap[B$overlap>1] <- 1
      res <- sum(B$overlap) } else {
        res <- 0 }
  }, survey = survey)
stopCluster(cl = cl)
})

对于那些对树木、树冠和生物量感兴趣的人,有人建议我使用一种更快的方法来计算林分中树冠分布的生物量(感谢 H. Muller-Landau)。人们需要在逐个茎和 1x1m 网格的基础上进行思考。下面的代码需要 6 分钟到 运行,而之前的代码需要几个小时。希望感兴趣!

# Create a fake 1-ha forest stand:
 trees <- data.frame(x=sample(99.5,1000,replace=T),y=sample(99.5,1000,replace=T),dbh=sample(100,1000,replace=T))  


# Create a 1x1m cell matrix where to  store the result
cdagb=matrix(0,nrow=100,ncol=100)


#Calculate the crownradius for every stem (fake proportion)
trees$crownradius = 2*trees$dbh^0.5

#Calculate the index of the 1x1 m quadrat in which the tree stem falls 
trees$quadx=ceiling(trees$x)
trees$quady=ceiling(trees$y)

# Run the algo stem-by-stem
for (i in 1:nrow(trees)) {
    # xdisp and ydisp are the integer cell position differences in x and y that should be checked to see if the crown of the focal tree overlaps
    xdisp=seq(ceiling(trees$quadx[i]-trees$crownradius[i]),floor((trees$quadx[i]+trees$crownradius[i])),1)
    xdisp[xdisp>=1000] <- 1000 +(1000 - xdisp[xdisp>=1000])  # mirror values on edges onto adjacent cells
    xdisp[xdisp<1] <-  -xdisp[xdisp<1] + 1 # avoid XY to be 0
    ydisp=seq(ceiling(trees$quady[i]-trees$crownradius[i]),floor((trees$quady[i]+trees$crownradius[i])),1)
    ydisp[ydisp>=500] <- 500 +(500 - ydisp[ydisp>=500])
    ydisp[ydisp<1] <-  -ydisp[ydisp<1] + 1

    # Calculate the square of the x and y distances from the focal tree to the center of each of these cells
    xdistsqr=(xdisp-trees$quadx[i])^2
    ydistsqr=(ydisp-trees$quady[i])^2
    nx=length(xdisp)
    ny=length(ydisp)

    # Calculate the distance from the center of each cell in the neighborhood to the focal tree               
    distmatrix=matrix(sqrt(rep(xdistsqr,each=ny)+rep(ydistsqr,nx)),nrow=nx,ncol=ny) 

    # includes only trees that overlap the grid cells
    incmatrix=ifelse(distmatrix<trees$crownradius[i],1,0)
    ncells=sum(incmatrix)
    agbpercell=trees$agb[i]/ncells   # divide the biomass by cell
    addagbmatrix=incmatrix*agbpercell  # relloacte biomass by cell

    # add the biomass divided in square meter to each grid point
    cdagb[xdisp,ydisp] = cdagb[xdisp,ydisp] + addagbmatrix
}