优化 R 中的空间查询 - 森林中树冠分布的生物量
Optimizing spatial query in R - crown-distributed biomass in forest
我正在尝试计算森林图中树冠与方形网格单元重叠的面积。以下是一个可重现的例子:
# A. Define objects
require(sp)
require(raster)
require(rgdal)
require(rgeos)
require(dismo)
radius=25 # max search radius around 10 x 10 m cells
res <- vector() # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# B. Now find fraction of tree crown overlapping each subplot
for (i in 1:100) {
# Extract centroïd of each the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees in a max radius (define above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to cell's borders
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
# Keeps only trees whose crown is lower than the above distance (=overlap)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Creat polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res[i] <- sum(B$overlap) } else {
res[i] <- 0 }
}
# C. Check the result
res # sum of crown fraction overlapping each cell (works fine)
此算法需要大约 3 分钟才能 运行 处理 100 个单元格。我有一个包含 35000 个单元格的大型数据集,因此 150*7=1050 分钟或 17.5 小时。
任何提示系and/or优化此算法??
在使用 profvis
包进行快速分析后,似乎只需更改几行即可进行一些改进。这不是一个详尽的优化,我相信还有更多的改进是可能的。
我变了
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
到
pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
因为不需要每次都创建 SpatialPolygons
对象。如下面的分析图像所示(顶部已优化),这可能会很昂贵。
这是一些应该 运行 并行的代码。
# load only necessary package for code until parSapplyLB
# LB is load-balancing, which means it will distribute task to cores
# which are idle. This is great if jobs take an uneven amount of time
# to run.
library(parallel)
library(sp)
system.time({
# prepare the cluster, default is PSOCK on windows but can be FORK form *nix
cl <- makeCluster(4)
# worker is just a new instance of fresh vanilla R so you need to load the
# necessary libraries to all the workers
clusterEvalQ(cl = cl, library(sp))
clusterEvalQ(cl = cl, library(raster))
clusterEvalQ(cl = cl, library(rgdal))
clusterEvalQ(cl = cl, library(rgeos))
clusterEvalQ(cl = cl, library(dismo))
radius <- 25 # max search radius around 10 x 10 m cells
# res <- rep(NA, 100) # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# Export needed variables to workers
clusterExport(cl = cl, varlist = c("survey", "radius"))
# this function is your former for() loop, increase X = 1:100 to suit your needs
res <- parSapplyLB(cl = cl, X = 1:100, FUN = function(i, survey) {
# B. Now find fraction of tree crown overlapping each subplot
# Extract centroïd of each the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees in a max radius (define above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to cell's borders
pDist <- vector()
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
# Keeps only trees whose crown is lower than the above distance (=overlap)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Creat polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res <- sum(B$overlap) } else {
res <- 0 }
}, survey = survey)
stopCluster(cl = cl)
})
对于那些对树木、树冠和生物量感兴趣的人,有人建议我使用一种更快的方法来计算林分中树冠分布的生物量(感谢 H. Muller-Landau)。人们需要在逐个茎和 1x1m 网格的基础上进行思考。下面的代码需要 6 分钟到 运行,而之前的代码需要几个小时。希望感兴趣!
# Create a fake 1-ha forest stand:
trees <- data.frame(x=sample(99.5,1000,replace=T),y=sample(99.5,1000,replace=T),dbh=sample(100,1000,replace=T))
# Create a 1x1m cell matrix where to store the result
cdagb=matrix(0,nrow=100,ncol=100)
#Calculate the crownradius for every stem (fake proportion)
trees$crownradius = 2*trees$dbh^0.5
#Calculate the index of the 1x1 m quadrat in which the tree stem falls
trees$quadx=ceiling(trees$x)
trees$quady=ceiling(trees$y)
# Run the algo stem-by-stem
for (i in 1:nrow(trees)) {
# xdisp and ydisp are the integer cell position differences in x and y that should be checked to see if the crown of the focal tree overlaps
xdisp=seq(ceiling(trees$quadx[i]-trees$crownradius[i]),floor((trees$quadx[i]+trees$crownradius[i])),1)
xdisp[xdisp>=1000] <- 1000 +(1000 - xdisp[xdisp>=1000]) # mirror values on edges onto adjacent cells
xdisp[xdisp<1] <- -xdisp[xdisp<1] + 1 # avoid XY to be 0
ydisp=seq(ceiling(trees$quady[i]-trees$crownradius[i]),floor((trees$quady[i]+trees$crownradius[i])),1)
ydisp[ydisp>=500] <- 500 +(500 - ydisp[ydisp>=500])
ydisp[ydisp<1] <- -ydisp[ydisp<1] + 1
# Calculate the square of the x and y distances from the focal tree to the center of each of these cells
xdistsqr=(xdisp-trees$quadx[i])^2
ydistsqr=(ydisp-trees$quady[i])^2
nx=length(xdisp)
ny=length(ydisp)
# Calculate the distance from the center of each cell in the neighborhood to the focal tree
distmatrix=matrix(sqrt(rep(xdistsqr,each=ny)+rep(ydistsqr,nx)),nrow=nx,ncol=ny)
# includes only trees that overlap the grid cells
incmatrix=ifelse(distmatrix<trees$crownradius[i],1,0)
ncells=sum(incmatrix)
agbpercell=trees$agb[i]/ncells # divide the biomass by cell
addagbmatrix=incmatrix*agbpercell # relloacte biomass by cell
# add the biomass divided in square meter to each grid point
cdagb[xdisp,ydisp] = cdagb[xdisp,ydisp] + addagbmatrix
}
我正在尝试计算森林图中树冠与方形网格单元重叠的面积。以下是一个可重现的例子:
# A. Define objects
require(sp)
require(raster)
require(rgdal)
require(rgeos)
require(dismo)
radius=25 # max search radius around 10 x 10 m cells
res <- vector() # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# B. Now find fraction of tree crown overlapping each subplot
for (i in 1:100) {
# Extract centroïd of each the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees in a max radius (define above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to cell's borders
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
# Keeps only trees whose crown is lower than the above distance (=overlap)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Creat polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res[i] <- sum(B$overlap) } else {
res[i] <- 0 }
}
# C. Check the result
res # sum of crown fraction overlapping each cell (works fine)
此算法需要大约 3 分钟才能 运行 处理 100 个单元格。我有一个包含 35000 个单元格的大型数据集,因此 150*7=1050 分钟或 17.5 小时。 任何提示系and/or优化此算法??
在使用 profvis
包进行快速分析后,似乎只需更改几行即可进行一些改进。这不是一个详尽的优化,我相信还有更多的改进是可能的。
我变了
pDist <- vector()
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,],SpatialPolygons(list(Polygons(list(Polygon(corner)),1))))
}
到
pDist <- rep(NA, nrow(tem))
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
因为不需要每次都创建 SpatialPolygons
对象。如下面的分析图像所示(顶部已优化),这可能会很昂贵。
这是一些应该 运行 并行的代码。
# load only necessary package for code until parSapplyLB
# LB is load-balancing, which means it will distribute task to cores
# which are idle. This is great if jobs take an uneven amount of time
# to run.
library(parallel)
library(sp)
system.time({
# prepare the cluster, default is PSOCK on windows but can be FORK form *nix
cl <- makeCluster(4)
# worker is just a new instance of fresh vanilla R so you need to load the
# necessary libraries to all the workers
clusterEvalQ(cl = cl, library(sp))
clusterEvalQ(cl = cl, library(raster))
clusterEvalQ(cl = cl, library(rgdal))
clusterEvalQ(cl = cl, library(rgeos))
clusterEvalQ(cl = cl, library(dismo))
radius <- 25 # max search radius around 10 x 10 m cells
# res <- rep(NA, 100) # where to store results
# Create a fake set of trees with x,y coordinates and trunk diameter (=dbh)
set.seed(0)
survey <- data.frame(x=sample(99,1000,replace=T),y=sample(99,1000,replace=T),dbh=sample(100,1000,replace=T))
coordinates(survey) <- ~x+y
# Define 10 x 10 subplots
grid10 <- SpatialGrid(GridTopology(c(5,5),c(10,10),c(10,10)))
survey$subplot <- over(survey,grid10)
# Export needed variables to workers
clusterExport(cl = cl, varlist = c("survey", "radius"))
# this function is your former for() loop, increase X = 1:100 to suit your needs
res <- parSapplyLB(cl = cl, X = 1:100, FUN = function(i, survey) {
# B. Now find fraction of tree crown overlapping each subplot
# Extract centroïd of each the ith cell
centro <- expand.grid(x=seq(5,95,10),y=seq(5,95,10))[i,]
corner <- data.frame(x=c(centro$x-5,centro$x+5,centro$x+5,centro$x-5),y=c(centro$y-5,centro$y-5,centro$y+5,centro$y+5))
# Find trees in a max radius (define above)
tem <- survey[which((centro$x-survey$x)^2+(centro$y-survey$y)^2<=radius^2),]
# Define tree crown based on tree diameter
tem$crownr <- exp(-.438+.658*log(tem$dbh/10)) # crown radius in meter
# Compute the distance from each tree to cell's borders
pDist <- vector()
my.poly <- SpatialPolygons(list(Polygons(list(Polygon(corner)),1)))
for (k in 1:nrow(tem)) {
pDist[k] <- gDistance(tem[k,], my.poly)
}
# Keeps only trees whose crown is lower than the above distance (=overlap)
overlap.trees <- tem[which(pDist<=tem$crownr),]
overlap.trees$crowna <-overlap.trees$crownr^2*pi # compute crown area
# Creat polygons from overlapping crowns
c1 <- circles(coordinates(overlap.trees),overlap.trees$crownr, lonlat=F, dissolve=F)
crown <- polygons(c1)
Crown <- SpatialPolygonsDataFrame(polygons(c1),data=data.frame(dbh=overlap.trees$dbh,crown.area=overlap.trees$crowna))
# Create a fine grid points to retrieve the fraction of overlapping crowns
max.dist <- ceiling(sqrt(which.max((centro$x - overlap.trees$x)^2 + (centro$y - overlap.trees$y)^2))) # max distance to narrow search
finegrid <- as.data.frame(expand.grid(x=seq(centro$x-max.dist,centro$x+max.dist,1),y=seq(centro$y-max.dist,centro$y+max.dist,1)))
coordinates(finegrid) <- ~ x+y
A <- extract(Crown,finegrid)
Crown@data$ID <- seq(1,length(crown),1)
B <- as.data.frame(table(A$poly.ID))
if (nrow(B)>0) {
B <- merge(B,Crown@data,by.x="Var1",by.y="ID",all.x=T)
B$overlap <- B$Freq/B$crown.area
B$overlap[B$overlap>1] <- 1
res <- sum(B$overlap) } else {
res <- 0 }
}, survey = survey)
stopCluster(cl = cl)
})
对于那些对树木、树冠和生物量感兴趣的人,有人建议我使用一种更快的方法来计算林分中树冠分布的生物量(感谢 H. Muller-Landau)。人们需要在逐个茎和 1x1m 网格的基础上进行思考。下面的代码需要 6 分钟到 运行,而之前的代码需要几个小时。希望感兴趣!
# Create a fake 1-ha forest stand:
trees <- data.frame(x=sample(99.5,1000,replace=T),y=sample(99.5,1000,replace=T),dbh=sample(100,1000,replace=T))
# Create a 1x1m cell matrix where to store the result
cdagb=matrix(0,nrow=100,ncol=100)
#Calculate the crownradius for every stem (fake proportion)
trees$crownradius = 2*trees$dbh^0.5
#Calculate the index of the 1x1 m quadrat in which the tree stem falls
trees$quadx=ceiling(trees$x)
trees$quady=ceiling(trees$y)
# Run the algo stem-by-stem
for (i in 1:nrow(trees)) {
# xdisp and ydisp are the integer cell position differences in x and y that should be checked to see if the crown of the focal tree overlaps
xdisp=seq(ceiling(trees$quadx[i]-trees$crownradius[i]),floor((trees$quadx[i]+trees$crownradius[i])),1)
xdisp[xdisp>=1000] <- 1000 +(1000 - xdisp[xdisp>=1000]) # mirror values on edges onto adjacent cells
xdisp[xdisp<1] <- -xdisp[xdisp<1] + 1 # avoid XY to be 0
ydisp=seq(ceiling(trees$quady[i]-trees$crownradius[i]),floor((trees$quady[i]+trees$crownradius[i])),1)
ydisp[ydisp>=500] <- 500 +(500 - ydisp[ydisp>=500])
ydisp[ydisp<1] <- -ydisp[ydisp<1] + 1
# Calculate the square of the x and y distances from the focal tree to the center of each of these cells
xdistsqr=(xdisp-trees$quadx[i])^2
ydistsqr=(ydisp-trees$quady[i])^2
nx=length(xdisp)
ny=length(ydisp)
# Calculate the distance from the center of each cell in the neighborhood to the focal tree
distmatrix=matrix(sqrt(rep(xdistsqr,each=ny)+rep(ydistsqr,nx)),nrow=nx,ncol=ny)
# includes only trees that overlap the grid cells
incmatrix=ifelse(distmatrix<trees$crownradius[i],1,0)
ncells=sum(incmatrix)
agbpercell=trees$agb[i]/ncells # divide the biomass by cell
addagbmatrix=incmatrix*agbpercell # relloacte biomass by cell
# add the biomass divided in square meter to each grid point
cdagb[xdisp,ydisp] = cdagb[xdisp,ydisp] + addagbmatrix
}