r 循环优化（getMinBBox with sf and data.table）

Question

我对如何加速我的代码有一些疑问。

我有一个包含超过 100000 个多边形的 shp，我想为每个多边形计算最小边界框。我在这里找到了这部分的一些代码：https://rdrr.io/cran/flightplanning/src/R/utils.R

代码一次从一个多边形的矩阵形式的坐标计算最小边界框。

我使用循环和 data.table 但是对于更大的 shp（50000+ 多边形）这个过程真的很耗时。

是否可以在 data.table 中将 getMinBBox() 与 lapply 一起使用？

谢谢

我的代码：

library(sf)
library(data.table)

nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc

nc=st_cast(nc,"POLYGON")
cor=as.data.frame(st_coordinates(nc))
cor
setDT(cor)
setDT(nc)
data <- data.table(width=numeric(), height=numeric())

for(i in 1:nrow(nc)){
    l=as.matrix(cor[L2==i,1:2])
    l=getMinBBox(l)
    data=rbind(data,t(l[2:3]))}

nc=cbind(nc,data)
nc


## execution time for each part of code inside loop
library(microbenchmark)
mb=microbenchmark(
l<-as.matrix(cor[L2==1,1:2]),
ll<-getMinBBox(l),
data<-rbind(data,t(ll[2:3]))
)

Unit: microseconds
                              expr    min      lq     mean  median      uq    max neval cld
 l <- as.matrix(cor[L2 == 1, 1:2]) 2144.7 2478.40 2606.981 2574.05 2711.55 4396.3   100   c
               ll <- getMinBBox(l)  885.3  976.05 1050.203 1054.10 1103.55 1788.3   100  b 
   data <- rbind(data, t(ll[2:3]))  152.6  182.95  207.617  201.60  224.05  546.1   100 a  




## getMinBBox code from https://rdrr.io/cran/flightplanning/src/R/utils.R


getMinBBox <- function(xy) {
  stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)

  ## rotating calipers algorithm using the convex hull
  H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
  n    <- length(H)      ## number of hull vertices
  hull <- xy[H, ]        ## hull vertices

  ## unit basis vectors for all subspaces spanned by the hull edges
  hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
  hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
  huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length

  ## unit basis vectors for the orthogonal subspaces
  ## rotation by 90 deg -> y' = x, x' = -y
  ouDir <- cbind(-huDir[ , 2], huDir[ , 1])

  ## project hull vertices on the subspaces spanned by the hull edges, and on
  ## the subspaces spanned by their orthogonal complements - in subspace coords
  projMat <- rbind(huDir, ouDir) %*% t(hull)

  ## range of projections and corresponding width/height of bounding rectangle
  rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
  rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
  widths  <- numeric(n)
  heights <- numeric(n)

  for(i in seq(along=numeric(n))) {
    rangeH[i, ] <- range(projMat[  i, ])

    ## the orthogonal subspace is in the 2nd half of the matrix
    rangeO[i, ] <- range(projMat[n+i, ])
    widths[i]   <- abs(diff(rangeH[i, ]))
    heights[i]  <- abs(diff(rangeO[i, ]))
  }

  ## extreme projections for min-area rect in subspace coordinates
  ## hull edge leading to minimum-area
  eMin  <- which.min(widths*heights)
  hProj <- rbind(   rangeH[eMin, ], 0)
  oProj <- rbind(0, rangeO[eMin, ])

  ## move projections to rectangle corners
  hPts <- sweep(hProj, 1, oProj[ , 1], "+")
  oPts <- sweep(hProj, 1, oProj[ , 2], "+")

  ## corners in standard coordinates, rows = x,y, columns = corners
  ## in combined (4x2)-matrix: reverse point order to be usable in polygon()
  ## basis formed by hull edge and orthogonal subspace
  basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
  hCorn <- basis %*% hPts
  oCorn <- basis %*% oPts
  pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))

  ## angle of longer edge pointing up
  dPts <- diff(pts)
  e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
  eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
  deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees

  return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}

#' Provided an angle, calculate the corresponding minimum bounding box
#'
#' @param vertices the vertices which to get the bounding box from
#' @param alpha the angle to rotate the bounding box
#'
#' @importFrom grDevices chull
getBBoxAngle = function(vertices, alpha) {
  centroid = apply(vertices, 2, mean)

  angleRadians = (90-alpha) * pi/180

  rotatedX = cos(angleRadians) * (vertices[, 1] - centroid[1]) -
             sin(angleRadians) * (vertices[, 2] - centroid[2]) +
             centroid[1]
  rotatedY = sin(angleRadians) * (vertices[, 1] - centroid[1]) +
             cos(angleRadians) * (vertices[, 2] - centroid[2]) +
             centroid[2]

  bBox = cbind(range(rotatedX), range(rotatedY))
  height = diff(bBox[,2])
  width = diff(bBox[,1])

  bBox = rbind(bBox[1,],
               c(bBox[1, 1], bBox[2, 2]),
               (bBox[2, ]),
               c(bBox[2, 1], bBox[1, 2]))

  bBoxUnrotatedX = cos(-angleRadians) * (bBox[, 1] - centroid[1]) -
                   sin(-angleRadians) * (bBox[, 2] - centroid[2]) +
                   centroid[1]
  bBoxUnrotatedY = sin(-angleRadians) * (bBox[, 1] - centroid[1]) +
                   cos(-angleRadians) * (bBox[, 2] - centroid[2]) +
                   centroid[2]
  unrotatedBbox = cbind(bBoxUnrotatedX, bBoxUnrotatedY)
  list(angle=alpha, height=height, width=width, pts=unrotatedBbox)
}

#' Given a xy matrix of points, adjust the
#' points to avoid acute angles < 80 degrees
#'
#' @param xy xy dataframe
#' @param angle angle of the flight lines
#' @param minAngle the minimum angle to below which will be projected
adjustAcuteAngles = function(xy, angle, minAngle = 80) {
  xy_mat = as.matrix(xy[,1:2])
  rads = angle*pi/180
  nPoints = nrow(xy)

  # Using cosine rule on vectors
  # acos(theta) = (AB_vec x BC_vec) / (|AB| * |BC|)
  vectors = xy[-1,]-xy[-nPoints,]

  # Final angles for each point
  angles = getAngles(xy_mat)

  # Mask angles less than minimum
  mask = angles <= minAngle
  mask[is.na(mask)] = FALSE
  # Index of point with angles less than minimum
  indices = (seq_along(mask)+1)[mask]

  indicesOffset = (indices %% 2 == 1) * 2 - 1

  # Calculate perpendicular lines for points
  x = xy[indices, 1]
  y = xy[indices, 2]
  nIndices = length(indices)
  a = rep(tan(rads), nIndices)
  b = y - a * x
  a_perpendicular = -1/a
  b_perpendicular = y - a_perpendicular*x


  b2 = b_perpendicular
  a2 = a_perpendicular

  # Intersect previous straight line with the perpendicular
  x = xy[indices-indicesOffset, 1]
  y = xy[indices-indicesOffset, 2]
  a1 = a
  b1 = y - a1 * x
  xs = (b2-b1)/(a1-a2)
  ys = a2*xs+b2

  # Replace angled points
  xy[indices-indicesOffset, 1] = xs
  xy[indices-indicesOffset, 2] = ys
  return(xy)
}

#' Create outer curves for the flight lines
#'
#' @param waypoints the waypoints of the flight plan
#' @param angle angle for the flight lines
#' @param flightLineDistance the distance between the flight lines in meters
outerCurvePoints = function(waypoints, angle, flightLineDistance) {
  mask = getAngles(waypoints) == 180
  mask[is.na(mask)] = TRUE

  rads = angle*pi/180
  angularCoef = tan(rads)
  nWaypoints = nrow(waypoints)

  oddPoints = seq(1, nWaypoints, 2)
  evenPoints = seq(2, nWaypoints, 2)

  offsetX = waypoints[evenPoints,1]-waypoints[oddPoints,1]

  intercept = waypoints[oddPoints, 2] - angularCoef*waypoints[oddPoints, 1]
  xMove = rep(flightLineDistance / sqrt(1 + angularCoef**2), nWaypoints)
  isNegative = rep(offsetX < 0, each=2)
  isNegative[seq(1, nWaypoints, 2)] = !isNegative[seq(1, nWaypoints, 2)]
  xMove[isNegative] = -xMove[isNegative]
  yMove = xMove * angularCoef

  xs = waypoints[, 1] + xMove
  ys = waypoints[, 2] + yMove

  curvePoints = as.data.frame(cbind(xs, ys))
  curvePoints$index = seq(1, nWaypoints)
  curvePoints$before = (curvePoints$index %% 2) == 1
  curvePoints = curvePoints[!c(FALSE, mask),]

  return(curvePoints)
}

#' Get angles for each point considering
#' the two neighbors points
#'
#' @param waypoints the waypoints of the flight plan
getAngles = function(waypoints) {
  nPoints = nrow(waypoints)
  vectors = waypoints[-1,]-waypoints[-nPoints,]
  dists = sqrt(apply(vectors ** 2, 1, sum))

  ab = vectors[-nPoints+1,]
  bc = vectors[-1,]
  dist_ab = dists[-nPoints+1]
  dist_bc = dists[-1]
  dotProds = sapply(seq_len(nPoints-2), function(x) ab[x,] %*% bc[x,])
  # Final angles for each point
  angles = suppressWarnings(180-round(acos(dotProds/(dist_ab*dist_bc))*180/pi,2))
  angles
}

#' Class for Flight Parameters
setClass("Flight Parameters",
         slots = c(
           flight.line.distance  = "numeric",
           flight.speed.kmh      = "numeric",
           front.overlap         = "numeric",
           gsd                   = "numeric",
           height                = "numeric",
           ground.height         = "numeric",
           minimum.shutter.speed = "character",
           photo.interval        = "numeric"
         ),
         prototype = list(
           flight.line.distance  = NA_real_,
           flight.speed.kmh      = NA_real_,
           front.overlap         = NA_real_,
           gsd                   = NA_real_,
           ground.height         = NA_real_,
           height                = NA_real_,
           minimum.shutter.speed = NA_character_,
           photo.interval        = NA_real_
         )
)

Answer 1

The example code is so small that some approaches might only be faster if you were looking at larger data.tables. It seems to me that you could shave some time off by doing a split-combine approach, which, in principle, you could 运行 in parallel, e.g. by using mclapply instead of lapply (on linux/macos). You are probably also a bit faster if you use rbindlist rather than rbind and transpose. On my machine this approach almost halves the execution time on this tiny set. Using mclapply(co, gmb2, mc.cores=4L) or similar may be beneficial if the individual L2 groups are larger, but only hurts performance with this small dataset. Short of changing the code of the getMinBBox function, this would at least reduce the time somewhat and may be worth a try.

As a side note: using openblas or Intel's MKL instead of the default R libraries can make a big difference in the matrix calculations, so you should definitely make sure you use those.

# function code
    getMinBBox <- function(xy) {
        stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)

        ## rotating calipers algorithm using the convex hull
        H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
        n    <- length(H)      ## number of hull vertices
        hull <- xy[H, ]        ## hull vertices

        ## unit basis vectors for all subspaces spanned by the hull edges
        hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
        hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
        huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length

        ## unit basis vectors for the orthogonal subspaces
        ## rotation by 90 deg -> y' = x, x' = -y
        ouDir <- cbind(-huDir[ , 2], huDir[ , 1])

        ## project hull vertices on the subspaces spanned by the hull edges, and on
        ## the subspaces spanned by their orthogonal complements - in subspace coords
        projMat <- rbind(huDir, ouDir) %*% t(hull)

        ## range of projections and corresponding width/height of bounding rectangle
        rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
        rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
        widths  <- numeric(n)
        heights <- numeric(n)

        for(i in seq(along=numeric(n))) {
            rangeH[i, ] <- range(projMat[  i, ])

            ## the orthogonal subspace is in the 2nd half of the matrix
            rangeO[i, ] <- range(projMat[n+i, ])
            widths[i]   <- abs(diff(rangeH[i, ]))
            heights[i]  <- abs(diff(rangeO[i, ]))
        }

        ## extreme projections for min-area rect in subspace coordinates
        ## hull edge leading to minimum-area
        eMin  <- which.min(widths*heights)
        hProj <- rbind(   rangeH[eMin, ], 0)
        oProj <- rbind(0, rangeO[eMin, ])

        ## move projections to rectangle corners
        hPts <- sweep(hProj, 1, oProj[ , 1], "+")
        oPts <- sweep(hProj, 1, oProj[ , 2], "+")

        ## corners in standard coordinates, rows = x,y, columns = corners
        ## in combined (4x2)-matrix: reverse point order to be usable in polygon()
        ## basis formed by hull edge and orthogonal subspace
        basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
        hCorn <- basis %*% hPts
        oCorn <- basis %*% oPts
        pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))

        ## angle of longer edge pointing up
        dPts <- diff(pts)
        e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
        eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
        deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees

        return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
    }

#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type:  MULTIPOLYGON
#> dimension:      XY
#> bbox:           xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID):    4267
#> proj4string:    +proj=longlat +datum=NAD27 +no_defs
            height                = NA_real_,
            minimum.shutter.speed = NA_character_,
            photo.interval        = NA_real_
        )
    )


# common code:
lapply(c("sf", "data.table", "microbenchmark"),
    require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)

# your code (wrapped in a function):
getbox1 <- function(){
    data <- data.table(width=numeric(), height=numeric())
    for(i in 1:nrow(nc)){
        l=as.matrix(coor[L2==i,1:2])
        ll=getMinBBox(l)
        data=rbind(data,t(ll[2:3]))}
    data
}

## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb2))
}

microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#>  expr      min       lq      mean   median       uq      max neval cld
#>    b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236   100   b
#>    b2 3.956369 4.186426  9.919638 4.562312 19.13847 47.27808   100  a

A final edit: You can squeeze some more performance out of the getMinBBox function by simplifying it a bit.

# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
    stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
    H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
    n    <- length(H)      ## number of hull vertices
    hull <- xy[H, ]        ## hull vertices
    hDir  <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
    hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
    huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length
    ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
    projMat <- rbind(huDir, ouDir) %*% t(hull)
    rangeH  <- matrix(numeric(n*2), ncol=2)  ## hull edge
    rangeO  <- matrix(numeric(n*2), ncol=2)  ## orthogonal subspace
    widths  <- numeric(n)
    heights <- numeric(n)
    for(i in seq(along=numeric(n))) {
        rangeH[i, ] <- range(projMat[  i, ])
        rangeO[i, ] <- range(projMat[n+i, ])
        widths[i]   <- abs(diff(rangeH[i, ]))
        heights[i]  <- abs(diff(rangeO[i, ]))
    }
    eMin  <- which.min(widths*heights)
    hProj <- rbind(   rangeH[eMin, ], 0)
    oProj <- rbind(0, rangeO[eMin, ])
    hPts <- sweep(hProj, 1, oProj[ , 1], "+")
    oPts <- sweep(hProj, 1, oProj[ , 2], "+")
    basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
    hCorn <- basis %*% hPts
    oCorn <- basis %*% oPts
    pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
    dPts <- diff(pts)
    e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
    eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
    deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees
    return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}


# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
    require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)

getMinBBox2 <- function(xy) {
    # stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
    # skip checking
    H    <- grDevices::chull(xy)      ## hull indices, vertices ordered clockwise
    n    <- length(H)      ## number of hull vertices
    hull <- xy[H, ]        ## hull vertices
    hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
    hLens <- sqrt(rowSums(hDir^2))        ## length of basis vectors
    huDir <- diag(1/hLens) %*% hDir       ## scaled to unit length
    ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
    projMat <- tcrossprod(rbind(huDir, ouDir), hull)
    HO <- list(seq_len(n), (seq_len(n)+n))
    rr <- matrixStats::rowRanges(projMat)
    rd <- as.numeric(matrixStats::rowDiffs(rr))
    rangeH <- rr[HO[[1]],]
    rangeO <- rr[HO[[2]],]
    widths <- rd[HO[[1]]]
    heights <- rd[HO[[2]]]
    eMin  <- which.min(widths*heights)
    ## seems like sweeping is not required here?
    # hProj <- rbind(   rangeH[eMin, ], 0)
    # oProj <- rbind(0, rangeO[eMin, ])
    # hPts <- sweep(hProj, 1, oProj[ , 1], "+")
    # oPts <- sweep(hProj, 1, oProj[ , 2], "+")
    hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
    oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
    basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
    hCorn <- basis %*% hPts
    oCorn <- basis %*% oPts
    pts   <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
    colnames(pts) <- c("X", "Y")
    dPts <- diff(pts)
    e    <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
    eUp  <- e * sign(e[2])       ## rotate upwards 180 deg if necessary
    deg  <- atan2(eUp[2], eUp[1])*180 / pi     ## angle in degrees
    return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}

# your code (wrapped in a function):
getbox1 <- function(){
    data <- data.table(width=numeric(), height=numeric())
    for(i in 1:nrow(nc)){
        l=as.matrix(coor[L2==i,1:2])
        ll=getMinBBox(l)
        data=rbind(data,t(ll[2:3]))}
    data
}

## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
    co <- split(coor, coor$L2)
    rbindlist(lapply(co, gmb3))
}

all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#>  expr      min       lq     mean   median       uq       max neval cld
#>    b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924   100   c
#>    b2 4.020513 4.071012 4.444000 4.166884 4.274048  7.728204   100  b 
#>    b3 2.871639 2.938294 3.111886 2.986174 3.046096  5.935133   100 a

Answer 2

昨天我发现了一些不同的方法来解决我的 package - parallel 问题。这是一种非常快速的方法，但我找不到如何在其自己的列中写入长度和宽度。

library(lidR)
library(sf) 
library(raster)
library(data.table)
library(parallel)

nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc

setDT(nc)

overFun <- function(x){
  unlist(getMinBBox(as.matrix(st_coordinates(x)[,1:2]))[2:3])
}


cl <- makeCluster(detectCores() - 1)
clusterExport(cl, list("getMinBBox","st_coordinates","data.table"))
nc[,c("width","length"):=.(clusterMap(cl, overFun, nc$geometry)),]
stopCluster(cl)

r 循环优化（getMinBBox with sf and data.table）

r loop optimisation (getMinBBox with sf and data.table)

r

data.table

sf