r 循环优化(getMinBBox with sf and data.table)
r loop optimisation (getMinBBox with sf and data.table)
我对如何加速我的代码有一些疑问。
我有一个包含超过 100000 个多边形的 shp,我想为每个多边形计算最小边界框。我在这里找到了这部分的一些代码:https://rdrr.io/cran/flightplanning/src/R/utils.R
代码一次从一个多边形的矩阵形式的坐标计算最小边界框。
我使用循环和 data.table 但是对于更大的 shp(50000+ 多边形)这个过程真的很耗时。
是否可以在 data.table 中将 getMinBBox() 与 lapply 一起使用?
谢谢
我的代码:
library(sf)
library(data.table)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc
nc=st_cast(nc,"POLYGON")
cor=as.data.frame(st_coordinates(nc))
cor
setDT(cor)
setDT(nc)
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(cor[L2==i,1:2])
l=getMinBBox(l)
data=rbind(data,t(l[2:3]))}
nc=cbind(nc,data)
nc
## execution time for each part of code inside loop
library(microbenchmark)
mb=microbenchmark(
l<-as.matrix(cor[L2==1,1:2]),
ll<-getMinBBox(l),
data<-rbind(data,t(ll[2:3]))
)
Unit: microseconds
expr min lq mean median uq max neval cld
l <- as.matrix(cor[L2 == 1, 1:2]) 2144.7 2478.40 2606.981 2574.05 2711.55 4396.3 100 c
ll <- getMinBBox(l) 885.3 976.05 1050.203 1054.10 1103.55 1788.3 100 b
data <- rbind(data, t(ll[2:3])) 152.6 182.95 207.617 201.60 224.05 546.1 100 a
## getMinBBox code from https://rdrr.io/cran/flightplanning/src/R/utils.R
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#' Provided an angle, calculate the corresponding minimum bounding box
#'
#' @param vertices the vertices which to get the bounding box from
#' @param alpha the angle to rotate the bounding box
#'
#' @importFrom grDevices chull
getBBoxAngle = function(vertices, alpha) {
centroid = apply(vertices, 2, mean)
angleRadians = (90-alpha) * pi/180
rotatedX = cos(angleRadians) * (vertices[, 1] - centroid[1]) -
sin(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[1]
rotatedY = sin(angleRadians) * (vertices[, 1] - centroid[1]) +
cos(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[2]
bBox = cbind(range(rotatedX), range(rotatedY))
height = diff(bBox[,2])
width = diff(bBox[,1])
bBox = rbind(bBox[1,],
c(bBox[1, 1], bBox[2, 2]),
(bBox[2, ]),
c(bBox[2, 1], bBox[1, 2]))
bBoxUnrotatedX = cos(-angleRadians) * (bBox[, 1] - centroid[1]) -
sin(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[1]
bBoxUnrotatedY = sin(-angleRadians) * (bBox[, 1] - centroid[1]) +
cos(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[2]
unrotatedBbox = cbind(bBoxUnrotatedX, bBoxUnrotatedY)
list(angle=alpha, height=height, width=width, pts=unrotatedBbox)
}
#' Given a xy matrix of points, adjust the
#' points to avoid acute angles < 80 degrees
#'
#' @param xy xy dataframe
#' @param angle angle of the flight lines
#' @param minAngle the minimum angle to below which will be projected
adjustAcuteAngles = function(xy, angle, minAngle = 80) {
xy_mat = as.matrix(xy[,1:2])
rads = angle*pi/180
nPoints = nrow(xy)
# Using cosine rule on vectors
# acos(theta) = (AB_vec x BC_vec) / (|AB| * |BC|)
vectors = xy[-1,]-xy[-nPoints,]
# Final angles for each point
angles = getAngles(xy_mat)
# Mask angles less than minimum
mask = angles <= minAngle
mask[is.na(mask)] = FALSE
# Index of point with angles less than minimum
indices = (seq_along(mask)+1)[mask]
indicesOffset = (indices %% 2 == 1) * 2 - 1
# Calculate perpendicular lines for points
x = xy[indices, 1]
y = xy[indices, 2]
nIndices = length(indices)
a = rep(tan(rads), nIndices)
b = y - a * x
a_perpendicular = -1/a
b_perpendicular = y - a_perpendicular*x
b2 = b_perpendicular
a2 = a_perpendicular
# Intersect previous straight line with the perpendicular
x = xy[indices-indicesOffset, 1]
y = xy[indices-indicesOffset, 2]
a1 = a
b1 = y - a1 * x
xs = (b2-b1)/(a1-a2)
ys = a2*xs+b2
# Replace angled points
xy[indices-indicesOffset, 1] = xs
xy[indices-indicesOffset, 2] = ys
return(xy)
}
#' Create outer curves for the flight lines
#'
#' @param waypoints the waypoints of the flight plan
#' @param angle angle for the flight lines
#' @param flightLineDistance the distance between the flight lines in meters
outerCurvePoints = function(waypoints, angle, flightLineDistance) {
mask = getAngles(waypoints) == 180
mask[is.na(mask)] = TRUE
rads = angle*pi/180
angularCoef = tan(rads)
nWaypoints = nrow(waypoints)
oddPoints = seq(1, nWaypoints, 2)
evenPoints = seq(2, nWaypoints, 2)
offsetX = waypoints[evenPoints,1]-waypoints[oddPoints,1]
intercept = waypoints[oddPoints, 2] - angularCoef*waypoints[oddPoints, 1]
xMove = rep(flightLineDistance / sqrt(1 + angularCoef**2), nWaypoints)
isNegative = rep(offsetX < 0, each=2)
isNegative[seq(1, nWaypoints, 2)] = !isNegative[seq(1, nWaypoints, 2)]
xMove[isNegative] = -xMove[isNegative]
yMove = xMove * angularCoef
xs = waypoints[, 1] + xMove
ys = waypoints[, 2] + yMove
curvePoints = as.data.frame(cbind(xs, ys))
curvePoints$index = seq(1, nWaypoints)
curvePoints$before = (curvePoints$index %% 2) == 1
curvePoints = curvePoints[!c(FALSE, mask),]
return(curvePoints)
}
#' Get angles for each point considering
#' the two neighbors points
#'
#' @param waypoints the waypoints of the flight plan
getAngles = function(waypoints) {
nPoints = nrow(waypoints)
vectors = waypoints[-1,]-waypoints[-nPoints,]
dists = sqrt(apply(vectors ** 2, 1, sum))
ab = vectors[-nPoints+1,]
bc = vectors[-1,]
dist_ab = dists[-nPoints+1]
dist_bc = dists[-1]
dotProds = sapply(seq_len(nPoints-2), function(x) ab[x,] %*% bc[x,])
# Final angles for each point
angles = suppressWarnings(180-round(acos(dotProds/(dist_ab*dist_bc))*180/pi,2))
angles
}
#' Class for Flight Parameters
setClass("Flight Parameters",
slots = c(
flight.line.distance = "numeric",
flight.speed.kmh = "numeric",
front.overlap = "numeric",
gsd = "numeric",
height = "numeric",
ground.height = "numeric",
minimum.shutter.speed = "character",
photo.interval = "numeric"
),
prototype = list(
flight.line.distance = NA_real_,
flight.speed.kmh = NA_real_,
front.overlap = NA_real_,
gsd = NA_real_,
ground.height = NA_real_,
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
The example code is so small that some approaches might only be faster if you were looking at larger data.tables. It seems to me that you could shave some time off by doing a split-combine approach, which, in principle, you could 运行 in parallel, e.g. by using mclapply
instead of lapply
(on linux/macos). You are probably also a bit faster if you use rbindlist
rather than rbind
and transpose.
On my machine this approach almost halves the execution time on this tiny set. Using mclapply(co, gmb2, mc.cores=4L)
or similar may be beneficial if the individual L2 groups are larger, but only hurts performance with this small dataset.
Short of changing the code of the getMinBBox
function, this would at least reduce the time somewhat and may be worth a try.
As a side note: using openblas or Intel's MKL instead of the default R libraries can make a big difference in the matrix calculations, so you should definitely make sure you use those.
# function code
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID): 4267
#> proj4string: +proj=longlat +datum=NAD27 +no_defs
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
# common code:
lapply(c("sf", "data.table", "microbenchmark"),
require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236 100 b
#> b2 3.956369 4.186426 9.919638 4.562312 19.13847 47.27808 100 a
A final edit: You can squeeze some more performance out of the getMinBBox function by simplifying it a bit.
# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- rbind(huDir, ouDir) %*% t(hull)
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
getMinBBox2 <- function(xy) {
# stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
# skip checking
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- tcrossprod(rbind(huDir, ouDir), hull)
HO <- list(seq_len(n), (seq_len(n)+n))
rr <- matrixStats::rowRanges(projMat)
rd <- as.numeric(matrixStats::rowDiffs(rr))
rangeH <- rr[HO[[1]],]
rangeO <- rr[HO[[2]],]
widths <- rd[HO[[1]]]
heights <- rd[HO[[2]]]
eMin <- which.min(widths*heights)
## seems like sweeping is not required here?
# hProj <- rbind( rangeH[eMin, ], 0)
# oProj <- rbind(0, rangeO[eMin, ])
# hPts <- sweep(hProj, 1, oProj[ , 1], "+")
# oPts <- sweep(hProj, 1, oProj[ , 2], "+")
hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
colnames(pts) <- c("X", "Y")
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb3))
}
all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924 100 c
#> b2 4.020513 4.071012 4.444000 4.166884 4.274048 7.728204 100 b
#> b3 2.871639 2.938294 3.111886 2.986174 3.046096 5.935133 100 a
昨天我发现了一些不同的方法来解决我的 package - parallel 问题。这是一种非常快速的方法,但我找不到如何在其自己的列中写入长度和宽度。
library(lidR)
library(sf)
library(raster)
library(data.table)
library(parallel)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc
setDT(nc)
overFun <- function(x){
unlist(getMinBBox(as.matrix(st_coordinates(x)[,1:2]))[2:3])
}
cl <- makeCluster(detectCores() - 1)
clusterExport(cl, list("getMinBBox","st_coordinates","data.table"))
nc[,c("width","length"):=.(clusterMap(cl, overFun, nc$geometry)),]
stopCluster(cl)
我对如何加速我的代码有一些疑问。
我有一个包含超过 100000 个多边形的 shp,我想为每个多边形计算最小边界框。我在这里找到了这部分的一些代码:https://rdrr.io/cran/flightplanning/src/R/utils.R
代码一次从一个多边形的矩阵形式的坐标计算最小边界框。
我使用循环和 data.table 但是对于更大的 shp(50000+ 多边形)这个过程真的很耗时。
是否可以在 data.table 中将 getMinBBox() 与 lapply 一起使用?
谢谢
我的代码:
library(sf)
library(data.table)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc
nc=st_cast(nc,"POLYGON")
cor=as.data.frame(st_coordinates(nc))
cor
setDT(cor)
setDT(nc)
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(cor[L2==i,1:2])
l=getMinBBox(l)
data=rbind(data,t(l[2:3]))}
nc=cbind(nc,data)
nc
## execution time for each part of code inside loop
library(microbenchmark)
mb=microbenchmark(
l<-as.matrix(cor[L2==1,1:2]),
ll<-getMinBBox(l),
data<-rbind(data,t(ll[2:3]))
)
Unit: microseconds
expr min lq mean median uq max neval cld
l <- as.matrix(cor[L2 == 1, 1:2]) 2144.7 2478.40 2606.981 2574.05 2711.55 4396.3 100 c
ll <- getMinBBox(l) 885.3 976.05 1050.203 1054.10 1103.55 1788.3 100 b
data <- rbind(data, t(ll[2:3])) 152.6 182.95 207.617 201.60 224.05 546.1 100 a
## getMinBBox code from https://rdrr.io/cran/flightplanning/src/R/utils.R
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#' Provided an angle, calculate the corresponding minimum bounding box
#'
#' @param vertices the vertices which to get the bounding box from
#' @param alpha the angle to rotate the bounding box
#'
#' @importFrom grDevices chull
getBBoxAngle = function(vertices, alpha) {
centroid = apply(vertices, 2, mean)
angleRadians = (90-alpha) * pi/180
rotatedX = cos(angleRadians) * (vertices[, 1] - centroid[1]) -
sin(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[1]
rotatedY = sin(angleRadians) * (vertices[, 1] - centroid[1]) +
cos(angleRadians) * (vertices[, 2] - centroid[2]) +
centroid[2]
bBox = cbind(range(rotatedX), range(rotatedY))
height = diff(bBox[,2])
width = diff(bBox[,1])
bBox = rbind(bBox[1,],
c(bBox[1, 1], bBox[2, 2]),
(bBox[2, ]),
c(bBox[2, 1], bBox[1, 2]))
bBoxUnrotatedX = cos(-angleRadians) * (bBox[, 1] - centroid[1]) -
sin(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[1]
bBoxUnrotatedY = sin(-angleRadians) * (bBox[, 1] - centroid[1]) +
cos(-angleRadians) * (bBox[, 2] - centroid[2]) +
centroid[2]
unrotatedBbox = cbind(bBoxUnrotatedX, bBoxUnrotatedY)
list(angle=alpha, height=height, width=width, pts=unrotatedBbox)
}
#' Given a xy matrix of points, adjust the
#' points to avoid acute angles < 80 degrees
#'
#' @param xy xy dataframe
#' @param angle angle of the flight lines
#' @param minAngle the minimum angle to below which will be projected
adjustAcuteAngles = function(xy, angle, minAngle = 80) {
xy_mat = as.matrix(xy[,1:2])
rads = angle*pi/180
nPoints = nrow(xy)
# Using cosine rule on vectors
# acos(theta) = (AB_vec x BC_vec) / (|AB| * |BC|)
vectors = xy[-1,]-xy[-nPoints,]
# Final angles for each point
angles = getAngles(xy_mat)
# Mask angles less than minimum
mask = angles <= minAngle
mask[is.na(mask)] = FALSE
# Index of point with angles less than minimum
indices = (seq_along(mask)+1)[mask]
indicesOffset = (indices %% 2 == 1) * 2 - 1
# Calculate perpendicular lines for points
x = xy[indices, 1]
y = xy[indices, 2]
nIndices = length(indices)
a = rep(tan(rads), nIndices)
b = y - a * x
a_perpendicular = -1/a
b_perpendicular = y - a_perpendicular*x
b2 = b_perpendicular
a2 = a_perpendicular
# Intersect previous straight line with the perpendicular
x = xy[indices-indicesOffset, 1]
y = xy[indices-indicesOffset, 2]
a1 = a
b1 = y - a1 * x
xs = (b2-b1)/(a1-a2)
ys = a2*xs+b2
# Replace angled points
xy[indices-indicesOffset, 1] = xs
xy[indices-indicesOffset, 2] = ys
return(xy)
}
#' Create outer curves for the flight lines
#'
#' @param waypoints the waypoints of the flight plan
#' @param angle angle for the flight lines
#' @param flightLineDistance the distance between the flight lines in meters
outerCurvePoints = function(waypoints, angle, flightLineDistance) {
mask = getAngles(waypoints) == 180
mask[is.na(mask)] = TRUE
rads = angle*pi/180
angularCoef = tan(rads)
nWaypoints = nrow(waypoints)
oddPoints = seq(1, nWaypoints, 2)
evenPoints = seq(2, nWaypoints, 2)
offsetX = waypoints[evenPoints,1]-waypoints[oddPoints,1]
intercept = waypoints[oddPoints, 2] - angularCoef*waypoints[oddPoints, 1]
xMove = rep(flightLineDistance / sqrt(1 + angularCoef**2), nWaypoints)
isNegative = rep(offsetX < 0, each=2)
isNegative[seq(1, nWaypoints, 2)] = !isNegative[seq(1, nWaypoints, 2)]
xMove[isNegative] = -xMove[isNegative]
yMove = xMove * angularCoef
xs = waypoints[, 1] + xMove
ys = waypoints[, 2] + yMove
curvePoints = as.data.frame(cbind(xs, ys))
curvePoints$index = seq(1, nWaypoints)
curvePoints$before = (curvePoints$index %% 2) == 1
curvePoints = curvePoints[!c(FALSE, mask),]
return(curvePoints)
}
#' Get angles for each point considering
#' the two neighbors points
#'
#' @param waypoints the waypoints of the flight plan
getAngles = function(waypoints) {
nPoints = nrow(waypoints)
vectors = waypoints[-1,]-waypoints[-nPoints,]
dists = sqrt(apply(vectors ** 2, 1, sum))
ab = vectors[-nPoints+1,]
bc = vectors[-1,]
dist_ab = dists[-nPoints+1]
dist_bc = dists[-1]
dotProds = sapply(seq_len(nPoints-2), function(x) ab[x,] %*% bc[x,])
# Final angles for each point
angles = suppressWarnings(180-round(acos(dotProds/(dist_ab*dist_bc))*180/pi,2))
angles
}
#' Class for Flight Parameters
setClass("Flight Parameters",
slots = c(
flight.line.distance = "numeric",
flight.speed.kmh = "numeric",
front.overlap = "numeric",
gsd = "numeric",
height = "numeric",
ground.height = "numeric",
minimum.shutter.speed = "character",
photo.interval = "numeric"
),
prototype = list(
flight.line.distance = NA_real_,
flight.speed.kmh = NA_real_,
front.overlap = NA_real_,
gsd = NA_real_,
ground.height = NA_real_,
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
The example code is so small that some approaches might only be faster if you were looking at larger data.tables. It seems to me that you could shave some time off by doing a split-combine approach, which, in principle, you could 运行 in parallel, e.g. by using mclapply
instead of lapply
(on linux/macos). You are probably also a bit faster if you use rbindlist
rather than rbind
and transpose.
On my machine this approach almost halves the execution time on this tiny set. Using mclapply(co, gmb2, mc.cores=4L)
or similar may be beneficial if the individual L2 groups are larger, but only hurts performance with this small dataset.
Short of changing the code of the getMinBBox
function, this would at least reduce the time somewhat and may be worth a try.
As a side note: using openblas or Intel's MKL instead of the default R libraries can make a big difference in the matrix calculations, so you should definitely make sure you use those.
# function code
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
## rotating calipers algorithm using the convex hull
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
## unit basis vectors for all subspaces spanned by the hull edges
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
## unit basis vectors for the orthogonal subspaces
## rotation by 90 deg -> y' = x, x' = -y
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
## project hull vertices on the subspaces spanned by the hull edges, and on
## the subspaces spanned by their orthogonal complements - in subspace coords
projMat <- rbind(huDir, ouDir) %*% t(hull)
## range of projections and corresponding width/height of bounding rectangle
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
## the orthogonal subspace is in the 2nd half of the matrix
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
## extreme projections for min-area rect in subspace coordinates
## hull edge leading to minimum-area
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
## move projections to rectangle corners
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
## corners in standard coordinates, rows = x,y, columns = corners
## in combined (4x2)-matrix: reverse point order to be usable in polygon()
## basis formed by hull edge and orthogonal subspace
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
## angle of longer edge pointing up
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
#> Reading layer `nc' from data source `/home/b/R/x86_64-pc-linux-gnu-library/4.0/sf/shape/nc.shp' using driver `ESRI Shapefile'
#> Simple feature collection with 100 features and 14 fields
#> geometry type: MULTIPOLYGON
#> dimension: XY
#> bbox: xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
#> epsg (SRID): 4267
#> proj4string: +proj=longlat +datum=NAD27 +no_defs
height = NA_real_,
minimum.shutter.speed = NA_character_,
photo.interval = NA_real_
)
)
# common code:
lapply(c("sf", "data.table", "microbenchmark"),
require, character.only = TRUE)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
microbenchmark(b1 = getbox1(), b2 = getbox2())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.804903 8.391429 20.539996 9.870947 38.71889 66.95236 100 b
#> b2 3.956369 4.186426 9.919638 4.562312 19.13847 47.27808 100 a
A final edit: You can squeeze some more performance out of the getMinBBox function by simplifying it a bit.
# original function code (removed some comments for brevity)
getMinBBox <- function(xy) {
stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- diff(rbind(hull, hull[1, ])) ## hull vertices are circular
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- rbind(huDir, ouDir) %*% t(hull)
rangeH <- matrix(numeric(n*2), ncol=2) ## hull edge
rangeO <- matrix(numeric(n*2), ncol=2) ## orthogonal subspace
widths <- numeric(n)
heights <- numeric(n)
for(i in seq(along=numeric(n))) {
rangeH[i, ] <- range(projMat[ i, ])
rangeO[i, ] <- range(projMat[n+i, ])
widths[i] <- abs(diff(rangeH[i, ]))
heights[i] <- abs(diff(rangeO[i, ]))
}
eMin <- which.min(widths*heights)
hProj <- rbind( rangeH[eMin, ], 0)
oProj <- rbind(0, rangeO[eMin, ])
hPts <- sweep(hProj, 1, oProj[ , 1], "+")
oPts <- sweep(hProj, 1, oProj[ , 2], "+")
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# common code:
lapply(c("sf", "data.table", "microbenchmark", "compiler"),
require, character.only = TRUE)
compiler::enableJIT(3)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc <- st_cast(nc,"POLYGON")
#> Warning in st_cast.sf(nc, "POLYGON"): repeating attributes for all sub-
#> geometries for which they may not be constant
coor <- as.data.frame(st_coordinates(nc))
setDT(coor)
setDT(nc)
getMinBBox2 <- function(xy) {
# stopifnot(is.matrix(xy), is.numeric(xy), nrow(xy) >= 2, ncol(xy) == 2)
# skip checking
H <- grDevices::chull(xy) ## hull indices, vertices ordered clockwise
n <- length(H) ## number of hull vertices
hull <- xy[H, ] ## hull vertices
hDir <- matrixStats::colDiffs(rbind(hull, hull[1, ]))
hLens <- sqrt(rowSums(hDir^2)) ## length of basis vectors
huDir <- diag(1/hLens) %*% hDir ## scaled to unit length
ouDir <- cbind(-huDir[ , 2], huDir[ , 1])
projMat <- tcrossprod(rbind(huDir, ouDir), hull)
HO <- list(seq_len(n), (seq_len(n)+n))
rr <- matrixStats::rowRanges(projMat)
rd <- as.numeric(matrixStats::rowDiffs(rr))
rangeH <- rr[HO[[1]],]
rangeO <- rr[HO[[2]],]
widths <- rd[HO[[1]]]
heights <- rd[HO[[2]]]
eMin <- which.min(widths*heights)
## seems like sweeping is not required here?
# hProj <- rbind( rangeH[eMin, ], 0)
# oProj <- rbind(0, rangeO[eMin, ])
# hPts <- sweep(hProj, 1, oProj[ , 1], "+")
# oPts <- sweep(hProj, 1, oProj[ , 2], "+")
hPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 1], 2))
oPts <- rbind(rangeH[eMin, ], rep(rangeO[eMin, 2], 2))
basis <- cbind(huDir[eMin, ], ouDir[eMin, ])
hCorn <- basis %*% hPts
oCorn <- basis %*% oPts
pts <- t(cbind(hCorn, oCorn[ , c(2, 1)]))
colnames(pts) <- c("X", "Y")
dPts <- diff(pts)
e <- dPts[which.max(rowSums(dPts^2)), ] ## one of the longer edges
eUp <- e * sign(e[2]) ## rotate upwards 180 deg if necessary
deg <- atan2(eUp[2], eUp[1])*180 / pi ## angle in degrees
return(list(pts=pts, width=heights[eMin], height=widths[eMin], angle=deg))
}
# your code (wrapped in a function):
getbox1 <- function(){
data <- data.table(width=numeric(), height=numeric())
for(i in 1:nrow(nc)){
l=as.matrix(coor[L2==i,1:2])
ll=getMinBBox(l)
data=rbind(data,t(ll[2:3]))}
data
}
## split - apply code:
gmb2 <- function(i) getMinBBox(as.matrix(i[,1:2]))[c(2:3)]
gmb3 <- function(i) getMinBBox2(as.matrix(i[,1:2]))[c(2:3)]
getbox2 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb2))
}
getbox3 <- function(){
co <- split(coor, coor$L2)
rbindlist(lapply(co, gmb3))
}
all(identical(getbox1(), getbox2()), identical(getbox2(), getbox3()))
#> [1] TRUE
microbenchmark(b1 = getbox1(), b2 = getbox2(), b3=getbox3())
#> Unit: milliseconds
#> expr min lq mean median uq max neval cld
#> b1 7.852630 8.067991 8.862175 8.269768 8.632328 14.474924 100 c
#> b2 4.020513 4.071012 4.444000 4.166884 4.274048 7.728204 100 b
#> b3 2.871639 2.938294 3.111886 2.986174 3.046096 5.935133 100 a
昨天我发现了一些不同的方法来解决我的 package - parallel 问题。这是一种非常快速的方法,但我找不到如何在其自己的列中写入长度和宽度。
library(lidR)
library(sf)
library(raster)
library(data.table)
library(parallel)
nc <- st_read(system.file("shape/nc.shp", package="sf"))[1:4,]
nc
setDT(nc)
overFun <- function(x){
unlist(getMinBBox(as.matrix(st_coordinates(x)[,1:2]))[2:3])
}
cl <- makeCluster(detectCores() - 1)
clusterExport(cl, list("getMinBBox","st_coordinates","data.table"))
nc[,c("width","length"):=.(clusterMap(cl, overFun, nc$geometry)),]
stopCluster(cl)