使用 R 的多个多边形中的点
Points in multiple polygons using R
目前我有两个 data.frames,其中一个是多边形 (poly.x, poly.y, enum)
和一个点 (pt.x, pt.y)
,其中 enum
是多边形的 ID。我试图确定哪些点属于哪些多边形,所以我得到 (pt.x, pt.y, enum)
的 data.frame。
我第一次尝试使用 sp
包中的 point.in.polygon
和 lapply
函数来查找该点属于哪个多边形。虽然我的代码有效,但在大型数据集上需要 long 时间。
我的第二次尝试使用 over
也来自 sp
包,从 gis stackexchange 上的问题拼凑而成。虽然它 快 很多,但我似乎无法从 over
获得正确的输出,因为它是 1
s 和 NA
s 的数据帧。
下面我提供了一个简化的工作示例(npoly
可以更改以测试不同方法的速度)以及我使用 sp::point.in.polygon
的工作尝试和我的 sp::over
尝试。只要速度快,我就不介意最终使用哪种方法。
如有任何帮助,我们将不胜感激!
#-------------------------------------------
# Libraries
library(ggplot2) # sample plots
library(dplyr) # bind_rows(), etc
library(sp) # spatial data
# Sample data
npoly = 100
# polygons
localpolydf <- data.frame(
x = rep(c(0, 1, 1, 0), npoly) + rep(0:(npoly-1), each = 4),
y = rep(c(0, 0, 1, 1), npoly),
enum = rep(1:npoly, each = 4))
# points
offsetdf <- data.frame(
x = seq(min(localpolydf$x) - 0.5, max(localpolydf$x) + 0.5, by = 0.5),
y = runif(npoly*2 + 3, 0, 1))
# Sample plot
ggplot() +
geom_polygon(aes(x, y, group = enum),
localpolydf, fill = NA, colour = "black") +
geom_point(aes(x, y), offsetdf)
#-------------------------------------------
# Dplyr and lapply solution for point.in.polygon
ptm <- proc.time() # Start timer
# create lists
offsetlist <- split(offsetdf, rownames(offsetdf))
polygonlist <- split(localpolydf, localpolydf$enum)
# lapply over each pt in offsetlist
pts <- lapply(offsetlist, function(pt) {
# lapply over each polygon in polygonlist
ptpoly <- lapply(polygonlist, function(poly) {
data.frame(
enum = poly$enum[1],
ptin = point.in.polygon(pt[1,1], pt[1,2], poly$x, poly$y))
})
ptpoly <- bind_rows(ptpoly) %>% filter(ptin != 0)
if (nrow(ptpoly) == 0) return(data.frame(x = pt$x, y = pt$y, enum = NA, ptin = NA))
ptpoly$x = pt$x
ptpoly$y = pt$y
return(ptpoly[c("x", "y", "enum", "ptin")])
})
pts_apply <- bind_rows(pts)
proc.time() - ptm # end timer
#-------------------------------------------
# Attempted sp solution for over
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
polygonlist <- lapply(polygonlist, function(x) x[,c("x", "y")])
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, Polygon)
polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(list(polygonsp))
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
proc.time() - ptm # end timer
#===========================================
# Output
# Apply: point.in.polygon
> head(pts_apply)
x y enum ptin
1 -0.5 0.2218138 NA NA
2 4.0 0.9785541 4 2
3 4.0 0.9785541 5 2
4 49.0 0.3971479 49 2
5 49.0 0.3971479 50 2
6 49.5 0.1177206 50 1
user system elapsed
4.434 0.002 4.435
# SP: over
> head(pts_sp)
1 2 3 4 5 6
NA 1 1 NA 1 NA
user system elapsed
0.048 0.000 0.047
over
returns 几何中点的索引。也许是这样的:
xy <- offsetps[names(na.omit(pts_sp == 1)), ]
plot(polygonsp, axes = 1, xlim = c(0, 10))
points(offsetps)
points(xy, col = "red")
再看一眼后,我意识到 Roman 做了 pts_sp == 1
,因为我的所有方块只有 1 个 ID,即当我做了 ID = 1
.
一旦我解决了这个问题,我就可以使用 ID = enum
列了。要处理多个多边形中的点,我可以使用 returnList = TRUE
并添加额外的行以将列表转换为 data.frame 但这里不需要。
# Attempted sp solution
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, function(poly){
Polygons(list(Polygon(poly[, c("x", "y")])), ID = poly[1, "enum"])
})
# polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(polygonsp)
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
pts_sp <- data.frame(
x = offsetps$x, y = offsetps$y,
enum = unique(localpolydf$enum)[pts_sp])
proc.time() - ptm # end timer
使用 over
的替代方法是使用 sf::intersection
,因为 sf 包越来越受欢迎。
将数据放入 sf 对象需要我做一些工作,但如果您使用的是外部数据,您只需使用 st_read
读入,它就已经是正确的形式了。
方法如下:
library(tidyverse)
library(sf)
# convert into st_polygon friendly format (all polygons must be closed)
# must be a nicer way to do this!
localpoly <- localpolydf %>% split(localpolydf$enum) %>%
lapply(function(x) rbind(x,x[1,])) %>%
lapply(function(x) x[,1:2]) %>%
lapply(function(x) list(as.matrix(x))) %>%
lapply(function(x) st_polygon(x))
# convert points into sf object
points <- st_as_sf(offsetdf,coords=c('x','y'),remove = F)
#convert polygons to sf object and add id column
polys <- localpoly %>% st_sfc() %>% st_sf(geom=.) %>%
mutate(id=factor(1:100))
#find intersection
joined <- polys %>% st_intersection(points)
# Sample plot
ggplot() + geom_sf(data=polys) +
geom_sf(data=joined %>% filter(id %in% c(1:10)),aes(col=id)) +
lims(x=c(0,10))
请注意,在撰写本文时要使用 geom_sf,您需要安装 ggplot 的开发版本。
绘图输出:
目前我有两个 data.frames,其中一个是多边形 (poly.x, poly.y, enum)
和一个点 (pt.x, pt.y)
,其中 enum
是多边形的 ID。我试图确定哪些点属于哪些多边形,所以我得到 (pt.x, pt.y, enum)
的 data.frame。
我第一次尝试使用 sp
包中的 point.in.polygon
和 lapply
函数来查找该点属于哪个多边形。虽然我的代码有效,但在大型数据集上需要 long 时间。
我的第二次尝试使用 over
也来自 sp
包,从 gis stackexchange 上的问题拼凑而成。虽然它 快 很多,但我似乎无法从 over
获得正确的输出,因为它是 1
s 和 NA
s 的数据帧。
下面我提供了一个简化的工作示例(npoly
可以更改以测试不同方法的速度)以及我使用 sp::point.in.polygon
的工作尝试和我的 sp::over
尝试。只要速度快,我就不介意最终使用哪种方法。
如有任何帮助,我们将不胜感激!
#-------------------------------------------
# Libraries
library(ggplot2) # sample plots
library(dplyr) # bind_rows(), etc
library(sp) # spatial data
# Sample data
npoly = 100
# polygons
localpolydf <- data.frame(
x = rep(c(0, 1, 1, 0), npoly) + rep(0:(npoly-1), each = 4),
y = rep(c(0, 0, 1, 1), npoly),
enum = rep(1:npoly, each = 4))
# points
offsetdf <- data.frame(
x = seq(min(localpolydf$x) - 0.5, max(localpolydf$x) + 0.5, by = 0.5),
y = runif(npoly*2 + 3, 0, 1))
# Sample plot
ggplot() +
geom_polygon(aes(x, y, group = enum),
localpolydf, fill = NA, colour = "black") +
geom_point(aes(x, y), offsetdf)
#-------------------------------------------
# Dplyr and lapply solution for point.in.polygon
ptm <- proc.time() # Start timer
# create lists
offsetlist <- split(offsetdf, rownames(offsetdf))
polygonlist <- split(localpolydf, localpolydf$enum)
# lapply over each pt in offsetlist
pts <- lapply(offsetlist, function(pt) {
# lapply over each polygon in polygonlist
ptpoly <- lapply(polygonlist, function(poly) {
data.frame(
enum = poly$enum[1],
ptin = point.in.polygon(pt[1,1], pt[1,2], poly$x, poly$y))
})
ptpoly <- bind_rows(ptpoly) %>% filter(ptin != 0)
if (nrow(ptpoly) == 0) return(data.frame(x = pt$x, y = pt$y, enum = NA, ptin = NA))
ptpoly$x = pt$x
ptpoly$y = pt$y
return(ptpoly[c("x", "y", "enum", "ptin")])
})
pts_apply <- bind_rows(pts)
proc.time() - ptm # end timer
#-------------------------------------------
# Attempted sp solution for over
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
polygonlist <- lapply(polygonlist, function(x) x[,c("x", "y")])
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, Polygon)
polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(list(polygonsp))
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
proc.time() - ptm # end timer
#===========================================
# Output
# Apply: point.in.polygon
> head(pts_apply)
x y enum ptin
1 -0.5 0.2218138 NA NA
2 4.0 0.9785541 4 2
3 4.0 0.9785541 5 2
4 49.0 0.3971479 49 2
5 49.0 0.3971479 50 2
6 49.5 0.1177206 50 1
user system elapsed
4.434 0.002 4.435
# SP: over
> head(pts_sp)
1 2 3 4 5 6
NA 1 1 NA 1 NA
user system elapsed
0.048 0.000 0.047
over
returns 几何中点的索引。也许是这样的:
xy <- offsetps[names(na.omit(pts_sp == 1)), ]
plot(polygonsp, axes = 1, xlim = c(0, 10))
points(offsetps)
points(xy, col = "red")
再看一眼后,我意识到 Roman 做了 pts_sp == 1
,因为我的所有方块只有 1 个 ID,即当我做了 ID = 1
.
一旦我解决了这个问题,我就可以使用 ID = enum
列了。要处理多个多边形中的点,我可以使用 returnList = TRUE
并添加额外的行以将列表转换为 data.frame 但这里不需要。
# Attempted sp solution
ptm <- proc.time() # Start timer
# Split the dataframe into a list based on enum and then remove enum from df in the list
polygonlist <- split(localpolydf, localpolydf$enum)
# Convert the list to Polygon, then create a Polygons object
polygonsp <- sapply(polygonlist, function(poly){
Polygons(list(Polygon(poly[, c("x", "y")])), ID = poly[1, "enum"])
})
# polygonsp <- Polygons(polygonsp, ID = 1)
polygonsp <- SpatialPolygons(polygonsp)
plot(polygonsp)
# Convert points to coordinates
offsetps <- offsetdf
coordinates(offsetps) <- ~x+y
points(offsetps$x, offsetps$y)
# Determine polygons points are in
pts_sp <- over(offsetps, polygonsp)
pts_sp <- data.frame(
x = offsetps$x, y = offsetps$y,
enum = unique(localpolydf$enum)[pts_sp])
proc.time() - ptm # end timer
使用 over
的替代方法是使用 sf::intersection
,因为 sf 包越来越受欢迎。
将数据放入 sf 对象需要我做一些工作,但如果您使用的是外部数据,您只需使用 st_read
读入,它就已经是正确的形式了。
方法如下:
library(tidyverse)
library(sf)
# convert into st_polygon friendly format (all polygons must be closed)
# must be a nicer way to do this!
localpoly <- localpolydf %>% split(localpolydf$enum) %>%
lapply(function(x) rbind(x,x[1,])) %>%
lapply(function(x) x[,1:2]) %>%
lapply(function(x) list(as.matrix(x))) %>%
lapply(function(x) st_polygon(x))
# convert points into sf object
points <- st_as_sf(offsetdf,coords=c('x','y'),remove = F)
#convert polygons to sf object and add id column
polys <- localpoly %>% st_sfc() %>% st_sf(geom=.) %>%
mutate(id=factor(1:100))
#find intersection
joined <- polys %>% st_intersection(points)
# Sample plot
ggplot() + geom_sf(data=polys) +
geom_sf(data=joined %>% filter(id %in% c(1:10)),aes(col=id)) +
lims(x=c(0,10))
请注意,在撰写本文时要使用 geom_sf,您需要安装 ggplot 的开发版本。
绘图输出: