R 叠加具有一定容差的点和多边形

R Overlay points and polygons with a certain degree of tolerance

我想使用 R 叠加一些空间点和多边形,以便为这些点分配我已考虑的地理区域的一些属性。

我通常做的是使用sp包的命令over。我的问题是我正在处理全球发生的大量地理参考事件,在某些情况下(尤其是在沿海地区),经度和纬度组合略微超出 country/region 边界。 这是一个基于 very good question.

的可重现示例
## example data
set.seed(1)
library(raster)
library(rgdal)
library(sp)
p <- shapefile(system.file("external/lux.shp", package="raster"))
p2 <- as(0.30*extent(p), "SpatialPolygons")
proj4string(p2) <- proj4string(p)
pts1 <- spsample(p2-p, n=3, type="random")
pts2<- spsample(p, n=10, type="random")
pts<-rbind(pts1, pts2)

## Plot to visualize
plot(p, col=colorRampPalette(blues9)(12))
plot(pts, pch=16, cex=.5,col="red", add=TRUE)

# overlay
pts_index<-over(pts, p)

# result
pts_index

#>     ID_1       NAME_1 ID_2           NAME_2 AREA
#>1    NA         <NA> <NA>             <NA>   NA
#>2    NA         <NA> <NA>             <NA>   NA
#>3    NA         <NA> <NA>             <NA>   NA
#>4     1     Diekirch    1         Clervaux  312
#>5     1     Diekirch    5            Wiltz  263
#>6     2 Grevenmacher   12     Grevenmacher  210
#>7     2 Grevenmacher    6       Echternach  188
#>8     3   Luxembourg    9 Esch-sur-Alzette  251
#>9     1     Diekirch    3          Redange  259
#>10    2 Grevenmacher    7           Remich  129
#>11    1     Diekirch    1         Clervaux  312
#>12    1     Diekirch    5            Wiltz  263
#>13    2 Grevenmacher    7           Remich  129

有没有办法给 over 函数一种容差,以便捕获非常靠近边界的点?

注意:

根据 this 我可以将最近的多边形分配给缺失点,但这并不是我想要的。

编辑:最近邻解

#adding lon and lat to the table
pts_index$lon<-pts@coords[,1]
pts_index$lat<-pts@coords[,2]

#add an ID to split and then re-compose the table 
pts_index$split_id<-seq(1,nrow(pts_index),1)
#filtering out the missed points

library(dplyr)
library(geosphere)
missed_pts<-filter(pts_index, is.na(NAME_1))
pts_missed<-SpatialPoints(missed_pts[,c(6,7)],proj4string=CRS(proj4string(p)))

#find the nearest neighbors' characteristics
n <- length(pts_missed)
nearestID1 <- character(n)
nearestNAME1 <- character(n)
nearestID2 <- character(n)
nearestNAME2 <- character(n)
nearestAREA <- character(n)

for (i in seq_along(nearestID1)) {
  nearestID1[i] <- as.character(p$ID_1[which.min(dist2Line (pts_missed[i,], p))])
  nearestNAME1[i] <- as.character(p$NAME_1[which.min(dist2Line (pts_missed[i,], p))])
  nearestID2[i] <- as.character(p$ID_2[which.min(dist2Line (pts_missed[i,], p))])
  nearestNAME2[i] <- as.character(p$NAME_2[which.min(dist2Line (pts_missed[i,], p))])
  nearestAREA[i] <- as.character(p$AREA[which.min(dist2Line (pts_missed[i,], p))])
}
missed_pts$ID_1<-nearestID1
missed_pts$NAME_1<-nearestNAME1
missed_pts$ID_2<-nearestID2
missed_pts$NAME_2<-nearestNAME2
missed_pts$AREA<-nearestAREA

#missed_pts have now the characteristics of the nearest poliygon
#bringing now everything toogether
pts_index[match(missed_pts$split_id, pts_index$split_id),] <- missed_pts
pts_index<-pts_index[,-c(6:8)]

pts_index

       ID_1       NAME_1 ID_2           NAME_2 AREA
1     1     Diekirch    4          Vianden   76
2     1     Diekirch    4          Vianden   76
3     1     Diekirch    4          Vianden   76
4     1     Diekirch    1         Clervaux  312
5     1     Diekirch    5            Wiltz  263
6     2 Grevenmacher   12     Grevenmacher  210
7     2 Grevenmacher    6       Echternach  188
8     3   Luxembourg    9 Esch-sur-Alzette  251
9     1     Diekirch    3          Redange  259
10    2 Grevenmacher    7           Remich  129
11    1     Diekirch    1         Clervaux  312
12    1     Diekirch    5            Wiltz  263
13    2 Grevenmacher    7           Remich  129

这与@Gilles 在他的回答中提出的输出完全相同。 我只是想知道是否有比这一切更有效的东西。

我认为您不能将 "tolerance" 添加到 over 或其他常见的相交算法。 通过缓冲多边形,您会增加一些公差,但随后某些点可能会落入两个不同的多边形中。

一种可能是围绕落在区域多边形之外的点创建缓冲区,将这些缓冲区与多边形相交,计算面积,并为每个点仅保留具有最大面积的线。与您建议的方法(找到最近的多边形)相比,这种方法的优点是您不必计算所有多边形的距离。

可能还有更直接的可能性...

这里是一个使用 sf 来操纵空间对象的例子,但你当然可以用 sprgeos 来做同样的事情。
一个困难是找到 "tolerance" 的正确级别(缓冲区的大小)。这里我使用了2km的公差。

## Your example
set.seed(1)
library(raster)
#> Loading required package: sp
library(rgdal)
library(sp)


p <- shapefile(system.file("external/lux.shp", package="raster"))
p2 <- as(0.30*extent(p), "SpatialPolygons")
proj4string(p2) <- proj4string(p)
pts1 <- spsample(p2-p, n=3, type="random")
pts2<- spsample(p, n=10, type="random")
pts<-rbind(pts1, pts2)

请注意,我使用 over 时的输出与您不同:

over(pts, p)
#>    ID_1       NAME_1 ID_2           NAME_2 AREA
#> 1    NA         <NA> <NA>             <NA>   NA
#> 2    NA         <NA> <NA>             <NA>   NA
#> 3    NA         <NA> <NA>             <NA>   NA
#> 4     1     Diekirch    1         Clervaux  312
#> 5     1     Diekirch    5            Wiltz  263
#> 6     2 Grevenmacher   12     Grevenmacher  210
#> 7     2 Grevenmacher    6       Echternach  188
#> 8     3   Luxembourg    9 Esch-sur-Alzette  251
#> 9     1     Diekirch    3          Redange  259
#> 10    2 Grevenmacher    7           Remich  129
#> 11    1     Diekirch    1         Clervaux  312
#> 12    1     Diekirch    5            Wiltz  263
#> 13    2 Grevenmacher    7           Remich  129

在多边形之外的点上使用缓冲区:

# additional packages needed
library(sf)
library(dplyr)

# transform the sp objects into sf objects and add an ID to the points
pts <- st_as_sf(pts)
pts$IDpts <- 1:nrow(pts)
p <- st_as_sf(p)

# project the data in planar coordinates (here a projection for Luxemburg)
# better for area calculations but maybe not crucial here
pts <- st_transform(pts, crs = 2169)
p <- st_transform(p, crs = 2169)


# intersect the points with the polygons (equivalent to you "over")
pts_index <- st_set_geometry(st_intersection(pts, p), NULL)
#> Warning: attribute variables are assumed to be spatially constant
#> throughout all geometries


# points that are outside the polygons
pts_out <- pts[lengths(st_within(pts, p)) == 0,]
# buffer around these points with a given size
bf <- st_buffer(pts_out, dist = 2000) # distance in meters, here 2km

# intersect these buffers with the polygons and compute their area
bf <- st_intersection(bf, p)
#> Warning: attribute variables are assumed to be spatially constant
#> throughout all geometries
bf$area <- st_area(bf)

# for each point (IDpts), select the line with the highest area
# then drop the geometry columns and transform the result n a data.frame
pts_out <- bf %>% group_by(IDpts) %>% slice(which.max(area)) %>% 
     select(1:6) %>% st_set_geometry(NULL) %>% as.data.frame()

输出:

# Colate the results from the point within polygons and outside polygons
pts_index <- rbind(pts_index, pts_out)
pts_index <- pts_index[order(pts_index$IDpts),]
pts_index
#>    IDpts ID_1       NAME_1 ID_2           NAME_2 AREA
#> 1      1    1     Diekirch    4          Vianden   76
#> 2      2    1     Diekirch    4          Vianden   76
#> 3      3    1     Diekirch    4          Vianden   76
#> 4      4    1     Diekirch    1         Clervaux  312
#> 5      5    1     Diekirch    5            Wiltz  263
#> 6      6    2 Grevenmacher   12     Grevenmacher  210
#> 7      7    2 Grevenmacher    6       Echternach  188
#> 8      8    3   Luxembourg    9 Esch-sur-Alzette  251
#> 9      9    1     Diekirch    3          Redange  259
#> 10    10    2 Grevenmacher    7           Remich  129
#> 11    11    1     Diekirch    1         Clervaux  312
#> 12    12    1     Diekirch    5            Wiltz  263
#> 13    13    2 Grevenmacher    7           Remich  129

这是我使用 sf 的尝试。如果您盲目地想将多边形特征连接到点形成最近的邻居,用 join = st_nearest_feature

调用 st_join 就足够了
library(sf)

# convert data to sf
pts_sf = st_as_sf(pts)
p_sf = st_as_sf(p)

# this is enough for joining polygon attributes to points from their nearest neighbor
st_join(pts_sf, p_sf, join = st_nearest_feature)

如果您希望能够设置一些公差,使得比该公差更远的点不会连接任何多边形属性,我们需要创建自己的连接函数。

st_nearest_feature2 = function(x, y, tolerance = 100) {
  isec = st_intersects(x, y)
  no_isec = which(lengths(isec) == 0)

  for (i in no_isec) {
    nrst = st_nearest_points(st_geometry(x)[i], y)
    nrst_len = st_length(nrst)
    nrst_mn = which.min(nrst_len)
    isec[i] = ifelse(as.vector(nrst_len[nrst_mn]) > tolerance, integer(0), nrst_mn)
  }

  unlist(isec)

}

st_join(pts_sf, p_sf, join = st_nearest_feature2, tolerance = 1000)

这按预期工作,即当您将 tolerance 设置为零时,您将获得与 over 相同的结果,对于更大的值,您将接近上面的 st_nearest_feature 结果。

示例数据 -

set.seed(1)
library(raster)
library(rgdal)
library(sp)
p <- shapefile(system.file("external/lux.shp", package="raster"))
p2 <- as(0.30*extent(p), "SpatialPolygons")
proj4string(p2) <- proj4string(p)
pts1 <- spsample(p2-p, n=3, type="random")
pts2<- spsample(p, n=10, type="random")
pts<-rbind(pts1, pts2)

## Plot to visualize
plot(p, col=colorRampPalette(blues9)(12))
plot(pts, pch=16, cex=.5,col="red", add=TRUE)

使用 sfnngeo 包的解决方案 -

library(nngeo)

# Convert to 'sf'
pts = st_as_sf(pts)
p = st_as_sf(p)

# Spatial join
p1 = st_join(pts, p, join = st_nn)
p1

## Simple feature collection with 13 features and 5 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 5.795068 ymin: 49.54622 xmax: 6.518138 ymax: 50.1426
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##   ID_1       NAME_1 ID_2           NAME_2 AREA                  geometry
## 1     1     Diekirch    4          Vianden   76 POINT (6.235953 49.91801)
## 2     1     Diekirch    4          Vianden   76 POINT (6.251893 49.92177)
## 3     1     Diekirch    4          Vianden   76  POINT (6.236712 49.9023)
## 4     1     Diekirch    1         Clervaux  312  POINT (6.090294 50.1426)
## 5     1     Diekirch    5            Wiltz  263  POINT (5.948738 49.8796)
## 6     2 Grevenmacher   12     Grevenmacher  210 POINT (6.302851 49.66278)
## 7     2 Grevenmacher    6       Echternach  188 POINT (6.518138 49.76773)
## 8     3   Luxembourg    9 Esch-sur-Alzette  251 POINT (6.116905 49.56184)
## 9     1     Diekirch    3          Redange  259 POINT (5.932418 49.78505)
## 10    2 Grevenmacher    7           Remich  129 POINT (6.285379 49.54622)

显示连接了哪些多边形和点的绘图 -

# Visuzlize join
l = st_connect(pts, p, dist = 1)
plot(st_geometry(p))
plot(st_geometry(pts), add = TRUE)
plot(st_geometry(l), col = "red", lwd = 2, add = TRUE)

编辑:

# Spatial join with 100 meters threshold
p2 = st_join(pts, p, join = st_nn, maxdist = 100)
p2
## Simple feature collection with 13 features and 5 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 5.795068 ymin: 49.54622 xmax: 6.518138 ymax: 50.1426
## epsg (SRID):    4326
## proj4string:    +proj=longlat +datum=WGS84 +no_defs
## First 10 features:
##   ID_1       NAME_1 ID_2           NAME_2 AREA                  geometry
## 1    NA         <NA> <NA>             <NA>   NA POINT (6.235953 49.91801)
## 2    NA         <NA> <NA>             <NA>   NA POINT (6.251893 49.92177)
## 3     1     Diekirch    4          Vianden   76  POINT (6.236712 49.9023)
## 4     1     Diekirch    1         Clervaux  312  POINT (6.090294 50.1426)
## 5     1     Diekirch    5            Wiltz  263  POINT (5.948738 49.8796)
## 6     2 Grevenmacher   12     Grevenmacher  210 POINT (6.302851 49.66278)
## 7     2 Grevenmacher    6       Echternach  188 POINT (6.518138 49.76773)
## 8     3   Luxembourg    9 Esch-sur-Alzette  251 POINT (6.116905 49.56184)
## 9     1     Diekirch    3          Redange  259 POINT (5.932418 49.78505)
## 10    2 Grevenmacher    7           Remich  129 POINT (6.285379 49.54622)