使用 sf 将一个点捕捉到线段上最近的点

Snap a point to the closest point on a line segment using sf

我想使用 sf::st_snap 将一个点捕捉到路段上最近的点。然而,函数似乎 return 错误的结果,它将我的点捕捉到路段起点的点。有想法该怎么解决这个吗?

下面提供了可重现的示例,包括我使用 sf::st_snapmaptools::snapPointsToLines

时得到的结果的比较

使用sf::st_snap

# Max distance
  cut_dist = 200 # meters

# snap points to closest road
  new_point <- sf::st_snap(point1, roads, tolerance = cut_dist)

# view points on the map
  mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point) + mapView(roads) 

# Distance between pont1 and new_point
  st_distance( point1, new_point)
> 1591 meters # note this is above the set maximun distance

使用 maptools::snapPointsToLines(我期望的结果)

# convert sf to sp
  point_sp <-  as_Spatial(point1)
  roads_sp <-  as_Spatial(roads)

# snap points
  new_point_sp <- snapPointsToLines(point_sp, roads_sp, maxDist = cut_dist)

# view points on the map
  mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point_sp) + mapView(roads) 

# Distance between pont1 and new_point
  spDistsN1( point_sp, new_point_sp)
>  116 meters

数据和图书馆

library(sf)
library(mapview)
library(maptools)
library(sp)

point1 <- structure(list(idhex = 9L, geometry = structure(list(structure(c(665606.970079183, 
          6525003.41418009), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
          "sfc"), precision = 0, bbox = structure(c(xmin = 665606.970079183, 
          ymin = 6525003.41418009, xmax = 665606.970079183, ymax = 6525003.41418009
          ), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(idhex = NA_integer_), .Label = c("constant", 
                                                                                                                                                         "aggregate", "identity"), class = "factor"), row.names = 2L, class = c("sf", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                              "data.table", "data.frame"))


   roads <- structure(list(id = 139885, osm_id = 250886593, geometry = structure(list(
        structure(c(665387.589147313, 665367.867159783, 665363.008143169, 
        665363.051860059, 665363.308104069, 665366.519781353, 665368.635421323, 
        665370.846894641, 665370.829724196, 665367.910645335, 665361.777524054, 
        665355.967776345, 665351.649946698, 665343.44353825, 665334.917779131, 
        665313.306069501, 665309.001351385, 665310.66019677, 665313.528620709, 
        665341.742306731, 665351.854389331, 665354.981775569, 665360.254611229, 
        665365.006104512, 665379.034588542, 665394.435039616, 665409.282519288, 
        665410.676785182, 665448.890797438, 665458.917562631, 665471.042094353, 
        665485.485001236, 665495.899212422, 665504.535684257, 665509.674854913, 
        665506.145837246, 665483.727146874, 665481.426949686, 665462.311063365, 
        665445.215460573, 665450.424049663, 665450.837897892, 665491.036360788, 
        665491.419140717, 665469.507518623, 665458.677850808, 665455.926197775, 
        665462.873809047, 665460.283684337, 665426.046702616, 665396.279686035, 
        665368.373253059, 665357.878521323, 665304.347529357, 665221.04051616, 
        665170.777462125, 665144.670345016, 665106.030568334, 665073.2789218, 
        665018.208956171, 664947.693178271, 664921.708297412, 664861.659061389, 
        664797.900403384, 664745.001666066, 664730.200174759, 664717.892651619, 
        664706.473711845, 664697.750102392, 664688.215719591, 664681.544531593, 
        664672.960647368, 664665.064067202, 664636.446517023, 664622.930521655, 
        664518.065243846, 664442.725560545, 664423.048166559, 664411.132259582, 
        664407.05972929, 664398.364646172, 664391.348502443, 664382.558239303, 
        664372.012526058, 664354.354954718, 664332.995014599, 664311.609706282, 
        664271.102641808, 664228.816287751, 664150.088321471, 664069.895400484, 
        6526138.02793883, 6526135.40749336, 6526130.11578605, 6526111.34403368, 
        6526087.4978365, 6526054.13445288, 6526022.49962268, 6525982.74830288, 
        6525959.40435839, 6525944.55197219, 6525918.33886077, 6525894.18611795, 
        6525874.55473851, 6525840.53410542, 6525813.96628006, 6525767.42907088, 
        6525745.21917638, 6525733.51582599, 6525713.24841331, 6525627.57847652, 
        6525608.06984863, 6525568.30170735, 6525550.71644271, 6525539.76231607, 
        6525491.25651378, 6525446.12690364, 6525433.36256694, 6525431.23562504, 
        6525372.98235432, 6525354.13376808, 6525331.3288195, 6525309.59511696, 
        6525293.92174422, 6525270.21980161, 6525256.11455612, 6525228.35885783, 
        6525217.10943051, 6525215.95489587, 6525195.91355696, 6525158.79257025, 
        6525134.01851773, 6525131.70940566, 6525050.96446632, 6524950.68358502, 
        6524851.23226232, 6524806.24052727, 6524749.34394609, 6524714.63617193, 
        6524660.07336072, 6524612.21010524, 6524583.84484865, 6524562.03540982, 
        6524557.38094998, 6524533.67136837, 6524510.74454804, 6524495.56823805, 
        6524486.9387399, 6524475.63373441, 6524465.4404841, 6524468.04929815, 
        6524475.95178632, 6524478.86036788, 6524470.76472937, 6524447.96214429, 
        6524448.06967557, 6524443.4855897, 6524435.86812114, 6524425.93373791, 
        6524417.67487537, 6524409.79262886, 6524399.64960133, 6524378.79085156, 
        6524360.33496349, 6524303.24355601, 6524302.70486651, 6524293.01335665, 
        6524290.81442892, 6524298.30279414, 6524309.46697681, 6524313.27442914, 
        6524337.22831533, 6524364.43083297, 6524376.27944935, 6524382.92319852, 
        6524389.6474774, 6524406.74565716, 6524430.82326744, 6524462.46041311, 
        6524492.20009833, 6524544.74318075, 6524591.10483188), .Dim = c(91L, 
        2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING", 
        "sfc"), precision = 0, bbox = structure(c(xmin = 664069.895400484, 
        ymin = 6524290.81442892, xmax = 665509.674854913, ymax = 6526138.02793883
        ), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 139885L, class = c("sf", 
        "data.frame"), sf_column = "geometry", agr = structure(c(id = NA_integer_, 
        osm_id = NA_integer_), .Label = c("constant", "aggregate", "identity"
        ), class = "factor"))

我不知道为什么 st_snap return 是线串的 start/end 点。也许这是 https://github.com/r-spatial/sf.

的一个问题

这是一个似乎可以识别正确点的解决方法。请注意 st_nearest_points 最近才推出,因此您可能需要从 github.

安装 sf
nrst = st_nearest_points(point1, roads)
new_point2 = st_cast(nrst, "POINT")[2]

mapView(point1, color="red") + st_buffer(point1, dist = cut_dist) + new_point2 + roads

将此包装为 return 新几何体集的函数,捕捉点低于某个 max_dist:

library(sf)
library(mapview)

st_snap_points = function(x, y, max_dist = 1000) {

  if (inherits(x, "sf")) n = nrow(x)
  if (inherits(x, "sfc")) n = length(x)

  out = do.call(c,
                lapply(seq(n), function(i) {
                  nrst = st_nearest_points(st_geometry(x)[i], y)
                  nrst_len = st_length(nrst)
                  nrst_mn = which.min(nrst_len)
                  if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
                  return(st_cast(nrst[nrst_mn], "POINT")[2])
                })
  )
  return(out)
}

brew = st_transform(breweries, st_crs(trails))

tst = st_snap_points(brew, trails, 500)

mapview(breweries) + mapview(tst, col.regions = "red") + trails

考虑到这几乎是四年前的回答,我想我应该添加一个替代的、更快的方法,这在当时可能是不可能的。 @TimSalabim 的 st_snap_points() 功能很棒,但是如果你有一个带有几个点的 sf collection,直接在 sf collection 上使用 st_nearest_points() 可以更快地完成。

正如 Tim 和其他人在讨论中提到的,st_nearest_points() returns 一个 LINESTRING 对象 - 起初有点令人困惑,但可以很容易地变成 POINT.听起来 LINESTRING 中点的顺序总是从第一个几何体到第二个几何体,因此您可以选择 LINESTRING 的起点或终点,具体取决于您放置几何体的顺序进入 st_nearest_points().

?st_nearest_points:

Value

an sfc object with all two-point LINESTRING geometries of point pairs from the first to the second geometry, of length x * y, with y cycling fastest. See examples for ideas how to convert these to POINT geometries.

使用您的示例,我们可以通过将道路作为第二个对象并检索 LINESTRING 的端点(第二点)来找到道路上的最近点,如下所示:

# find the LINESTRING object which is the shortest line between the two geometries
st_nearest_points(point1, roads) %>% 
  {. ->> my_linestring}

my_linestring

# Geometry set for 1 feature 
# Geometry type: LINESTRING
# Dimension:     XY
# Bounding box:  xmin: 665491.2 ymin: 6525003 xmax: 665607 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# LINESTRING (665607 6525003, 665491.2 6525003)
  

# then, convert the LINESTRING to POINTs
#    and, pull out the second point, because we want the point on the 'roads' object, 
#    which was supplied second to st_nearest_points()
my_linestring %>% 
  st_cast('POINT') %>% 
  .[2] %>%
  {. ->> closest_point}

closest_point

# Geometry set for 1 feature 
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 665491.2 ymin: 6525003 xmax: 665491.2 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# POINT (665491.2 6525003)

我们可以在地图上看一下来确认:

library(tidyverse)

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = point1, col = 'black')+
  geom_sf(data = closest_point, col = 'blue')

要计算点之间的距离,我们使用 st_distance():

# work out the distance
st_distance(point1, closest_point)[1]
# 115.7514 [m]

要将距离阈值合并到“捕捉”中,我们可以创建一个包含if语句的函数:

# we can combine the code and snap the point conditionally
f1 <- function(x, y, max_dist){
  
  st_nearest_points(x, y) %>% 
    {. ->> my_linestring} %>% 
    st_cast('POINT') %>% 
    .[2] %>%
    {. ->> closest_point} %>% 
    st_distance(., x) %>% 
    as.numeric %>% 
    {. ->> my_distance}
  
  if(my_distance <= max_dist){
    
    return(closest_point)
    
  } else {
      
    return(st_geometry(point1))
    
  }
  
}

# run the function with threshold of 400 m (it should snap)
f1(point1, roads, max_dist = 400) %>% 
  {. ->> p1_400}

# run the function with a threshold of 50 m (it should NOT snap)
f1(point1, roads, max_dist = 50) %>% 
  {. ->> p1_50}

# plot it to see the results
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = p1_400, col = 'blue')+
  geom_sf(data = p1_50)
# the blue point is on the road because it was within the threshold
# the black point is in the original spot because it was outside the threshold



处理一个 sf collection 而不是一个点

现在,我假设许多实例需要在多个点而不是单个点上完成此操作,我们可以稍微修改它以在具有多个点的 sf collection 上使用此方法。为了证明这一点,首先在 roads:

bbox 内组成 100 个 运行dom 点
set.seed(69)

# let's make some random points around the road
tibble(
  x = runif(n = 100, min = st_bbox(roads)$xmin, max = st_bbox(roads)$xmax),
  y = runif(n = 100, min = st_bbox(roads)$ymin, max = st_bbox(roads)$ymax), 
  id = 1:100
) %>% 
  st_as_sf(
    x = ., 
    coords = c('x', 'y'), 
    crs = st_crs(roads)
  ) %>% 
  {. ->> my_points}

my_points

# Simple feature collection with 100 features and 1 field
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 2
#      id           geometry
# * <int>        <POINT [m]>
# 1     1 (664834.1 6525742)
# 2     2 (665176.8 6524370)
# 3     3 (664999.9 6525528)
# 4     4 (665315.7 6525242)
# 5     5   (664601 6525464)
# 6     6 (665320.7 6525600)
# 7     7 (664316.2 6525065)
# 8     8   (665204 6526025)
# 9     9 (664319.8 6524335)
# 10    10 (664101.7 6525830)
# # ... with 90 more rows


# now show the points on a figure
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = my_points, shape = 1)

要找到 roads 上最接近每个 运行dom 点的点,我们使用与上面单点示例相同的函数,但我们将 LINESTRING 保存为每个点一列。然后,我们不再只检索 LINESTRING 的第二个点,而是取出 每个 LINETRINGcollection(列)的第二个点.您可以选择在 row-by-row 的基础上提取每个 LINESTRING 的第二个点,方法是在 mutate() 之前包含 rowwise(),但这会使计算速度慢得多。相反,通过整体处理 sf collection 并每隔一个点(每行的第二个点)取出一个点,您可以更快地获得结果。这就是他们在 ?st_nearest_points.

示例中的演示方式

所以基本上,我们使用 mutate()sf collection 中创建新列,其中包括 LINESTRING 和最近的 POINT.

# now, let's find the closest points
my_points %>% 
  mutate(
    my_linestring = st_nearest_points(geometry, roads),
    closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
  ) %>% 
  {. ->> closest_points}

closest_points

# Simple feature collection with 100 features and 1 field
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 4
#      id           geometry                        my_linestring      closest_point
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)
# # ... with 90 more rows


# and have a look at a figure
ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = my_points, shape = 1)+
  geom_sf(data = closest_points$my_linestring, linetype = 'dotted')+
  geom_sf(data = closest_points$closest_point, shape = 1, col = 'blue')

如果您想在分析中包含距离阈值,我们计算点之间的距离(或 my_linestring 的长度),并创建一个新的几何列,有条件地包含原始几何或新的捕捉点,取决于 distance (see here)。我们使用 ifelse() 和 400 m 的阈值。

# now, use the closest_point if the distance is within a threshold, otherwise keep the original point
closest_points %>% 
  mutate(
    distance = st_distance(geometry, roads)[,1],
    distance2 = st_length(my_linestring), # not used, demo purposes only
    snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
  ) %>% 
  {. ->> closest_points_threshold}

closest_points_threshold

# Simple feature collection with 100 features and 3 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 7
#      id           geometry                        my_linestring      closest_point distance distance2 snapped_point_cond
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>      [m]       [m]        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)    475.      475.  (664834.1 6525742)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)    121.      121.  (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)    356.      356.  (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)    153.      153.  (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)    755.      755.    (664601 6525464)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)     31.2      31.2 (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)    534.      534.  (664316.2 6525065)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)    164.      164.  (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)     64.5      64.5 (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)   1210.     1210.  (664101.7 6525830)
# # ... with 90 more rows


# and plot it
ggplot()+
  geom_sf(data = roads %>% st_buffer(400), col = 'red', fill = NA)+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = closest_points_threshold$snapped_point_cond, shape = 1, col = 'blue')+
  geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400) %>% .$my_linestring, linetype = 'dotted')+
  geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400), shape = 1)

因此,蓝色点是最后的 threshold-snapped 点。正如我们所见,400 米缓冲区外的点(红色多边形)保留在其原始位置,而内部的任何内容(黑点和虚线)都与道路对齐。



比较 st_nearest_points() 与@TimSalabim 的 st_snap_points()

Time-wise,st_nearest_points()方法似乎比st_snap_points()快。据我所知(这不是我的强项),这是因为 st_snap_points() 在 point-by-point 基础上工作,或者在 sf collection 上使用 row-by-row 基础,而我们可以使用 st_nearest_points().

一次处理 LINESTRING 的整个 collection

以 100 分为例,我们说的是 <0.2 秒与 ~11 秒。

此外,如果我们尝试 point-by-point (rowwise()) 方法达到 st_nearest_points(),它是所有三种方法中最慢的,大约需要 15 秒。

#~~~~~ st_nearest_points() ~~~~~

system.time(
  my_points %>% 
    mutate(
      my_linestring = st_nearest_points(geometry, roads),
      closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
      distance = st_distance(geometry, roads)[,1],
      snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
    ) %>% 
    {. ->> closest_points_f1}
)
# <0.2 secs


#~~~~~ st_snap_points() ~~~~~

st_snap_points = function(x, y, max_dist = 1000) {
  
  if (inherits(x, "sf")) n = nrow(x)
  if (inherits(x, "sfc")) n = length(x)
  
  out = do.call(c,
                lapply(seq(n), function(i) {
                  nrst = st_nearest_points(st_geometry(x)[i], y)
                  nrst_len = st_length(nrst)
                  nrst_mn = which.min(nrst_len)
                  if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
                  return(st_cast(nrst[nrst_mn], "POINT")[2])
                })
  )
  return(out)
}

system.time(
  st_snap_points(my_points, roads, max_dist = 400) %>% 
    {. ->> closest_points_f2}
)
# 11 secs


#~~~~~ st_nearest_points() rowwise ~~~~~

system.time(
  my_points %>% 
    rowwise %>% 
    mutate(
      my_linestring = st_nearest_points(geometry, roads),
      closest_point = st_cast(my_linestring, 'POINT')[2], # pull out the second point of each row
      distance = st_distance(geometry, roads)[,1],
      snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
    ) %>% 
    {. ->> closest_points_f1_rowwise}
)
# 15 secs

所有三种方法产生相同的最终数字:

ggplot()+
  geom_sf(data = roads, col = 'red')+
  geom_sf(data = closest_points_f1$snapped_point_cond, shape = 1, col = 'blue')

但是,st_nearest_points() 方法 returns 和 sf collection 具有所有计算列,而 st_snap_points() 仅产生最终的捕捉点。我认为拥有所有原始和工作专栏非常有用,尤其是对于故障排除。另外,这种方法明显更快。

closest_points_f1

# Simple feature collection with 100 features and 2 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 6
#      id           geometry                        my_linestring      closest_point distance snapped_point_cond
# * <int>        <POINT [m]>                     <LINESTRING [m]>        <POINT [m]>      [m]        <POINT [m]>
# 1     1 (664834.1 6525742)   (664834.1 6525742, 665309 6525745)   (665309 6525745)    475.  (664834.1 6525742)
# 2     2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)    121.  (665142.8 6524486)
# 3     3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)    356.  (665337.9 6525639)
# 4     4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)    153.  (665454.2 6525178)
# 5     5   (664601 6525464)   (664601 6525464, 665317.8 6525700) (665317.8 6525700)    755.    (664601 6525464)
# 6     6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)     31.2 (665348.4 6525615)
# 7     7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)    534.  (664316.2 6525065)
# 8     8   (665204 6526025)   (665204 6526025, 665367.7 6526036) (665367.7 6526036)    164.  (665367.7 6526036)
# 9     9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)     64.5 (664354.4 6524390)
# 10    10 (664101.7 6525830)   (664101.7 6525830, 665309 6525745)   (665309 6525745)   1210.  (664101.7 6525830)
# # ... with 90 more rows

closest_points_f2

# Geometry set for 100 features 
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 664069.9 ymin: 6524380 xmax: 665480.3 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# First 5 geometries:
# POINT (664834.1 6525742)
# POINT (665142.8 6524486)
# POINT (665337.9 6525639)
# POINT (665454.2 6525178)
# POINT (664601 6525464)

我用 1000 点再次 运行 示例,st_neareast_points() 花了 ~0.31 秒(长 1.5 倍),st_snap_points() 花了 120 秒(长 ~10 倍)。我还在 10,000 点上 运行 st_nearest_points() 花了 ~2.5 秒,比 100 点长 12 倍,比 100 点长 8 倍(我没有 运行 st_snap_points() 在 10,000 行上).

我尝试了上面提供的两种解决方案,发现它们都不适合我。第一个花费太多时间;第二个占用太多内存。我提供的解决方案应该既更快又内存成本更低。 SF的进步使之成为可能。

我的做法如下:

  1. 我在线条(道路)周围创建了一个缓冲区并将其连接到一个形状(使用并集)。
  2. 我丢弃了所有不在缓冲区内的点。
  3. 对于每个点,我找到最接近的特征。
  4. 对于每个点及其最近的特征,我找到最近的点。
  5. 我添加点属性并添加不在缓冲区内的点。

这里有简单的代码(仅适用于 sf)。当心,它可能会添加一个名为“有效”的列,这可能会削弱原始数据。

require(sf)
require(dplyr)

buffer_lines <- function(lines, dist = 100, verbose = FALSE) {
    if (verbose) message("Creating buffer for lines...")
    st_buffer(lines, dist = dist) |> st_union()
}

get_points_close_to_lines <- function(points, lines, dist = 100,
                                      verbose = FALSE) {
    buff <- buffer_lines(lines, dist = dist, verbose = verbose)
    if (verbose) message("Finding which points are in buffer...")
    st_intersects(points, buff, sparse = FALSE)
}

snap_points_to_lines <- function(points, lines, dist = 100,
                                 all_points = FALSE, verbose = FALSE) {
    inside <- get_points_close_to_lines(points, lines, dist = dist,
                                        verbose = verbose)
    buff_points <- points[inside, ]
    if (verbose) message("Finding nearest features...")
    nf <- st_nearest_feature(buff_points, lines)
    if (verbose) message("Finding nearest points...")
    np <- st_nearest_points(buff_points, lines[nf, ], pairwise = TRUE)
    np <- st_cast(np, "POINT")[c(FALSE, TRUE)]
    if (verbose) message("Adding attributes...")
    out <- st_drop_geometry(buff_points)
    out$geometry <- np
    out <- st_as_sf(out)
    if (all_points) {
        if (verbose) message("Adding points outside buffer...")
        out$valid <- TRUE
        outside <- points[!inside, ]
        outside$valid <- FALSE
        out <- dplyr::bind_rows(out, outside)
    }
    out
}