使用 sf 将一个点捕捉到线段上最近的点
Snap a point to the closest point on a line segment using sf
我想使用 sf::st_snap
将一个点捕捉到路段上最近的点。然而,函数似乎 return 错误的结果,它将我的点捕捉到路段起点的点。有想法该怎么解决这个吗?
下面提供了可重现的示例,包括我使用 sf::st_snap
与 maptools::snapPointsToLines
时得到的结果的比较
使用sf::st_snap
# Max distance
cut_dist = 200 # meters
# snap points to closest road
new_point <- sf::st_snap(point1, roads, tolerance = cut_dist)
# view points on the map
mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point) + mapView(roads)
# Distance between pont1 and new_point
st_distance( point1, new_point)
> 1591 meters # note this is above the set maximun distance
使用 maptools::snapPointsToLines
(我期望的结果)
# convert sf to sp
point_sp <- as_Spatial(point1)
roads_sp <- as_Spatial(roads)
# snap points
new_point_sp <- snapPointsToLines(point_sp, roads_sp, maxDist = cut_dist)
# view points on the map
mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point_sp) + mapView(roads)
# Distance between pont1 and new_point
spDistsN1( point_sp, new_point_sp)
> 116 meters
数据和图书馆
library(sf)
library(mapview)
library(maptools)
library(sp)
point1 <- structure(list(idhex = 9L, geometry = structure(list(structure(c(665606.970079183,
6525003.41418009), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 665606.970079183,
ymin = 6525003.41418009, xmax = 665606.970079183, ymax = 6525003.41418009
), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(idhex = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = 2L, class = c("sf",
"data.table", "data.frame"))
roads <- structure(list(id = 139885, osm_id = 250886593, geometry = structure(list(
structure(c(665387.589147313, 665367.867159783, 665363.008143169,
665363.051860059, 665363.308104069, 665366.519781353, 665368.635421323,
665370.846894641, 665370.829724196, 665367.910645335, 665361.777524054,
665355.967776345, 665351.649946698, 665343.44353825, 665334.917779131,
665313.306069501, 665309.001351385, 665310.66019677, 665313.528620709,
665341.742306731, 665351.854389331, 665354.981775569, 665360.254611229,
665365.006104512, 665379.034588542, 665394.435039616, 665409.282519288,
665410.676785182, 665448.890797438, 665458.917562631, 665471.042094353,
665485.485001236, 665495.899212422, 665504.535684257, 665509.674854913,
665506.145837246, 665483.727146874, 665481.426949686, 665462.311063365,
665445.215460573, 665450.424049663, 665450.837897892, 665491.036360788,
665491.419140717, 665469.507518623, 665458.677850808, 665455.926197775,
665462.873809047, 665460.283684337, 665426.046702616, 665396.279686035,
665368.373253059, 665357.878521323, 665304.347529357, 665221.04051616,
665170.777462125, 665144.670345016, 665106.030568334, 665073.2789218,
665018.208956171, 664947.693178271, 664921.708297412, 664861.659061389,
664797.900403384, 664745.001666066, 664730.200174759, 664717.892651619,
664706.473711845, 664697.750102392, 664688.215719591, 664681.544531593,
664672.960647368, 664665.064067202, 664636.446517023, 664622.930521655,
664518.065243846, 664442.725560545, 664423.048166559, 664411.132259582,
664407.05972929, 664398.364646172, 664391.348502443, 664382.558239303,
664372.012526058, 664354.354954718, 664332.995014599, 664311.609706282,
664271.102641808, 664228.816287751, 664150.088321471, 664069.895400484,
6526138.02793883, 6526135.40749336, 6526130.11578605, 6526111.34403368,
6526087.4978365, 6526054.13445288, 6526022.49962268, 6525982.74830288,
6525959.40435839, 6525944.55197219, 6525918.33886077, 6525894.18611795,
6525874.55473851, 6525840.53410542, 6525813.96628006, 6525767.42907088,
6525745.21917638, 6525733.51582599, 6525713.24841331, 6525627.57847652,
6525608.06984863, 6525568.30170735, 6525550.71644271, 6525539.76231607,
6525491.25651378, 6525446.12690364, 6525433.36256694, 6525431.23562504,
6525372.98235432, 6525354.13376808, 6525331.3288195, 6525309.59511696,
6525293.92174422, 6525270.21980161, 6525256.11455612, 6525228.35885783,
6525217.10943051, 6525215.95489587, 6525195.91355696, 6525158.79257025,
6525134.01851773, 6525131.70940566, 6525050.96446632, 6524950.68358502,
6524851.23226232, 6524806.24052727, 6524749.34394609, 6524714.63617193,
6524660.07336072, 6524612.21010524, 6524583.84484865, 6524562.03540982,
6524557.38094998, 6524533.67136837, 6524510.74454804, 6524495.56823805,
6524486.9387399, 6524475.63373441, 6524465.4404841, 6524468.04929815,
6524475.95178632, 6524478.86036788, 6524470.76472937, 6524447.96214429,
6524448.06967557, 6524443.4855897, 6524435.86812114, 6524425.93373791,
6524417.67487537, 6524409.79262886, 6524399.64960133, 6524378.79085156,
6524360.33496349, 6524303.24355601, 6524302.70486651, 6524293.01335665,
6524290.81442892, 6524298.30279414, 6524309.46697681, 6524313.27442914,
6524337.22831533, 6524364.43083297, 6524376.27944935, 6524382.92319852,
6524389.6474774, 6524406.74565716, 6524430.82326744, 6524462.46041311,
6524492.20009833, 6524544.74318075, 6524591.10483188), .Dim = c(91L,
2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING",
"sfc"), precision = 0, bbox = structure(c(xmin = 664069.895400484,
ymin = 6524290.81442892, xmax = 665509.674854913, ymax = 6526138.02793883
), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 139885L, class = c("sf",
"data.frame"), sf_column = "geometry", agr = structure(c(id = NA_integer_,
osm_id = NA_integer_), .Label = c("constant", "aggregate", "identity"
), class = "factor"))
我不知道为什么 st_snap
return 是线串的 start/end 点。也许这是 https://github.com/r-spatial/sf.
的一个问题
这是一个似乎可以识别正确点的解决方法。请注意 st_nearest_points
最近才推出,因此您可能需要从 github.
安装 sf
nrst = st_nearest_points(point1, roads)
new_point2 = st_cast(nrst, "POINT")[2]
mapView(point1, color="red") + st_buffer(point1, dist = cut_dist) + new_point2 + roads
将此包装为 return 新几何体集的函数,捕捉点低于某个 max_dist
:
library(sf)
library(mapview)
st_snap_points = function(x, y, max_dist = 1000) {
if (inherits(x, "sf")) n = nrow(x)
if (inherits(x, "sfc")) n = length(x)
out = do.call(c,
lapply(seq(n), function(i) {
nrst = st_nearest_points(st_geometry(x)[i], y)
nrst_len = st_length(nrst)
nrst_mn = which.min(nrst_len)
if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
return(st_cast(nrst[nrst_mn], "POINT")[2])
})
)
return(out)
}
brew = st_transform(breweries, st_crs(trails))
tst = st_snap_points(brew, trails, 500)
mapview(breweries) + mapview(tst, col.regions = "red") + trails
考虑到这几乎是四年前的回答,我想我应该添加一个替代的、更快的方法,这在当时可能是不可能的。 @TimSalabim 的 st_snap_points()
功能很棒,但是如果你有一个带有几个点的 sf collection
,直接在 sf collection
上使用 st_nearest_points()
可以更快地完成。
正如 Tim 和其他人在讨论中提到的,st_nearest_points()
returns 一个 LINESTRING
对象 - 起初有点令人困惑,但可以很容易地变成 POINT
.听起来 LINESTRING
中点的顺序总是从第一个几何体到第二个几何体,因此您可以选择 LINESTRING
的起点或终点,具体取决于您放置几何体的顺序进入 st_nearest_points()
.
见?st_nearest_points
:
Value
an sfc object with all two-point LINESTRING geometries of point pairs from the first to the second geometry, of length x * y, with y cycling fastest. See examples for ideas how to convert these to POINT geometries.
使用您的示例,我们可以通过将道路作为第二个对象并检索 LINESTRING
的端点(第二点)来找到道路上的最近点,如下所示:
# find the LINESTRING object which is the shortest line between the two geometries
st_nearest_points(point1, roads) %>%
{. ->> my_linestring}
my_linestring
# Geometry set for 1 feature
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: 665491.2 ymin: 6525003 xmax: 665607 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# LINESTRING (665607 6525003, 665491.2 6525003)
# then, convert the LINESTRING to POINTs
# and, pull out the second point, because we want the point on the 'roads' object,
# which was supplied second to st_nearest_points()
my_linestring %>%
st_cast('POINT') %>%
.[2] %>%
{. ->> closest_point}
closest_point
# Geometry set for 1 feature
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 665491.2 ymin: 6525003 xmax: 665491.2 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# POINT (665491.2 6525003)
我们可以在地图上看一下来确认:
library(tidyverse)
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = point1, col = 'black')+
geom_sf(data = closest_point, col = 'blue')
要计算点之间的距离,我们使用 st_distance()
:
# work out the distance
st_distance(point1, closest_point)[1]
# 115.7514 [m]
要将距离阈值合并到“捕捉”中,我们可以创建一个包含if
语句的函数:
# we can combine the code and snap the point conditionally
f1 <- function(x, y, max_dist){
st_nearest_points(x, y) %>%
{. ->> my_linestring} %>%
st_cast('POINT') %>%
.[2] %>%
{. ->> closest_point} %>%
st_distance(., x) %>%
as.numeric %>%
{. ->> my_distance}
if(my_distance <= max_dist){
return(closest_point)
} else {
return(st_geometry(point1))
}
}
# run the function with threshold of 400 m (it should snap)
f1(point1, roads, max_dist = 400) %>%
{. ->> p1_400}
# run the function with a threshold of 50 m (it should NOT snap)
f1(point1, roads, max_dist = 50) %>%
{. ->> p1_50}
# plot it to see the results
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = p1_400, col = 'blue')+
geom_sf(data = p1_50)
# the blue point is on the road because it was within the threshold
# the black point is in the original spot because it was outside the threshold
处理一个 sf collection
而不是一个点
现在,我假设许多实例需要在多个点而不是单个点上完成此操作,我们可以稍微修改它以在具有多个点的 sf collection
上使用此方法。为了证明这一点,首先在 roads
:
的 bbox
内组成 100 个 运行dom 点
set.seed(69)
# let's make some random points around the road
tibble(
x = runif(n = 100, min = st_bbox(roads)$xmin, max = st_bbox(roads)$xmax),
y = runif(n = 100, min = st_bbox(roads)$ymin, max = st_bbox(roads)$ymax),
id = 1:100
) %>%
st_as_sf(
x = .,
coords = c('x', 'y'),
crs = st_crs(roads)
) %>%
{. ->> my_points}
my_points
# Simple feature collection with 100 features and 1 field
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 2
# id geometry
# * <int> <POINT [m]>
# 1 1 (664834.1 6525742)
# 2 2 (665176.8 6524370)
# 3 3 (664999.9 6525528)
# 4 4 (665315.7 6525242)
# 5 5 (664601 6525464)
# 6 6 (665320.7 6525600)
# 7 7 (664316.2 6525065)
# 8 8 (665204 6526025)
# 9 9 (664319.8 6524335)
# 10 10 (664101.7 6525830)
# # ... with 90 more rows
# now show the points on a figure
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = my_points, shape = 1)
要找到 roads
上最接近每个 运行dom 点的点,我们使用与上面单点示例相同的函数,但我们将 LINESTRING
保存为每个点一列。然后,我们不再只检索 LINESTRING
的第二个点,而是取出 每个 个 LINETRING
的 collection
(列)的第二个点.您可以选择在 row-by-row 的基础上提取每个 LINESTRING
的第二个点,方法是在 mutate()
之前包含 rowwise()
,但这会使计算速度慢得多。相反,通过整体处理 sf collection
并每隔一个点(每行的第二个点)取出一个点,您可以更快地获得结果。这就是他们在 ?st_nearest_points
.
示例中的演示方式
所以基本上,我们使用 mutate()
在 sf collection
中创建新列,其中包括 LINESTRING
和最近的 POINT
.
# now, let's find the closest points
my_points %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
) %>%
{. ->> closest_points}
closest_points
# Simple feature collection with 100 features and 1 field
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 4
# id geometry my_linestring closest_point
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745)
# # ... with 90 more rows
# and have a look at a figure
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = my_points, shape = 1)+
geom_sf(data = closest_points$my_linestring, linetype = 'dotted')+
geom_sf(data = closest_points$closest_point, shape = 1, col = 'blue')
如果您想在分析中包含距离阈值,我们计算点之间的距离(或 my_linestring
的长度),并创建一个新的几何列,有条件地包含原始几何或新的捕捉点,取决于 distance
(see here)。我们使用 ifelse()
和 400 m 的阈值。
# now, use the closest_point if the distance is within a threshold, otherwise keep the original point
closest_points %>%
mutate(
distance = st_distance(geometry, roads)[,1],
distance2 = st_length(my_linestring), # not used, demo purposes only
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_threshold}
closest_points_threshold
# Simple feature collection with 100 features and 3 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 7
# id geometry my_linestring closest_point distance distance2 snapped_point_cond
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]> [m] [m] <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745) 475. 475. (664834.1 6525742)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486) 121. 121. (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639) 356. 356. (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178) 153. 153. (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700) 755. 755. (664601 6525464)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615) 31.2 31.2 (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591) 534. 534. (664316.2 6525065)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036) 164. 164. (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390) 64.5 64.5 (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745) 1210. 1210. (664101.7 6525830)
# # ... with 90 more rows
# and plot it
ggplot()+
geom_sf(data = roads %>% st_buffer(400), col = 'red', fill = NA)+
geom_sf(data = roads, col = 'red')+
geom_sf(data = closest_points_threshold$snapped_point_cond, shape = 1, col = 'blue')+
geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400) %>% .$my_linestring, linetype = 'dotted')+
geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400), shape = 1)
因此,蓝色点是最后的 threshold-snapped 点。正如我们所见,400 米缓冲区外的点(红色多边形)保留在其原始位置,而内部的任何内容(黑点和虚线)都与道路对齐。
比较 st_nearest_points()
与@TimSalabim 的 st_snap_points()
Time-wise,st_nearest_points()
方法似乎比st_snap_points()
快。据我所知(这不是我的强项),这是因为 st_snap_points()
在 point-by-point 基础上工作,或者在 sf collection
上使用 row-by-row 基础,而我们可以使用 st_nearest_points()
.
一次处理 LINESTRING
的整个 collection
以 100 分为例,我们说的是 <0.2 秒与 ~11 秒。
此外,如果我们尝试 point-by-point (rowwise()
) 方法达到 st_nearest_points()
,它是所有三种方法中最慢的,大约需要 15 秒。
#~~~~~ st_nearest_points() ~~~~~
system.time(
my_points %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
distance = st_distance(geometry, roads)[,1],
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_f1}
)
# <0.2 secs
#~~~~~ st_snap_points() ~~~~~
st_snap_points = function(x, y, max_dist = 1000) {
if (inherits(x, "sf")) n = nrow(x)
if (inherits(x, "sfc")) n = length(x)
out = do.call(c,
lapply(seq(n), function(i) {
nrst = st_nearest_points(st_geometry(x)[i], y)
nrst_len = st_length(nrst)
nrst_mn = which.min(nrst_len)
if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
return(st_cast(nrst[nrst_mn], "POINT")[2])
})
)
return(out)
}
system.time(
st_snap_points(my_points, roads, max_dist = 400) %>%
{. ->> closest_points_f2}
)
# 11 secs
#~~~~~ st_nearest_points() rowwise ~~~~~
system.time(
my_points %>%
rowwise %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[2], # pull out the second point of each row
distance = st_distance(geometry, roads)[,1],
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_f1_rowwise}
)
# 15 secs
所有三种方法产生相同的最终数字:
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = closest_points_f1$snapped_point_cond, shape = 1, col = 'blue')
但是,st_nearest_points()
方法 returns 和 sf collection
具有所有计算列,而 st_snap_points()
仅产生最终的捕捉点。我认为拥有所有原始和工作专栏非常有用,尤其是对于故障排除。另外,这种方法明显更快。
closest_points_f1
# Simple feature collection with 100 features and 2 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 6
# id geometry my_linestring closest_point distance snapped_point_cond
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]> [m] <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745) 475. (664834.1 6525742)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486) 121. (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639) 356. (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178) 153. (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700) 755. (664601 6525464)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615) 31.2 (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591) 534. (664316.2 6525065)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036) 164. (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390) 64.5 (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745) 1210. (664101.7 6525830)
# # ... with 90 more rows
closest_points_f2
# Geometry set for 100 features
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664069.9 ymin: 6524380 xmax: 665480.3 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# First 5 geometries:
# POINT (664834.1 6525742)
# POINT (665142.8 6524486)
# POINT (665337.9 6525639)
# POINT (665454.2 6525178)
# POINT (664601 6525464)
我用 1000 点再次 运行 示例,st_neareast_points()
花了 ~0.31 秒(长 1.5 倍),st_snap_points()
花了 120 秒(长 ~10 倍)。我还在 10,000 点上 运行 st_nearest_points()
花了 ~2.5 秒,比 100 点长 12 倍,比 100 点长 8 倍(我没有 运行 st_snap_points()
在 10,000 行上).
我尝试了上面提供的两种解决方案,发现它们都不适合我。第一个花费太多时间;第二个占用太多内存。我提供的解决方案应该既更快又内存成本更低。 SF的进步使之成为可能。
我的做法如下:
- 我在线条(道路)周围创建了一个缓冲区并将其连接到一个形状(使用并集)。
- 我丢弃了所有不在缓冲区内的点。
- 对于每个点,我找到最接近的特征。
- 对于每个点及其最近的特征,我找到最近的点。
- 我添加点属性并添加不在缓冲区内的点。
这里有简单的代码(仅适用于 sf)。当心,它可能会添加一个名为“有效”的列,这可能会削弱原始数据。
require(sf)
require(dplyr)
buffer_lines <- function(lines, dist = 100, verbose = FALSE) {
if (verbose) message("Creating buffer for lines...")
st_buffer(lines, dist = dist) |> st_union()
}
get_points_close_to_lines <- function(points, lines, dist = 100,
verbose = FALSE) {
buff <- buffer_lines(lines, dist = dist, verbose = verbose)
if (verbose) message("Finding which points are in buffer...")
st_intersects(points, buff, sparse = FALSE)
}
snap_points_to_lines <- function(points, lines, dist = 100,
all_points = FALSE, verbose = FALSE) {
inside <- get_points_close_to_lines(points, lines, dist = dist,
verbose = verbose)
buff_points <- points[inside, ]
if (verbose) message("Finding nearest features...")
nf <- st_nearest_feature(buff_points, lines)
if (verbose) message("Finding nearest points...")
np <- st_nearest_points(buff_points, lines[nf, ], pairwise = TRUE)
np <- st_cast(np, "POINT")[c(FALSE, TRUE)]
if (verbose) message("Adding attributes...")
out <- st_drop_geometry(buff_points)
out$geometry <- np
out <- st_as_sf(out)
if (all_points) {
if (verbose) message("Adding points outside buffer...")
out$valid <- TRUE
outside <- points[!inside, ]
outside$valid <- FALSE
out <- dplyr::bind_rows(out, outside)
}
out
}
我想使用 sf::st_snap
将一个点捕捉到路段上最近的点。然而,函数似乎 return 错误的结果,它将我的点捕捉到路段起点的点。有想法该怎么解决这个吗?
下面提供了可重现的示例,包括我使用 sf::st_snap
与 maptools::snapPointsToLines
使用sf::st_snap
# Max distance
cut_dist = 200 # meters
# snap points to closest road
new_point <- sf::st_snap(point1, roads, tolerance = cut_dist)
# view points on the map
mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point) + mapView(roads)
# Distance between pont1 and new_point
st_distance( point1, new_point)
> 1591 meters # note this is above the set maximun distance
使用 maptools::snapPointsToLines
(我期望的结果)
# convert sf to sp
point_sp <- as_Spatial(point1)
roads_sp <- as_Spatial(roads)
# snap points
new_point_sp <- snapPointsToLines(point_sp, roads_sp, maxDist = cut_dist)
# view points on the map
mapView(point1, color="red") + mapView( st_buffer(point1, dist = cut_dist)) + mapView(new_point_sp) + mapView(roads)
# Distance between pont1 and new_point
spDistsN1( point_sp, new_point_sp)
> 116 meters
数据和图书馆
library(sf)
library(mapview)
library(maptools)
library(sp)
point1 <- structure(list(idhex = 9L, geometry = structure(list(structure(c(665606.970079183,
6525003.41418009), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 665606.970079183,
ymin = 6525003.41418009, xmax = 665606.970079183, ymax = 6525003.41418009
), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(idhex = NA_integer_), .Label = c("constant",
"aggregate", "identity"), class = "factor"), row.names = 2L, class = c("sf",
"data.table", "data.frame"))
roads <- structure(list(id = 139885, osm_id = 250886593, geometry = structure(list(
structure(c(665387.589147313, 665367.867159783, 665363.008143169,
665363.051860059, 665363.308104069, 665366.519781353, 665368.635421323,
665370.846894641, 665370.829724196, 665367.910645335, 665361.777524054,
665355.967776345, 665351.649946698, 665343.44353825, 665334.917779131,
665313.306069501, 665309.001351385, 665310.66019677, 665313.528620709,
665341.742306731, 665351.854389331, 665354.981775569, 665360.254611229,
665365.006104512, 665379.034588542, 665394.435039616, 665409.282519288,
665410.676785182, 665448.890797438, 665458.917562631, 665471.042094353,
665485.485001236, 665495.899212422, 665504.535684257, 665509.674854913,
665506.145837246, 665483.727146874, 665481.426949686, 665462.311063365,
665445.215460573, 665450.424049663, 665450.837897892, 665491.036360788,
665491.419140717, 665469.507518623, 665458.677850808, 665455.926197775,
665462.873809047, 665460.283684337, 665426.046702616, 665396.279686035,
665368.373253059, 665357.878521323, 665304.347529357, 665221.04051616,
665170.777462125, 665144.670345016, 665106.030568334, 665073.2789218,
665018.208956171, 664947.693178271, 664921.708297412, 664861.659061389,
664797.900403384, 664745.001666066, 664730.200174759, 664717.892651619,
664706.473711845, 664697.750102392, 664688.215719591, 664681.544531593,
664672.960647368, 664665.064067202, 664636.446517023, 664622.930521655,
664518.065243846, 664442.725560545, 664423.048166559, 664411.132259582,
664407.05972929, 664398.364646172, 664391.348502443, 664382.558239303,
664372.012526058, 664354.354954718, 664332.995014599, 664311.609706282,
664271.102641808, 664228.816287751, 664150.088321471, 664069.895400484,
6526138.02793883, 6526135.40749336, 6526130.11578605, 6526111.34403368,
6526087.4978365, 6526054.13445288, 6526022.49962268, 6525982.74830288,
6525959.40435839, 6525944.55197219, 6525918.33886077, 6525894.18611795,
6525874.55473851, 6525840.53410542, 6525813.96628006, 6525767.42907088,
6525745.21917638, 6525733.51582599, 6525713.24841331, 6525627.57847652,
6525608.06984863, 6525568.30170735, 6525550.71644271, 6525539.76231607,
6525491.25651378, 6525446.12690364, 6525433.36256694, 6525431.23562504,
6525372.98235432, 6525354.13376808, 6525331.3288195, 6525309.59511696,
6525293.92174422, 6525270.21980161, 6525256.11455612, 6525228.35885783,
6525217.10943051, 6525215.95489587, 6525195.91355696, 6525158.79257025,
6525134.01851773, 6525131.70940566, 6525050.96446632, 6524950.68358502,
6524851.23226232, 6524806.24052727, 6524749.34394609, 6524714.63617193,
6524660.07336072, 6524612.21010524, 6524583.84484865, 6524562.03540982,
6524557.38094998, 6524533.67136837, 6524510.74454804, 6524495.56823805,
6524486.9387399, 6524475.63373441, 6524465.4404841, 6524468.04929815,
6524475.95178632, 6524478.86036788, 6524470.76472937, 6524447.96214429,
6524448.06967557, 6524443.4855897, 6524435.86812114, 6524425.93373791,
6524417.67487537, 6524409.79262886, 6524399.64960133, 6524378.79085156,
6524360.33496349, 6524303.24355601, 6524302.70486651, 6524293.01335665,
6524290.81442892, 6524298.30279414, 6524309.46697681, 6524313.27442914,
6524337.22831533, 6524364.43083297, 6524376.27944935, 6524382.92319852,
6524389.6474774, 6524406.74565716, 6524430.82326744, 6524462.46041311,
6524492.20009833, 6524544.74318075, 6524591.10483188), .Dim = c(91L,
2L), class = c("XY", "LINESTRING", "sfg"))), class = c("sfc_LINESTRING",
"sfc"), precision = 0, bbox = structure(c(xmin = 664069.895400484,
ymin = 6524290.81442892, xmax = 665509.674854913, ymax = 6526138.02793883
), class = "bbox"), crs = structure(list(epsg = 32633L, proj4string = "+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 139885L, class = c("sf",
"data.frame"), sf_column = "geometry", agr = structure(c(id = NA_integer_,
osm_id = NA_integer_), .Label = c("constant", "aggregate", "identity"
), class = "factor"))
我不知道为什么 st_snap
return 是线串的 start/end 点。也许这是 https://github.com/r-spatial/sf.
这是一个似乎可以识别正确点的解决方法。请注意 st_nearest_points
最近才推出,因此您可能需要从 github.
nrst = st_nearest_points(point1, roads)
new_point2 = st_cast(nrst, "POINT")[2]
mapView(point1, color="red") + st_buffer(point1, dist = cut_dist) + new_point2 + roads
将此包装为 return 新几何体集的函数,捕捉点低于某个 max_dist
:
library(sf)
library(mapview)
st_snap_points = function(x, y, max_dist = 1000) {
if (inherits(x, "sf")) n = nrow(x)
if (inherits(x, "sfc")) n = length(x)
out = do.call(c,
lapply(seq(n), function(i) {
nrst = st_nearest_points(st_geometry(x)[i], y)
nrst_len = st_length(nrst)
nrst_mn = which.min(nrst_len)
if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
return(st_cast(nrst[nrst_mn], "POINT")[2])
})
)
return(out)
}
brew = st_transform(breweries, st_crs(trails))
tst = st_snap_points(brew, trails, 500)
mapview(breweries) + mapview(tst, col.regions = "red") + trails
考虑到这几乎是四年前的回答,我想我应该添加一个替代的、更快的方法,这在当时可能是不可能的。 @TimSalabim 的 st_snap_points()
功能很棒,但是如果你有一个带有几个点的 sf collection
,直接在 sf collection
上使用 st_nearest_points()
可以更快地完成。
正如 Tim 和其他人在讨论中提到的,st_nearest_points()
returns 一个 LINESTRING
对象 - 起初有点令人困惑,但可以很容易地变成 POINT
.听起来 LINESTRING
中点的顺序总是从第一个几何体到第二个几何体,因此您可以选择 LINESTRING
的起点或终点,具体取决于您放置几何体的顺序进入 st_nearest_points()
.
见?st_nearest_points
:
Value
an sfc object with all two-point LINESTRING geometries of point pairs from the first to the second geometry, of length x * y, with y cycling fastest. See examples for ideas how to convert these to POINT geometries.
使用您的示例,我们可以通过将道路作为第二个对象并检索 LINESTRING
的端点(第二点)来找到道路上的最近点,如下所示:
# find the LINESTRING object which is the shortest line between the two geometries
st_nearest_points(point1, roads) %>%
{. ->> my_linestring}
my_linestring
# Geometry set for 1 feature
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: 665491.2 ymin: 6525003 xmax: 665607 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# LINESTRING (665607 6525003, 665491.2 6525003)
# then, convert the LINESTRING to POINTs
# and, pull out the second point, because we want the point on the 'roads' object,
# which was supplied second to st_nearest_points()
my_linestring %>%
st_cast('POINT') %>%
.[2] %>%
{. ->> closest_point}
closest_point
# Geometry set for 1 feature
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 665491.2 ymin: 6525003 xmax: 665491.2 ymax: 6525003
# Projected CRS: WGS 84 / UTM zone 33N
# POINT (665491.2 6525003)
我们可以在地图上看一下来确认:
library(tidyverse)
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = point1, col = 'black')+
geom_sf(data = closest_point, col = 'blue')
要计算点之间的距离,我们使用 st_distance()
:
# work out the distance
st_distance(point1, closest_point)[1]
# 115.7514 [m]
要将距离阈值合并到“捕捉”中,我们可以创建一个包含if
语句的函数:
# we can combine the code and snap the point conditionally
f1 <- function(x, y, max_dist){
st_nearest_points(x, y) %>%
{. ->> my_linestring} %>%
st_cast('POINT') %>%
.[2] %>%
{. ->> closest_point} %>%
st_distance(., x) %>%
as.numeric %>%
{. ->> my_distance}
if(my_distance <= max_dist){
return(closest_point)
} else {
return(st_geometry(point1))
}
}
# run the function with threshold of 400 m (it should snap)
f1(point1, roads, max_dist = 400) %>%
{. ->> p1_400}
# run the function with a threshold of 50 m (it should NOT snap)
f1(point1, roads, max_dist = 50) %>%
{. ->> p1_50}
# plot it to see the results
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = p1_400, col = 'blue')+
geom_sf(data = p1_50)
# the blue point is on the road because it was within the threshold
# the black point is in the original spot because it was outside the threshold
处理一个 sf collection
而不是一个点
现在,我假设许多实例需要在多个点而不是单个点上完成此操作,我们可以稍微修改它以在具有多个点的 sf collection
上使用此方法。为了证明这一点,首先在 roads
:
bbox
内组成 100 个 运行dom 点
set.seed(69)
# let's make some random points around the road
tibble(
x = runif(n = 100, min = st_bbox(roads)$xmin, max = st_bbox(roads)$xmax),
y = runif(n = 100, min = st_bbox(roads)$ymin, max = st_bbox(roads)$ymax),
id = 1:100
) %>%
st_as_sf(
x = .,
coords = c('x', 'y'),
crs = st_crs(roads)
) %>%
{. ->> my_points}
my_points
# Simple feature collection with 100 features and 1 field
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 2
# id geometry
# * <int> <POINT [m]>
# 1 1 (664834.1 6525742)
# 2 2 (665176.8 6524370)
# 3 3 (664999.9 6525528)
# 4 4 (665315.7 6525242)
# 5 5 (664601 6525464)
# 6 6 (665320.7 6525600)
# 7 7 (664316.2 6525065)
# 8 8 (665204 6526025)
# 9 9 (664319.8 6524335)
# 10 10 (664101.7 6525830)
# # ... with 90 more rows
# now show the points on a figure
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = my_points, shape = 1)
要找到 roads
上最接近每个 运行dom 点的点,我们使用与上面单点示例相同的函数,但我们将 LINESTRING
保存为每个点一列。然后,我们不再只检索 LINESTRING
的第二个点,而是取出 每个 个 LINETRING
的 collection
(列)的第二个点.您可以选择在 row-by-row 的基础上提取每个 LINESTRING
的第二个点,方法是在 mutate()
之前包含 rowwise()
,但这会使计算速度慢得多。相反,通过整体处理 sf collection
并每隔一个点(每行的第二个点)取出一个点,您可以更快地获得结果。这就是他们在 ?st_nearest_points
.
所以基本上,我们使用 mutate()
在 sf collection
中创建新列,其中包括 LINESTRING
和最近的 POINT
.
# now, let's find the closest points
my_points %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
) %>%
{. ->> closest_points}
closest_points
# Simple feature collection with 100 features and 1 field
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 4
# id geometry my_linestring closest_point
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745)
# # ... with 90 more rows
# and have a look at a figure
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = my_points, shape = 1)+
geom_sf(data = closest_points$my_linestring, linetype = 'dotted')+
geom_sf(data = closest_points$closest_point, shape = 1, col = 'blue')
如果您想在分析中包含距离阈值,我们计算点之间的距离(或 my_linestring
的长度),并创建一个新的几何列,有条件地包含原始几何或新的捕捉点,取决于 distance
(see here)。我们使用 ifelse()
和 400 m 的阈值。
# now, use the closest_point if the distance is within a threshold, otherwise keep the original point
closest_points %>%
mutate(
distance = st_distance(geometry, roads)[,1],
distance2 = st_length(my_linestring), # not used, demo purposes only
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_threshold}
closest_points_threshold
# Simple feature collection with 100 features and 3 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 7
# id geometry my_linestring closest_point distance distance2 snapped_point_cond
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]> [m] [m] <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745) 475. 475. (664834.1 6525742)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486) 121. 121. (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639) 356. 356. (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178) 153. 153. (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700) 755. 755. (664601 6525464)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615) 31.2 31.2 (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591) 534. 534. (664316.2 6525065)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036) 164. 164. (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390) 64.5 64.5 (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745) 1210. 1210. (664101.7 6525830)
# # ... with 90 more rows
# and plot it
ggplot()+
geom_sf(data = roads %>% st_buffer(400), col = 'red', fill = NA)+
geom_sf(data = roads, col = 'red')+
geom_sf(data = closest_points_threshold$snapped_point_cond, shape = 1, col = 'blue')+
geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400) %>% .$my_linestring, linetype = 'dotted')+
geom_sf(data = closest_points_threshold %>% filter(as.numeric(distance) <= 400), shape = 1)
因此,蓝色点是最后的 threshold-snapped 点。正如我们所见,400 米缓冲区外的点(红色多边形)保留在其原始位置,而内部的任何内容(黑点和虚线)都与道路对齐。
比较 st_nearest_points()
与@TimSalabim 的 st_snap_points()
Time-wise,st_nearest_points()
方法似乎比st_snap_points()
快。据我所知(这不是我的强项),这是因为 st_snap_points()
在 point-by-point 基础上工作,或者在 sf collection
上使用 row-by-row 基础,而我们可以使用 st_nearest_points()
.
LINESTRING
的整个 collection
以 100 分为例,我们说的是 <0.2 秒与 ~11 秒。
此外,如果我们尝试 point-by-point (rowwise()
) 方法达到 st_nearest_points()
,它是所有三种方法中最慢的,大约需要 15 秒。
#~~~~~ st_nearest_points() ~~~~~
system.time(
my_points %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[seq(2, nrow(.)*2, 2)],
distance = st_distance(geometry, roads)[,1],
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_f1}
)
# <0.2 secs
#~~~~~ st_snap_points() ~~~~~
st_snap_points = function(x, y, max_dist = 1000) {
if (inherits(x, "sf")) n = nrow(x)
if (inherits(x, "sfc")) n = length(x)
out = do.call(c,
lapply(seq(n), function(i) {
nrst = st_nearest_points(st_geometry(x)[i], y)
nrst_len = st_length(nrst)
nrst_mn = which.min(nrst_len)
if (as.vector(nrst_len[nrst_mn]) > max_dist) return(st_geometry(x)[i])
return(st_cast(nrst[nrst_mn], "POINT")[2])
})
)
return(out)
}
system.time(
st_snap_points(my_points, roads, max_dist = 400) %>%
{. ->> closest_points_f2}
)
# 11 secs
#~~~~~ st_nearest_points() rowwise ~~~~~
system.time(
my_points %>%
rowwise %>%
mutate(
my_linestring = st_nearest_points(geometry, roads),
closest_point = st_cast(my_linestring, 'POINT')[2], # pull out the second point of each row
distance = st_distance(geometry, roads)[,1],
snapped_point_cond = st_sfc(ifelse(as.numeric(distance) <= 400, st_geometry(closest_point), geometry), crs = st_crs(roads))
) %>%
{. ->> closest_points_f1_rowwise}
)
# 15 secs
所有三种方法产生相同的最终数字:
ggplot()+
geom_sf(data = roads, col = 'red')+
geom_sf(data = closest_points_f1$snapped_point_cond, shape = 1, col = 'blue')
但是,st_nearest_points()
方法 returns 和 sf collection
具有所有计算列,而 st_snap_points()
仅产生最终的捕捉点。我认为拥有所有原始和工作专栏非常有用,尤其是对于故障排除。另外,这种方法明显更快。
closest_points_f1
# Simple feature collection with 100 features and 2 fields
# Active geometry column: geometry
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664090.7 ymin: 6524310 xmax: 665500.6 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# # A tibble: 100 x 6
# id geometry my_linestring closest_point distance snapped_point_cond
# * <int> <POINT [m]> <LINESTRING [m]> <POINT [m]> [m] <POINT [m]>
# 1 1 (664834.1 6525742) (664834.1 6525742, 665309 6525745) (665309 6525745) 475. (664834.1 6525742)
# 2 2 (665176.8 6524370) (665176.8 6524370, 665142.8 6524486) (665142.8 6524486) 121. (665142.8 6524486)
# 3 3 (664999.9 6525528) (664999.9 6525528, 665337.9 6525639) (665337.9 6525639) 356. (665337.9 6525639)
# 4 4 (665315.7 6525242) (665315.7 6525242, 665454.2 6525178) (665454.2 6525178) 153. (665454.2 6525178)
# 5 5 (664601 6525464) (664601 6525464, 665317.8 6525700) (665317.8 6525700) 755. (664601 6525464)
# 6 6 (665320.7 6525600) (665320.7 6525600, 665348.4 6525615) (665348.4 6525615) 31.2 (665348.4 6525615)
# 7 7 (664316.2 6525065) (664316.2 6525065, 664069.9 6524591) (664069.9 6524591) 534. (664316.2 6525065)
# 8 8 (665204 6526025) (665204 6526025, 665367.7 6526036) (665367.7 6526036) 164. (665367.7 6526036)
# 9 9 (664319.8 6524335) (664319.8 6524335, 664354.4 6524390) (664354.4 6524390) 64.5 (664354.4 6524390)
# 10 10 (664101.7 6525830) (664101.7 6525830, 665309 6525745) (665309 6525745) 1210. (664101.7 6525830)
# # ... with 90 more rows
closest_points_f2
# Geometry set for 100 features
# Geometry type: POINT
# Dimension: XY
# Bounding box: xmin: 664069.9 ymin: 6524380 xmax: 665480.3 ymax: 6526107
# Projected CRS: WGS 84 / UTM zone 33N
# First 5 geometries:
# POINT (664834.1 6525742)
# POINT (665142.8 6524486)
# POINT (665337.9 6525639)
# POINT (665454.2 6525178)
# POINT (664601 6525464)
我用 1000 点再次 运行 示例,st_neareast_points()
花了 ~0.31 秒(长 1.5 倍),st_snap_points()
花了 120 秒(长 ~10 倍)。我还在 10,000 点上 运行 st_nearest_points()
花了 ~2.5 秒,比 100 点长 12 倍,比 100 点长 8 倍(我没有 运行 st_snap_points()
在 10,000 行上).
我尝试了上面提供的两种解决方案,发现它们都不适合我。第一个花费太多时间;第二个占用太多内存。我提供的解决方案应该既更快又内存成本更低。 SF的进步使之成为可能。
我的做法如下:
- 我在线条(道路)周围创建了一个缓冲区并将其连接到一个形状(使用并集)。
- 我丢弃了所有不在缓冲区内的点。
- 对于每个点,我找到最接近的特征。
- 对于每个点及其最近的特征,我找到最近的点。
- 我添加点属性并添加不在缓冲区内的点。
这里有简单的代码(仅适用于 sf)。当心,它可能会添加一个名为“有效”的列,这可能会削弱原始数据。
require(sf)
require(dplyr)
buffer_lines <- function(lines, dist = 100, verbose = FALSE) {
if (verbose) message("Creating buffer for lines...")
st_buffer(lines, dist = dist) |> st_union()
}
get_points_close_to_lines <- function(points, lines, dist = 100,
verbose = FALSE) {
buff <- buffer_lines(lines, dist = dist, verbose = verbose)
if (verbose) message("Finding which points are in buffer...")
st_intersects(points, buff, sparse = FALSE)
}
snap_points_to_lines <- function(points, lines, dist = 100,
all_points = FALSE, verbose = FALSE) {
inside <- get_points_close_to_lines(points, lines, dist = dist,
verbose = verbose)
buff_points <- points[inside, ]
if (verbose) message("Finding nearest features...")
nf <- st_nearest_feature(buff_points, lines)
if (verbose) message("Finding nearest points...")
np <- st_nearest_points(buff_points, lines[nf, ], pairwise = TRUE)
np <- st_cast(np, "POINT")[c(FALSE, TRUE)]
if (verbose) message("Adding attributes...")
out <- st_drop_geometry(buff_points)
out$geometry <- np
out <- st_as_sf(out)
if (all_points) {
if (verbose) message("Adding points outside buffer...")
out$valid <- TRUE
outside <- points[!inside, ]
outside$valid <- FALSE
out <- dplyr::bind_rows(out, outside)
}
out
}