从 lat/lon 坐标中找到 5 个最近的站点
Find 5 closest stations from lat/lon coordinates
我试图找到从一个数据集 (set1
) 到另一个数据集 (set2
) 的 5 个最近的站点。 This post 是我使用的基础,找到最接近的单个似乎很简单,但我正在编写 for
循环来处理它并且效率不高。此外,我收到错误并且不明白为什么它不起作用。理想情况下,我想使用 set1
在 set2
中找到最近的车站,找到 5 个最近的车站,并为每个车站添加一列,为 set1
中的每个唯一 ID。
编辑:这个问题与 How to assign a name to lat-long observations based on shortest distance 不同,因为我试图找到最近的 5 个车站,而不仅仅是一个距离。此外,寻找最小值的方法也不同。请重新打开这个问题。
输出:
set1 <- structure(list(id = c(5984, 7495, 4752, 2654, 4578, 9865, 3265,
1252, 4679, 1346), lat = c(48.39167, 48.148056, 48.721111, 47.189167,
47.054443, 47.129166, 47.306667, 47.84, 47.304167, 48.109444),
lon = c(13.671114, 12.866947, 15.94223, 11.099736, 12.958342,
14.203892, 11.86389, 16.526674, 16.193064, 17.071392)), row.names = c(NA,
10L), class = "data.frame", .Names = c("id", "lat", "lon"))
set2 <- structure(list(id = 1:10, lat = structure(c(35.8499984741211,
34.75, 70.9329986572266, 78.25, 69.6829986572266, 74.515998840332,
70.3659973144531, 67.265998840332, 63.6990013122559, 60.1990013122559
), .Dim = 10L), lon = structure(c(14.4829998016357, 32.4000015258789,
-8.66600036621094, 15.4670000076294, 18.9160003662109, 19.0160007476807,
31.0990009307861, 14.3660001754761, 9.59899997711182, 11.0830001831055
), .Dim = 10L)), row.names = c(NA, 10L), class = "data.frame", .Names = c("id",
"lat", "lon"))
代码:
library(rgeos)
library(sp)
set1sp <- SpatialPoints(set1)
set2sp <- SpatialPoints(set2)
for (i in length(set1$id)){
for (j in 4:9){
if(i == 1) {
sub <- set2
set1[i,j] <- apply(gDistance(set1sp, set2sp, byid=TRUE), 1, which.min)
sub <- filter(sub, id != set1[i,j])}
else{
set1[i,j] <- apply(gDistance(set1sp, set2sp, byid=TRUE), 1, which.min)
sub <- filter(sub, id != set1[i,j])}
}
}
输出错误:
Error in `[<-.data.frame`(`*tmp*`, i, j, value = c(8L, 8L, 8L, 8L, 8L, :
replacement has 10 rows, data has 1
我必须为 set1sp
和 set2sp
设置投影系统和坐标才能使 gDistance
工作。我假定为 WGS84。
dummyset1= set1
dummyset2= set2
coordinates(set1) = c('lon', 'lat')
coordinates(set2) = c('lon', 'lat')
proj4string(set1) = "+proj=longlat +datum=WGS84"
proj4string(set2) = "+proj=longlat +datum=WGS84"
set1sp = set1
set2sp = set2
set1 = dummyset1
set2 = dummyset2
此循环将 return 根据使用 for 循环的一般结构得到您想要的输出。
for (i in 1:length(set1$id)){
#Store the projected data in a dummy variable sub
sub <- set2sp
for (j in 4:8){
if (j == 4){
set1[i,j] <- apply(gDistance(set2sp['id'], set1sp['id'][i,], byid=TRUE), 1, which.min)
#Remove the index of the closest point from sub.
sub <- sub[which(sub$id != set1[i,j]), ]
}
else {
#Note that sub is now being checked instead of set2sp. This is because sub has had the index of the closest point removed.
set1[i,j] <- apply(gDistance(sub['id'], set1sp['id'][i,], byid=TRUE), 1, which.min)
sub <- sub[which(sub$id != set1[i,j]), ]
}
}
}
结果输出为:
set1
id lat lon V4 V5 V6 V7 V8
1 5984 48.39167 13.67111 10 1 8 7 6
2 7495 48.14806 12.86695 10 1 8 7 6
3 4752 48.72111 15.94223 10 1 8 7 6
4 2654 47.18917 11.09974 1 9 8 7 6
5 4578 47.05444 12.95834 1 9 8 7 6
6 9865 47.12917 14.20389 1 9 8 7 6
7 3265 47.30667 11.86389 1 9 8 7 6
8 1252 47.84000 16.52667 1 9 8 7 6
9 4679 47.30417 16.19306 1 9 8 7 6
10 1346 48.10944 17.07139 1 9 8 7 6
以下计算集合 2 和集合 1 中所有点的大圆距离。然后取集合 1 的最小值,并对它们进行排序;然后绘图。
library(sp)
coordinates(set1) = c('lon', 'lat')
coordinates(set2) = c('lon', 'lat')
proj4string(set1) = "+proj=longlat +datum=WGS84"
proj4string(set2) = "+proj=longlat +datum=WGS84"
d = apply(spDists(set1,set2),2,min)
order(d)[1:5]
# [1] 1 10 9 2 8
plot(set2, pch=2, axes=TRUE)
points(set1)
o = order(d)[1:5]
points(set2[o,], col = 'red', pch=16)
我试图找到从一个数据集 (set1
) 到另一个数据集 (set2
) 的 5 个最近的站点。 This post 是我使用的基础,找到最接近的单个似乎很简单,但我正在编写 for
循环来处理它并且效率不高。此外,我收到错误并且不明白为什么它不起作用。理想情况下,我想使用 set1
在 set2
中找到最近的车站,找到 5 个最近的车站,并为每个车站添加一列,为 set1
中的每个唯一 ID。
编辑:这个问题与 How to assign a name to lat-long observations based on shortest distance 不同,因为我试图找到最近的 5 个车站,而不仅仅是一个距离。此外,寻找最小值的方法也不同。请重新打开这个问题。
输出:
set1 <- structure(list(id = c(5984, 7495, 4752, 2654, 4578, 9865, 3265,
1252, 4679, 1346), lat = c(48.39167, 48.148056, 48.721111, 47.189167,
47.054443, 47.129166, 47.306667, 47.84, 47.304167, 48.109444),
lon = c(13.671114, 12.866947, 15.94223, 11.099736, 12.958342,
14.203892, 11.86389, 16.526674, 16.193064, 17.071392)), row.names = c(NA,
10L), class = "data.frame", .Names = c("id", "lat", "lon"))
set2 <- structure(list(id = 1:10, lat = structure(c(35.8499984741211,
34.75, 70.9329986572266, 78.25, 69.6829986572266, 74.515998840332,
70.3659973144531, 67.265998840332, 63.6990013122559, 60.1990013122559
), .Dim = 10L), lon = structure(c(14.4829998016357, 32.4000015258789,
-8.66600036621094, 15.4670000076294, 18.9160003662109, 19.0160007476807,
31.0990009307861, 14.3660001754761, 9.59899997711182, 11.0830001831055
), .Dim = 10L)), row.names = c(NA, 10L), class = "data.frame", .Names = c("id",
"lat", "lon"))
代码:
library(rgeos)
library(sp)
set1sp <- SpatialPoints(set1)
set2sp <- SpatialPoints(set2)
for (i in length(set1$id)){
for (j in 4:9){
if(i == 1) {
sub <- set2
set1[i,j] <- apply(gDistance(set1sp, set2sp, byid=TRUE), 1, which.min)
sub <- filter(sub, id != set1[i,j])}
else{
set1[i,j] <- apply(gDistance(set1sp, set2sp, byid=TRUE), 1, which.min)
sub <- filter(sub, id != set1[i,j])}
}
}
输出错误:
Error in `[<-.data.frame`(`*tmp*`, i, j, value = c(8L, 8L, 8L, 8L, 8L, :
replacement has 10 rows, data has 1
我必须为 set1sp
和 set2sp
设置投影系统和坐标才能使 gDistance
工作。我假定为 WGS84。
dummyset1= set1
dummyset2= set2
coordinates(set1) = c('lon', 'lat')
coordinates(set2) = c('lon', 'lat')
proj4string(set1) = "+proj=longlat +datum=WGS84"
proj4string(set2) = "+proj=longlat +datum=WGS84"
set1sp = set1
set2sp = set2
set1 = dummyset1
set2 = dummyset2
此循环将 return 根据使用 for 循环的一般结构得到您想要的输出。
for (i in 1:length(set1$id)){
#Store the projected data in a dummy variable sub
sub <- set2sp
for (j in 4:8){
if (j == 4){
set1[i,j] <- apply(gDistance(set2sp['id'], set1sp['id'][i,], byid=TRUE), 1, which.min)
#Remove the index of the closest point from sub.
sub <- sub[which(sub$id != set1[i,j]), ]
}
else {
#Note that sub is now being checked instead of set2sp. This is because sub has had the index of the closest point removed.
set1[i,j] <- apply(gDistance(sub['id'], set1sp['id'][i,], byid=TRUE), 1, which.min)
sub <- sub[which(sub$id != set1[i,j]), ]
}
}
}
结果输出为:
set1
id lat lon V4 V5 V6 V7 V8
1 5984 48.39167 13.67111 10 1 8 7 6
2 7495 48.14806 12.86695 10 1 8 7 6
3 4752 48.72111 15.94223 10 1 8 7 6
4 2654 47.18917 11.09974 1 9 8 7 6
5 4578 47.05444 12.95834 1 9 8 7 6
6 9865 47.12917 14.20389 1 9 8 7 6
7 3265 47.30667 11.86389 1 9 8 7 6
8 1252 47.84000 16.52667 1 9 8 7 6
9 4679 47.30417 16.19306 1 9 8 7 6
10 1346 48.10944 17.07139 1 9 8 7 6
以下计算集合 2 和集合 1 中所有点的大圆距离。然后取集合 1 的最小值,并对它们进行排序;然后绘图。
library(sp)
coordinates(set1) = c('lon', 'lat')
coordinates(set2) = c('lon', 'lat')
proj4string(set1) = "+proj=longlat +datum=WGS84"
proj4string(set2) = "+proj=longlat +datum=WGS84"
d = apply(spDists(set1,set2),2,min)
order(d)[1:5]
# [1] 1 10 9 2 8
plot(set2, pch=2, axes=TRUE)
points(set1)
o = order(d)[1:5]
points(set2[o,], col = 'red', pch=16)