R - 对于位置数据框,从另一个位置数据框中找到最近的成员
R - For a dataframe of locations, find the nearest member from another dataframe of locations
对于 DF1(3000 个位置)中的每个位置 (Lat/Lon),我需要使用 R 找到它到 DF2(1500 个位置)中最近位置的半正矢 (Haversine) 距离。
我可以通过对 DF2 中的位置进行硬编码或为 DF2 中的每个项目创建一个列来实现这一点,这非常麻烦。
关于如何高效完成此任务的任何想法?
Data structures
DF1
Lat Lon
DF2
Lat Lon
试试这个:
# load packages
# NOTE: the install.packages() calls download from CRAN each time the script
# runs; they are only needed once per machine.
install.packages("tidyverse")
# Attach the tidyverse: the script below relies on %>%, as_tibble(), mutate(),
# spread(), unnest(), select(), map(), bind_rows(), crossing(), group_by() and
# slice(), none of which are available until the package is attached.
library(tidyverse)
# NOTE(review): nothing in the visible script calls a geosphere function;
# kept because the original attaches it.
library(geosphere)
install.packages("rjson")
library(rjson)
# Helper used to fetch test data.
#
# Queries the api3.geo.admin.ch SearchServer endpoint for the search text `x`
# and returns a tibble holding the `detail`, `lat` and `lon` attributes of the
# FIRST search result only.
# Requires network access; fails if the response contains no results.
get_latlon <- function(x) {
  endpoint <- "https://api3.geo.admin.ch/rest/services/api/SearchServer?searchText="
  request_url <- paste0(endpoint, paste(x), "&type=locations")
  response <- rjson::fromJSON(file = URLencode(request_url))

  # Only the first hit is used; its `attrs` element is a named list.
  first_hit <- as_tibble(response$results[[1]])

  # Label each attribute row with its name, then turn the names into columns.
  labelled <- mutate(first_hit, attr_names = names(attrs))
  wide <- spread(labelled, attr_names, attrs)

  # The attribute columns are list-columns at this point; flatten them.
  flat <- unnest(
    wide,
    cols = c(
      detail, featureId, geom_quadindex, geom_st_box2d, label, lat,
      lon, num, origin, rank, x, y, zoomlevel
    )
  )

  select(flat, detail, lat, lon)
}
# Fetch coordinates for two sets of example place names.
cities1 <- c("spiez","zumikon","winterthur","neuenburg")
cities2 <- c("zurich","bern","lausanne")
# One lookup per name; the per-name tibbles are stacked into one data frame.
DF1 <- bind_rows(map(cities1, get_latlon))
DF2 <- bind_rows(map(cities2, get_latlon))
# Suffix the column names so both sets can live side by side in one table
# (detail.a / lat.a / lon.a versus detail.b / lat.b / lon.b).
names(DF1) <- paste0(names(DF1), ".a")
names(DF2) <- paste0(names(DF2), ".b")
# Every combination of a DF1 row with a DF2 row.
data <- crossing(DF1, DF2)
# function to calculate the Haversine distance
#
# Great-circle distance between points given in decimal degrees, computed
# with the haversine formula on a sphere of radius `r`.
#
# Arguments:
#   lon1, lat1  longitude and latitude of the first point(s), in degrees
#   lon2, lat2  longitude and latitude of the second point(s), in degrees
#   r           sphere radius; the result is in the same unit as `r`
#               (default 6378137 -- presumably the WGS84 equatorial radius
#               in metres; confirm if the unit matters)
#
# Returns a numeric vector of distances, computed element-wise over the
# inputs (scalars and vectors both work; standard R recycling applies).
# Stops with an error if the coordinates are not numeric.
# NOTE: longitude comes BEFORE latitude in the argument list.
haversine <- function(lon1, lat1, lon2, lat2, r = 6378137) {
  if (!is.numeric(c(lon1, lat1, lon2, lat2))) {
    stop("Inputs must be numeric")
  }

  # Convert degrees to radians
  lon1 <- lon1 * pi / 180
  lat1 <- lat1 * pi / 180
  lon2 <- lon2 * pi / 180
  lat2 <- lat2 * pi / 180

  delta_lon <- lon2 - lon1
  delta_lat <- lat2 - lat1

  a <- sin(delta_lat / 2)^2 +
    cos(lat1) * cos(lat2) * sin(delta_lon / 2)^2

  # Clamp to 1 so rounding error cannot push asin() out of its domain.
  # pmin() clamps element-wise; min() would collapse a vector of inputs to a
  # single value and return one distance for all pairs.
  central_angle <- 2 * asin(pmin(1, sqrt(a)))

  r * central_angle
}
# find the smallest distances for locations in DF1
# Result: one row per detail.a, holding the detail.b location with the
# smallest distance and that distance in the `haversine` column.
data %>%
  # Group per (a, b) pair so that haversine() is evaluated on one pair of
  # points per call (assumes `detail` identifies a location uniquely).
  group_by(detail.a, detail.b) %>%
  # haversine() expects longitude before latitude: (lon1, lat1, lon2, lat2).
  mutate(haversine = haversine(lon.a, lat.a, lon.b, lat.b)) %>%
  # Regroup per DF1 location and keep only its closest DF2 location.
  group_by(detail.a) %>%
  slice(which.min(haversine)) %>%
  # Drop the grouping so later operations on the result are not per-group.
  ungroup()
对于 DF1(3000 个位置)中的每个位置 (Lat/Lon),我需要使用 R 找到它到 DF2(1500 个位置)中最近位置的半正矢 (Haversine) 距离。
我可以通过对 DF2 中的位置进行硬编码或为 DF2 中的每个项目创建一个列来实现这一点,这非常麻烦。
关于如何高效完成此任务的任何想法?
Data structures
DF1
Lat Lon
DF2
Lat Lon
试试这个:
# load packages
# NOTE: the install.packages() calls download from CRAN each time the script
# runs; they are only needed once per machine.
install.packages("tidyverse")
# Attach the tidyverse: the script below relies on %>%, as_tibble(), mutate(),
# spread(), unnest(), select(), map(), bind_rows(), crossing(), group_by() and
# slice(), none of which are available until the package is attached.
library(tidyverse)
# NOTE(review): nothing in the visible script calls a geosphere function;
# kept because the original attaches it.
library(geosphere)
install.packages("rjson")
library(rjson)
# Helper used to fetch test data.
#
# Queries the api3.geo.admin.ch SearchServer endpoint for the search text `x`
# and returns a tibble holding the `detail`, `lat` and `lon` attributes of the
# FIRST search result only.
# Requires network access; fails if the response contains no results.
get_latlon <- function(x) {
  endpoint <- "https://api3.geo.admin.ch/rest/services/api/SearchServer?searchText="
  request_url <- paste0(endpoint, paste(x), "&type=locations")
  response <- rjson::fromJSON(file = URLencode(request_url))

  # Only the first hit is used; its `attrs` element is a named list.
  first_hit <- as_tibble(response$results[[1]])

  # Label each attribute row with its name, then turn the names into columns.
  labelled <- mutate(first_hit, attr_names = names(attrs))
  wide <- spread(labelled, attr_names, attrs)

  # The attribute columns are list-columns at this point; flatten them.
  flat <- unnest(
    wide,
    cols = c(
      detail, featureId, geom_quadindex, geom_st_box2d, label, lat,
      lon, num, origin, rank, x, y, zoomlevel
    )
  )

  select(flat, detail, lat, lon)
}
# Fetch coordinates for two sets of example place names.
cities1 <- c("spiez","zumikon","winterthur","neuenburg")
cities2 <- c("zurich","bern","lausanne")
# One lookup per name; the per-name tibbles are stacked into one data frame.
DF1 <- bind_rows(map(cities1, get_latlon))
DF2 <- bind_rows(map(cities2, get_latlon))
# Suffix the column names so both sets can live side by side in one table
# (detail.a / lat.a / lon.a versus detail.b / lat.b / lon.b).
names(DF1) <- paste0(names(DF1), ".a")
names(DF2) <- paste0(names(DF2), ".b")
# Every combination of a DF1 row with a DF2 row.
data <- crossing(DF1, DF2)
# function to calculate the Haversine distance
#
# Great-circle distance between points given in decimal degrees, computed
# with the haversine formula on a sphere of radius `r`.
#
# Arguments:
#   lon1, lat1  longitude and latitude of the first point(s), in degrees
#   lon2, lat2  longitude and latitude of the second point(s), in degrees
#   r           sphere radius; the result is in the same unit as `r`
#               (default 6378137 -- presumably the WGS84 equatorial radius
#               in metres; confirm if the unit matters)
#
# Returns a numeric vector of distances, computed element-wise over the
# inputs (scalars and vectors both work; standard R recycling applies).
# Stops with an error if the coordinates are not numeric.
# NOTE: longitude comes BEFORE latitude in the argument list.
haversine <- function(lon1, lat1, lon2, lat2, r = 6378137) {
  if (!is.numeric(c(lon1, lat1, lon2, lat2))) {
    stop("Inputs must be numeric")
  }

  # Convert degrees to radians
  lon1 <- lon1 * pi / 180
  lat1 <- lat1 * pi / 180
  lon2 <- lon2 * pi / 180
  lat2 <- lat2 * pi / 180

  delta_lon <- lon2 - lon1
  delta_lat <- lat2 - lat1

  a <- sin(delta_lat / 2)^2 +
    cos(lat1) * cos(lat2) * sin(delta_lon / 2)^2

  # Clamp to 1 so rounding error cannot push asin() out of its domain.
  # pmin() clamps element-wise; min() would collapse a vector of inputs to a
  # single value and return one distance for all pairs.
  central_angle <- 2 * asin(pmin(1, sqrt(a)))

  r * central_angle
}
# find the smallest distances for locations in DF1
# Result: one row per detail.a, holding the detail.b location with the
# smallest distance and that distance in the `haversine` column.
data %>%
  # Group per (a, b) pair so that haversine() is evaluated on one pair of
  # points per call (assumes `detail` identifies a location uniquely).
  group_by(detail.a, detail.b) %>%
  # haversine() expects longitude before latitude: (lon1, lat1, lon2, lat2).
  mutate(haversine = haversine(lon.a, lat.a, lon.b, lat.b)) %>%
  # Regroup per DF1 location and keep only its closest DF2 location.
  group_by(detail.a) %>%
  slice(which.min(haversine)) %>%
  # Drop the grouping so later operations on the result are not per-group.
  ungroup()