使用 dplyr::mutate 计算到数据点的地理距离
Calculating geographic distances to data points with dplyr::mutate
我正在尝试将 R 与 tidyverse 包一起使用,但在将函数应用到我的数据时遇到了问题。我的数据包括 lat/long 坐标,我想计算从每个位置(我的数据框的行)到参考位置的距离。我正在尝试使用 geosphere::distm 函数。
library(tidyverse)
library(geosphere)
# Reference location (longitude, latitude) that every row is measured against.
my_long <- 172
my_lat <- -43
# Row-wise distance from the reference point to each row's (long, lat).
# The reference variables are `my_long` / `my_lat` as defined just above; the
# earlier call referred to `myLong` / `myLat`, which are never defined here.
data <- data %>%
  rowwise() %>%
  mutate(
    dist = distm(c(my_long, my_lat), c(long, lat), fun = distHaversine) # this works
  ) %>%
  ungroup() # drop the row-wise grouping so later verbs act on whole columns
我使用 rowwise()
函数使它工作,如上所述,但这已被弃用,所以我想知道如何使用现代 tidyverse
,即 dplyr
或 purrr
,我想,例如我最接近的是使用 map2:
# Thin wrapper around distm(): distance from (long1, lat1) to (long2, lat2),
# with distHaversine passed as the distance function.
my_distm <- function(long1, lat1, long2, lat2)
distm(c(long1, lat1), c(long2, lat2), fun=distHaversine)
# Attempt to apply my_distm across the long/lat columns with map2(),
# forwarding my_long and my_lat as trailing arguments.
# NOTE(review): map2() returns a list, so `dist` presumably ends up as a list
# column rather than a numeric one — confirm this is the failure being reported.
data <- data %>% mutate(
dist = map2(long, lat, my_distm, my_long, my_lat) # this doesn't
)
到目前为止我都失败了。
您可以将 mutate
与 mapply
一起使用:
library(tidyverse)
library(geosphere)
# Reference point and a two-row example data frame of coordinates.
my_long <- 172
my_lat <- -43
df <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43)
)
# Evaluate distm() one row at a time against the reference point.
df %>%
  rowwise() %>%
  mutate(
    dist = distm(c(my_long, my_lat), c(long, lat), fun = distHaversine) # this works
  )
#Source: local data frame [2 x 3]
#Groups: <by row>
# A tibble: 2 x 3
# long lat dist
# <dbl> <dbl> <dbl>
#1 170 -43 162824
#2 180 43 9606752
# Base-R alternative: mapply() walks the long and lat columns in parallel and
# calls distm() for each pair against the reference point.
df %>%
  mutate(
    dist = mapply(
      function(lg, lt) {
        distm(c(my_long, my_lat), c(lg, lt), fun = distHaversine)
      },
      long,
      lat
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
更新 使用 map2
:
# map2_dbl() (rather than map2()) makes `dist` a plain numeric column instead
# of a list column; as.numeric() drops the matrix dimensions distm() returns.
df %>%
  mutate(
    dist = map2_dbl(
      long, lat,
      # here .x stands for a value from long column, and .y for a value from lat column
      ~ as.numeric(distm(c(my_long, my_lat), c(.x, .y), fun = distHaversine))
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
使用my_distm
:
# Wrapper around distm() for a single pair of points, using distHaversine as
# the distance function.
my_distm <- function(long1, lat1, long2, lat2) {
  distm(c(long1, lat1), c(long2, lat2), fun = distHaversine)
}
# map2_dbl() returns a numeric vector, so `dist` is a numeric column rather
# than the list column that map2() would produce.
df %>%
  mutate(
    dist = map2_dbl(
      long, lat,
      ~ as.numeric(my_distm(my_long, my_lat, .x, .y))
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
您可以使用 distHaversine
而不是 distm
,并且 cbind
:
# Call distHaversine() once on whole columns: cbind() builds the two-column
# point matrices, so no per-row iteration is needed.
data %>%
  mutate(
    dist = distHaversine(
      cbind(myLong, myLat),
      cbind(long, lat)
    )
  )
示例数据:
# Example data: a single reference point (myLong, myLat) recycled by
# data.frame() against three target coordinates.
myLong <- 172
myLat <- -43
long <- c(180, 179, 179)
lat <- c(-40, -41, -40)
data <- data.frame(myLong, myLat, long, lat)
结果为:
myLong myLat long lat dist
1 172 -43 180 -40 745481.0
2 172 -43 179 -41 620164.8
3 172 -43 179 -40 672076.2
我也很喜欢 rowwise
,但由于您正在寻找其他解决方案
Psidom
的数据
# Reference point, also kept as one c(long, lat) vector to hand to distm(),
# plus a two-row example data frame.
my_long <- 172
my_lat <- -43
myval <- c(my_long, my_lat)
df <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43)
)
purrr 解决方案
这里是purrr::map
library(purrr)
# Iterate over row indices and measure each row of `df` against `myval`.
# seq_len() stays empty for a zero-row data frame (1:nrow(.) would give
# c(1, 0)), and map_dbl() yields a numeric column instead of a list column.
df1 <- df %>%
  mutate(
    dist = map_dbl(
      seq_len(nrow(.)),
      ~ as.numeric(distm(myval, df[.x, ], fun = distHaversine))
    )
  )
# long lat dist
# 1 170 -43 162824
# 2 180 43 9606752
您可以通过将 myval
多次重复成 2 列的形状 data.frame 来使用 map2
,但不能作为矢量
应 OP 的请求:
要从更大的数据框中选取 long
和 lat
两列并与 distm
一起使用,可以在 map
语句中使用 select
# Wider data frame with an extra column that must not reach distm().
garbage <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43),
  junk = c(0, 0)
)
# For each row index, keep only long/lat from that row before calling distm().
# seq_len() stays empty for a zero-row data frame (1:nrow(.) would give
# c(1, 0)), and map_dbl() yields a numeric column instead of a list column.
df1 <- garbage %>%
  mutate(
    dist = map_dbl(
      seq_len(nrow(.)),
      ~ as.numeric(
        distm(myval, select(garbage[.x, ], long, lat), fun = distHaversine)
      )
    )
  )
# long lat junk dist
# 1 170 -43 0 162824
# 2 180 43 0 9606752
使用迭代器应用解决方案
我也喜欢用iterators
进行行操作
library(iterators)
# Walk `df` row by row with an iterator and measure each row against `myval`.
# vapply() with numeric(1) pins the result to one double per row, whereas the
# return type of sapply() depends on its input.
df2 <- df %>%
  mutate(
    dist = vapply(
      iter(df, by = "row"),
      function(x) as.numeric(distm(myval, x, fun = distHaversine)),
      numeric(1)
    )
  )
# long lat dist
# 1 170 -43 162824
# 2 180 43 9606752
你可以使用 pmap()
# Distance between a start and an end coordinate via distm(), using its
# default distance function.
f <- function(StartLong, StartLat, EndLong, EndLat) {
  start <- c(StartLong, StartLat)
  end <- c(EndLong, EndLat)
  distm(start, end)
}
# pmap_dbl() walks the columns of df in parallel and calls f once per row.
df %>%
  mutate(dist = pmap_dbl(., f))
或者用 Vectorize()
将你的函数向量化,并直接在 mutate()
中使用:
# Vectorize() wraps f so that it can be called on whole columns directly
# inside mutate().
g <- Vectorize(f)
df %>%
  mutate(
    dist = g(StartLong, StartLat, EndLong, EndLat)
  )
给出:
# StartLong StartLat EndLong EndLat dist
#1 170 -43 172 -43 162824
#2 180 43 172 -43 9606752
来自 purrrlyr
by_row()
的另一个想法
library(purrrlyr)
# by_row() calls the function on each row of df; the results are collated as
# rows into a column named "dist".
df %>%
  by_row(
    function(x) {
      start <- c(x$StartLong, x$StartLat)
      end <- c(x$EndLong, x$EndLat)
      distm(start, end)
    },
    .collate = "rows",
    .to = "dist"
  )
给出:
## tibble [2 x 5]
# StartLong StartLat EndLong EndLat dist
# <dbl> <dbl> <dbl> <dbl> <dbl>
#1 170 -43 172 -43 162824
#2 180 43 172 -43 9606752
数据
# Example data: two start/end coordinate pairs, built with data.frame()
# instead of a dput()-style structure() call.
df <- data.frame(
  StartLong = c(170, 180),
  StartLat = c(-43, 43),
  EndLong = c(172, 172),
  EndLat = c(-43, -43)
)
我正在尝试将 R 与 tidyverse 包一起使用,但在将函数应用到我的数据时遇到了问题。我的数据包括 lat/long 坐标,我想计算从每个位置(我的数据框的行)到参考位置的距离。我正在尝试使用 geosphere::distm 函数。
library(tidyverse)
library(geosphere)
# Reference location (longitude, latitude) that every row is measured against.
my_long <- 172
my_lat <- -43
# Row-wise distance from the reference point to each row's (long, lat).
# The reference variables are `my_long` / `my_lat` as defined just above; the
# earlier call referred to `myLong` / `myLat`, which are never defined here.
data <- data %>%
  rowwise() %>%
  mutate(
    dist = distm(c(my_long, my_lat), c(long, lat), fun = distHaversine) # this works
  ) %>%
  ungroup() # drop the row-wise grouping so later verbs act on whole columns
我使用 rowwise()
函数使它工作,如上所述,但这已被弃用,所以我想知道如何使用现代 tidyverse
,即 dplyr
或 purrr
,我想,例如我最接近的是使用 map2:
# Thin wrapper around distm(): distance from (long1, lat1) to (long2, lat2),
# with distHaversine passed as the distance function.
my_distm <- function(long1, lat1, long2, lat2)
distm(c(long1, lat1), c(long2, lat2), fun=distHaversine)
# Attempt to apply my_distm across the long/lat columns with map2(),
# forwarding my_long and my_lat as trailing arguments.
# NOTE(review): map2() returns a list, so `dist` presumably ends up as a list
# column rather than a numeric one — confirm this is the failure being reported.
data <- data %>% mutate(
dist = map2(long, lat, my_distm, my_long, my_lat) # this doesn't
)
到目前为止我都失败了。
您可以将 mutate
与 mapply
一起使用:
library(tidyverse)
library(geosphere)
# Reference point and a two-row example data frame of coordinates.
my_long <- 172
my_lat <- -43
df <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43)
)
# Evaluate distm() one row at a time against the reference point.
df %>%
  rowwise() %>%
  mutate(
    dist = distm(c(my_long, my_lat), c(long, lat), fun = distHaversine) # this works
  )
#Source: local data frame [2 x 3]
#Groups: <by row>
# A tibble: 2 x 3
# long lat dist
# <dbl> <dbl> <dbl>
#1 170 -43 162824
#2 180 43 9606752
# Base-R alternative: mapply() walks the long and lat columns in parallel and
# calls distm() for each pair against the reference point.
df %>%
  mutate(
    dist = mapply(
      function(lg, lt) {
        distm(c(my_long, my_lat), c(lg, lt), fun = distHaversine)
      },
      long,
      lat
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
更新 使用 map2
:
# map2_dbl() (rather than map2()) makes `dist` a plain numeric column instead
# of a list column; as.numeric() drops the matrix dimensions distm() returns.
df %>%
  mutate(
    dist = map2_dbl(
      long, lat,
      # here .x stands for a value from long column, and .y for a value from lat column
      ~ as.numeric(distm(c(my_long, my_lat), c(.x, .y), fun = distHaversine))
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
使用my_distm
:
# Wrapper around distm() for a single pair of points, using distHaversine as
# the distance function.
my_distm <- function(long1, lat1, long2, lat2) {
  distm(c(long1, lat1), c(long2, lat2), fun = distHaversine)
}
# map2_dbl() returns a numeric vector, so `dist` is a numeric column rather
# than the list column that map2() would produce.
df %>%
  mutate(
    dist = map2_dbl(
      long, lat,
      ~ as.numeric(my_distm(my_long, my_lat, .x, .y))
    )
  )
# long lat dist
#1 170 -43 162824
#2 180 43 9606752
您可以使用 distHaversine
而不是 distm
,并且 cbind
:
# Call distHaversine() once on whole columns: cbind() builds the two-column
# point matrices, so no per-row iteration is needed.
data %>%
  mutate(
    dist = distHaversine(
      cbind(myLong, myLat),
      cbind(long, lat)
    )
  )
示例数据:
# Example data: a single reference point (myLong, myLat) recycled by
# data.frame() against three target coordinates.
myLong <- 172
myLat <- -43
long <- c(180, 179, 179)
lat <- c(-40, -41, -40)
data <- data.frame(myLong, myLat, long, lat)
结果为:
myLong myLat long lat dist
1 172 -43 180 -40 745481.0
2 172 -43 179 -41 620164.8
3 172 -43 179 -40 672076.2
我也很喜欢 rowwise
,但由于您正在寻找其他解决方案
Psidom
的数据
my_long <- 172
# Remaining reference-point setup: latitude, the combined vector handed to
# distm() (my_long is assigned on the preceding line), and a two-row example
# data frame.
my_lat <- -43
myval <- c(my_long, my_lat)
df <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43)
)
purrr 解决方案
这里是purrr::map
library(purrr)
# Iterate over row indices and measure each row of `df` against `myval`.
# seq_len() stays empty for a zero-row data frame (1:nrow(.) would give
# c(1, 0)), and map_dbl() yields a numeric column instead of a list column.
df1 <- df %>%
  mutate(
    dist = map_dbl(
      seq_len(nrow(.)),
      ~ as.numeric(distm(myval, df[.x, ], fun = distHaversine))
    )
  )
# long lat dist
# 1 170 -43 162824
# 2 180 43 9606752
您可以通过将 myval
多次重复成 2 列的形状 data.frame 来使用 map2
,但不能作为矢量
应 OP 的请求:
要从更大的数据框中选取 long
和 lat
两列并与 distm
一起使用,可以在 map
语句中使用 select
# Wider data frame with an extra column that must not reach distm().
garbage <- data.frame(
  long = c(170, 180),
  lat = c(-43, 43),
  junk = c(0, 0)
)
# For each row index, keep only long/lat from that row before calling distm().
# seq_len() stays empty for a zero-row data frame (1:nrow(.) would give
# c(1, 0)), and map_dbl() yields a numeric column instead of a list column.
df1 <- garbage %>%
  mutate(
    dist = map_dbl(
      seq_len(nrow(.)),
      ~ as.numeric(
        distm(myval, select(garbage[.x, ], long, lat), fun = distHaversine)
      )
    )
  )
# long lat junk dist
# 1 170 -43 0 162824
# 2 180 43 0 9606752
使用迭代器应用解决方案
我也喜欢用iterators
进行行操作
library(iterators)
# Walk `df` row by row with an iterator and measure each row against `myval`.
# vapply() with numeric(1) pins the result to one double per row, whereas the
# return type of sapply() depends on its input.
df2 <- df %>%
  mutate(
    dist = vapply(
      iter(df, by = "row"),
      function(x) as.numeric(distm(myval, x, fun = distHaversine)),
      numeric(1)
    )
  )
# long lat dist
# 1 170 -43 162824
# 2 180 43 9606752
你可以使用 pmap()
# Distance between a start and an end coordinate via distm(), using its
# default distance function.
f <- function(StartLong, StartLat, EndLong, EndLat) {
  start <- c(StartLong, StartLat)
  end <- c(EndLong, EndLat)
  distm(start, end)
}
# pmap_dbl() walks the columns of df in parallel and calls f once per row.
df %>%
  mutate(dist = pmap_dbl(., f))
或者用 Vectorize()
将你的函数向量化,并直接在 mutate()
中使用:
# Vectorize() wraps f so that it can be called on whole columns directly
# inside mutate().
g <- Vectorize(f)
df %>%
  mutate(
    dist = g(StartLong, StartLat, EndLong, EndLat)
  )
给出:
# StartLong StartLat EndLong EndLat dist
#1 170 -43 172 -43 162824
#2 180 43 172 -43 9606752
来自 purrrlyr
by_row()
的另一个想法
library(purrrlyr)
# by_row() calls the function on each row of df; the results are collated as
# rows into a column named "dist".
df %>%
  by_row(
    function(x) {
      start <- c(x$StartLong, x$StartLat)
      end <- c(x$EndLong, x$EndLat)
      distm(start, end)
    },
    .collate = "rows",
    .to = "dist"
  )
给出:
## tibble [2 x 5]
# StartLong StartLat EndLong EndLat dist
# <dbl> <dbl> <dbl> <dbl> <dbl>
#1 170 -43 172 -43 162824
#2 180 43 172 -43 9606752
数据
# Example data: two start/end coordinate pairs, built with data.frame()
# instead of a dput()-style structure() call.
df <- data.frame(
  StartLong = c(170, 180),
  StartLat = c(-43, 43),
  EndLong = c(172, 172),
  EndLat = c(-43, -43)
)