How to calculate trajectory length in R
This is what the sample looks like:
# A tibble: 10 x 5
trip_id start_location end_location start_time end_time
<int> <chr> <chr> <chr> <chr>
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30
# Sample data: ten trips, each with "lat,long" start/end strings and
# "HH:MM:SS" start/end times. Built as a bare list carrying tibble classes.
my.df <- structure(
  list(
    trip_id = c(1:7, 43456:43458),
    start_location = c(
      "13.6753,100.63453", "13.66348,100.71868", "13.63345,100.71102",
      "13.59653,100.70172", "13.57542,100.79453", "13.60123,100.71091",
      "13.60388,100.70617", "13.94582,100.735", "14.01229,100.66908",
      "13.79245,100.70217"
    ),
    end_location = c(
      "13.65828,100.71631", "13.65258,100.71571", "13.63349,100.71096",
      "13.63433,100.71101", "13.59612,100.74922", "13.63241,100.71297",
      "13.60567,100.71292", "13.95905,100.62037", "13.98712,100.61631",
      "13.90366,100.66788"
    ),
    start_time = c(
      "00:05:24", "03:49:54", "04:14:52", "05:01:52", "05:57:11",
      "06:21:52", "06:43:32", "19:28:51", "20:58:30", "22:09:30"
    ),
    end_time = c(
      "00:41:14", "03:57:52", "04:53:52", "05:36:52", "06:15:52",
      "06:33:52", "06:58:52", "20:28:30", "21:23:30", "22:40:30"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
我的问题是:如何根据 start_location 和 end_location 两列中的经纬度坐标,计算每次行程起点与终点之间的距离(长度)?
trip_id start_location end_location start_time end_time length (in meters)
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14 120
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52 500
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52 480
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52 7000
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52 1563
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52 7892
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52 200
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30 5863
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30 1478
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30 2348
geosphere 包可以根据经纬度计算距离。
# Split each "lat,long" string into separate latitude and longitude columns.
# (separate() comes from tidyr, distGeo() from geosphere.)
my.df <- tidyr::separate(my.df, start_location, into = c("start_lat", "start_long"), sep = ",")
my.df <- tidyr::separate(my.df, end_location, into = c("end_lat", "end_long"), sep = ",")

# separate() returns the pieces as character strings, so convert them to
# numeric explicitly instead of relying on implicit coercion inside distGeo().
# Points are passed as (longitude, latitude) columns.
start_pts <- cbind(as.numeric(my.df$start_long), as.numeric(my.df$start_lat))
end_pts <- cbind(as.numeric(my.df$end_long), as.numeric(my.df$end_lat))

# Distance in meters between each start/end pair.
my.df$length <- geosphere::distGeo(start_pts, end_pts)
# A tibble: 10 x 8
trip_id start_lat start_long end_lat end_long start_time end_time length
<int> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 13.6753 100.63453 13.65828 100.71631 00:05:24 00:41:14 9046.
2 2 13.66348 100.71868 13.65258 100.71571 03:49:54 03:57:52 1248.
3 3 13.63345 100.71102 13.63349 100.71096 04:14:52 04:53:52 7.86
4 4 13.59653 100.70172 13.63433 100.71101 05:01:52 05:36:52 4301.
5 5 13.57542 100.79453 13.59612 100.74922 05:57:11 06:15:52 5412.
6 6 13.60123 100.71091 13.63241 100.71297 06:21:52 06:33:52 3457.
7 7 13.60388 100.70617 13.60567 100.71292 06:43:32 06:58:52 757.
8 43456 13.94582 100.735 13.95905 100.62037 19:28:51 20:28:30 12473.
9 43457 14.01229 100.66908 13.98712 100.61631 20:58:30 21:23:30 6345.
10 43458 13.79245 100.70217 13.90366 100.66788 22:09:30 22:40:30 12850.
可以这样做。实际上,在计算地理距离之前,应该先把坐标向量中的顺序反转,因为 geosphere::distGeo() 要求坐标按(经度, 纬度)的顺序给出,而原始数据是(纬度, 经度):
# Parse every "lat,long" string into a numeric c(long, lat) pair (the
# order handed to distGeo()), then measure each start/end pair.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(str_split(loc, ","), function(parts) as.numeric(parts[2:1]))
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo))
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time length
<int> <list> <list> <chr> <chr> <dbl>
1 1 <dbl [2]> <dbl [2]> 00:05:24 00:41:14 9046.
2 2 <dbl [2]> <dbl [2]> 03:49:54 03:57:52 1248.
3 3 <dbl [2]> <dbl [2]> 04:14:52 04:53:52 7.86
4 4 <dbl [2]> <dbl [2]> 05:01:52 05:36:52 4301.
5 5 <dbl [2]> <dbl [2]> 05:57:11 06:15:52 5412.
6 6 <dbl [2]> <dbl [2]> 06:21:52 06:33:52 3457.
7 7 <dbl [2]> <dbl [2]> 06:43:32 06:58:52 757.
8 43456 <dbl [2]> <dbl [2]> 19:28:51 20:28:30 12473.
9 43457 <dbl [2]> <dbl [2]> 20:58:30 21:23:30 6345.
10 43458 <dbl [2]> <dbl [2]> 22:09:30 22:40:30 12850.
根据下面的评论,以下写法不会覆盖原始的 lat/long 列:
# Keep the original "lat,long" string columns untouched: the parsed
# c(long, lat) pairs go into helper list-columns suffixed with "_1".
my.df %>%
  mutate(
    across(
      ends_with("location"),
      ~ map(str_split(.x, ","), ~ c(as.numeric(.x[2]), as.numeric(.x[1]))),
      .names = "{.col}_1"
    )
  ) %>%
  mutate(length = map2_dbl(start_location_1, end_location_1, ~ distGeo(.x, .y))) %>%
  # The helper columns are only needed for the distance; drop them so the
  # pipeline ends cleanly instead of on a dangling pipe.
  select(-ends_with("_1"))
# Parse each "lat,long" string into a named numeric pair c(Long, Lat),
# compute the start/end distance, then spread the pairs into
# <column>_Long / <column>_Lat columns.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(
          str_split(loc, ","),
          function(parts) setNames(as.numeric(parts[2:1]), c("Long", "Lat"))
        )
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo)) %>%
  unnest_wider(start_location, names_sep = "_") %>%
  unnest_wider(end_location, names_sep = "_")
# A tibble: 10 x 8
trip_id start_location_Long start_location_Lat end_location_Long end_location_Lat start_time end_time length
<int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
1 1 101. 13.7 101. 13.7 00:05:24 00:41:14 9046.
2 2 101. 13.7 101. 13.7 03:49:54 03:57:52 1248.
3 3 101. 13.6 101. 13.6 04:14:52 04:53:52 7.86
4 4 101. 13.6 101. 13.6 05:01:52 05:36:52 4301.
5 5 101. 13.6 101. 13.6 05:57:11 06:15:52 5412.
6 6 101. 13.6 101. 13.6 06:21:52 06:33:52 3457.
7 7 101. 13.6 101. 13.6 06:43:32 06:58:52 757.
8 43456 101. 13.9 101. 14.0 19:28:51 20:28:30 12473.
9 43457 101. 14.0 101. 14.0 20:58:30 21:23:30 6345.
10 43458 101. 13.8 101. 13.9 22:09:30 22:40:30 12850.
# Parse each "lat,long" string into a named numeric pair c(Long, Lat),
# compute the start/end distance, then collapse every pair back into a
# single "long, lat" string so the location columns stay character.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      ~ map(str_split(.x, ","), ~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1])))
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, ~ distGeo(.x, .y))) %>%
  # This step needs its own mutate(across(...)) wrapper; without it the
  # parentheses are unbalanced and the pipe feeds a bare formula.
  mutate(
    across(
      ends_with("location"),
      ~ map_chr(.x, ~ paste(.x[1], .x[2], sep = ", "))
    )
  )
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time length
<int> <chr> <chr> <chr> <chr> <dbl>
1 1 100.63453, 13.6753 100.71631, 13.65828 00:05:24 00:41:14 9046.
2 2 100.71868, 13.66348 100.71571, 13.65258 03:49:54 03:57:52 1248.
3 3 100.71102, 13.63345 100.71096, 13.63349 04:14:52 04:53:52 7.86
4 4 100.70172, 13.59653 100.71101, 13.63433 05:01:52 05:36:52 4301.
5 5 100.79453, 13.57542 100.74922, 13.59612 05:57:11 06:15:52 5412.
6 6 100.71091, 13.60123 100.71297, 13.63241 06:21:52 06:33:52 3457.
7 7 100.70617, 13.60388 100.71292, 13.60567 06:43:32 06:58:52 757.
8 43456 100.735, 13.94582 100.62037, 13.95905 19:28:51 20:28:30 12473.
9 43457 100.66908, 14.01229 100.61631, 13.98712 20:58:30 21:23:30 6345.
10 43458 100.70217, 13.79245 100.66788, 13.90366 22:09:30 22:40:30 12850.
我想计算起点和终点之间的距离(长度)。那么,我应该使用哪些包和函数?
This is what the sample looks like:
# A tibble: 10 x 5
trip_id start_location end_location start_time end_time
<int> <chr> <chr> <chr> <chr>
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30
# Sample data: ten trips, each with "lat,long" start/end strings and
# "HH:MM:SS" start/end times. Built as a bare list carrying tibble classes.
my.df <- structure(
  list(
    trip_id = c(1:7, 43456:43458),
    start_location = c(
      "13.6753,100.63453", "13.66348,100.71868", "13.63345,100.71102",
      "13.59653,100.70172", "13.57542,100.79453", "13.60123,100.71091",
      "13.60388,100.70617", "13.94582,100.735", "14.01229,100.66908",
      "13.79245,100.70217"
    ),
    end_location = c(
      "13.65828,100.71631", "13.65258,100.71571", "13.63349,100.71096",
      "13.63433,100.71101", "13.59612,100.74922", "13.63241,100.71297",
      "13.60567,100.71292", "13.95905,100.62037", "13.98712,100.61631",
      "13.90366,100.66788"
    ),
    start_time = c(
      "00:05:24", "03:49:54", "04:14:52", "05:01:52", "05:57:11",
      "06:21:52", "06:43:32", "19:28:51", "20:58:30", "22:09:30"
    ),
    end_time = c(
      "00:41:14", "03:57:52", "04:53:52", "05:36:52", "06:15:52",
      "06:33:52", "06:58:52", "20:28:30", "21:23:30", "22:40:30"
    )
  ),
  row.names = c(NA, -10L),
  class = c("tbl_df", "tbl", "data.frame")
)
我的问题是:如何根据 start_location 和 end_location 两列中的经纬度坐标,计算每次行程起点与终点之间的距离(长度)?
trip_id start_location end_location start_time end_time length (in meters)
1 1 13.6753,100.63453 13.65828,100.71631 00:05:24 00:41:14 120
2 2 13.66348,100.71868 13.65258,100.71571 03:49:54 03:57:52 500
3 3 13.63345,100.71102 13.63349,100.71096 04:14:52 04:53:52 480
4 4 13.59653,100.70172 13.63433,100.71101 05:01:52 05:36:52 7000
5 5 13.57542,100.79453 13.59612,100.74922 05:57:11 06:15:52 1563
6 6 13.60123,100.71091 13.63241,100.71297 06:21:52 06:33:52 7892
7 7 13.60388,100.70617 13.60567,100.71292 06:43:32 06:58:52 200
8 43456 13.94582,100.735 13.95905,100.62037 19:28:51 20:28:30 5863
9 43457 14.01229,100.66908 13.98712,100.61631 20:58:30 21:23:30 1478
10 43458 13.79245,100.70217 13.90366,100.66788 22:09:30 22:40:30 2348
geosphere 包可以根据经纬度计算距离。
# Split each "lat,long" string into separate latitude and longitude columns.
# (separate() comes from tidyr, distGeo() from geosphere.)
my.df <- tidyr::separate(my.df, start_location, into = c("start_lat", "start_long"), sep = ",")
my.df <- tidyr::separate(my.df, end_location, into = c("end_lat", "end_long"), sep = ",")

# separate() returns the pieces as character strings, so convert them to
# numeric explicitly instead of relying on implicit coercion inside distGeo().
# Points are passed as (longitude, latitude) columns.
start_pts <- cbind(as.numeric(my.df$start_long), as.numeric(my.df$start_lat))
end_pts <- cbind(as.numeric(my.df$end_long), as.numeric(my.df$end_lat))

# Distance in meters between each start/end pair.
my.df$length <- geosphere::distGeo(start_pts, end_pts)
# A tibble: 10 x 8
trip_id start_lat start_long end_lat end_long start_time end_time length
<int> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 13.6753 100.63453 13.65828 100.71631 00:05:24 00:41:14 9046.
2 2 13.66348 100.71868 13.65258 100.71571 03:49:54 03:57:52 1248.
3 3 13.63345 100.71102 13.63349 100.71096 04:14:52 04:53:52 7.86
4 4 13.59653 100.70172 13.63433 100.71101 05:01:52 05:36:52 4301.
5 5 13.57542 100.79453 13.59612 100.74922 05:57:11 06:15:52 5412.
6 6 13.60123 100.71091 13.63241 100.71297 06:21:52 06:33:52 3457.
7 7 13.60388 100.70617 13.60567 100.71292 06:43:32 06:58:52 757.
8 43456 13.94582 100.735 13.95905 100.62037 19:28:51 20:28:30 12473.
9 43457 14.01229 100.66908 13.98712 100.61631 20:58:30 21:23:30 6345.
10 43458 13.79245 100.70217 13.90366 100.66788 22:09:30 22:40:30 12850.
可以这样做。实际上,在计算地理距离之前,应该先把坐标向量中的顺序反转,因为 geosphere::distGeo() 要求坐标按(经度, 纬度)的顺序给出,而原始数据是(纬度, 经度):
# Parse every "lat,long" string into a numeric c(long, lat) pair (the
# order handed to distGeo()), then measure each start/end pair.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(str_split(loc, ","), function(parts) as.numeric(parts[2:1]))
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo))
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time length
<int> <list> <list> <chr> <chr> <dbl>
1 1 <dbl [2]> <dbl [2]> 00:05:24 00:41:14 9046.
2 2 <dbl [2]> <dbl [2]> 03:49:54 03:57:52 1248.
3 3 <dbl [2]> <dbl [2]> 04:14:52 04:53:52 7.86
4 4 <dbl [2]> <dbl [2]> 05:01:52 05:36:52 4301.
5 5 <dbl [2]> <dbl [2]> 05:57:11 06:15:52 5412.
6 6 <dbl [2]> <dbl [2]> 06:21:52 06:33:52 3457.
7 7 <dbl [2]> <dbl [2]> 06:43:32 06:58:52 757.
8 43456 <dbl [2]> <dbl [2]> 19:28:51 20:28:30 12473.
9 43457 <dbl [2]> <dbl [2]> 20:58:30 21:23:30 6345.
10 43458 <dbl [2]> <dbl [2]> 22:09:30 22:40:30 12850.
根据下面的评论,以下写法不会覆盖原始的 lat/long 列:
# Keep the original "lat,long" string columns untouched: the parsed
# c(long, lat) pairs go into helper list-columns suffixed with "_1".
my.df %>%
  mutate(
    across(
      ends_with("location"),
      ~ map(str_split(.x, ","), ~ c(as.numeric(.x[2]), as.numeric(.x[1]))),
      .names = "{.col}_1"
    )
  ) %>%
  mutate(length = map2_dbl(start_location_1, end_location_1, ~ distGeo(.x, .y))) %>%
  # The helper columns are only needed for the distance; drop them so the
  # pipeline ends cleanly instead of on a dangling pipe.
  select(-ends_with("_1"))
# Parse each "lat,long" string into a named numeric pair c(Long, Lat),
# compute the start/end distance, then spread the pairs into
# <column>_Long / <column>_Lat columns.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      function(loc) {
        map(
          str_split(loc, ","),
          function(parts) setNames(as.numeric(parts[2:1]), c("Long", "Lat"))
        )
      }
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, distGeo)) %>%
  unnest_wider(start_location, names_sep = "_") %>%
  unnest_wider(end_location, names_sep = "_")
# A tibble: 10 x 8
trip_id start_location_Long start_location_Lat end_location_Long end_location_Lat start_time end_time length
<int> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
1 1 101. 13.7 101. 13.7 00:05:24 00:41:14 9046.
2 2 101. 13.7 101. 13.7 03:49:54 03:57:52 1248.
3 3 101. 13.6 101. 13.6 04:14:52 04:53:52 7.86
4 4 101. 13.6 101. 13.6 05:01:52 05:36:52 4301.
5 5 101. 13.6 101. 13.6 05:57:11 06:15:52 5412.
6 6 101. 13.6 101. 13.6 06:21:52 06:33:52 3457.
7 7 101. 13.6 101. 13.6 06:43:32 06:58:52 757.
8 43456 101. 13.9 101. 14.0 19:28:51 20:28:30 12473.
9 43457 101. 14.0 101. 14.0 20:58:30 21:23:30 6345.
10 43458 101. 13.8 101. 13.9 22:09:30 22:40:30 12850.
# Parse each "lat,long" string into a named numeric pair c(Long, Lat),
# compute the start/end distance, then collapse every pair back into a
# single "long, lat" string so the location columns stay character.
my.df %>%
  mutate(
    across(
      ends_with("location"),
      ~ map(str_split(.x, ","), ~ c(Long = as.numeric(.x[2]), Lat = as.numeric(.x[1])))
    )
  ) %>%
  mutate(length = map2_dbl(start_location, end_location, ~ distGeo(.x, .y))) %>%
  # This step needs its own mutate(across(...)) wrapper; without it the
  # parentheses are unbalanced and the pipe feeds a bare formula.
  mutate(
    across(
      ends_with("location"),
      ~ map_chr(.x, ~ paste(.x[1], .x[2], sep = ", "))
    )
  )
# A tibble: 10 x 6
trip_id start_location end_location start_time end_time length
<int> <chr> <chr> <chr> <chr> <dbl>
1 1 100.63453, 13.6753 100.71631, 13.65828 00:05:24 00:41:14 9046.
2 2 100.71868, 13.66348 100.71571, 13.65258 03:49:54 03:57:52 1248.
3 3 100.71102, 13.63345 100.71096, 13.63349 04:14:52 04:53:52 7.86
4 4 100.70172, 13.59653 100.71101, 13.63433 05:01:52 05:36:52 4301.
5 5 100.79453, 13.57542 100.74922, 13.59612 05:57:11 06:15:52 5412.
6 6 100.71091, 13.60123 100.71297, 13.63241 06:21:52 06:33:52 3457.
7 7 100.70617, 13.60388 100.71292, 13.60567 06:43:32 06:58:52 757.
8 43456 100.735, 13.94582 100.62037, 13.95905 19:28:51 20:28:30 12473.
9 43457 100.66908, 14.01229 100.61631, 13.98712 20:58:30 21:23:30 6345.
10 43458 100.70217, 13.79245 100.66788, 13.90366 22:09:30 22:40:30 12850.