从坐标中提取地址分量
Extract address components from coordiantes
我正在尝试使用 R 进行反向地理编码。我第一次使用 ggmap 但无法使用我的 API 密钥。现在我正在尝试使用 googleway。
newframe[,c("Front.lat","Front.long")]
Front.lat Front.long
1 -37.82681 144.9592
2 -37.82681 145.9592
newframe$address <- apply(newframe, 1, function(x){
google_reverse_geocode(location = as.numeric(c(x["Front.lat"],
x["Front.long"])),
key = "xxxx")
})
这将变量提取为列表,但我无法弄清楚结构。
我正在努力弄清楚如何提取下面列出的地址组件作为 newframe 中的变量
postal_code
、administrative_area_level_1
、administrative_area_level_2
、locality
、route
、street_number
我希望每个地址组件都作为一个单独的变量。
您可以轻松使用ggmap:revgeocode
;看下面:
library(ggmap)
df <- cbind(df,do.call(rbind,
lapply(1:nrow(df),
function(i)
revgeocode(as.numeric(
df[i,2:1]), output = "more")
[c("administrative_area_level_1","locality","postal_code","address")])))
#output:
df
# Front.lat Front.long administrative_area_level_1 locality
# 1 -37.82681 144.9592 Victoria Southbank
# 2 -37.82681 145.9592 Victoria Noojee
# postal_code address
# 1 3006 45 Clarke St, Southbank VIC 3006, Australia
# 2 3833 Cec Dunns Track, Noojee VIC 3833, Australia
您可以将 "route"
和 "street_number"
添加到要提取的变量中,但正如您所见,第二个地址没有街道号码,这将导致错误。
注意:您也可以使用sub
从地址中提取信息。
数据:
df <- structure(list(Front.lat = c(-37.82681, -37.82681), Front.long =
c(144.9592, 145.9592)), .Names = c("Front.lat", "Front.long"), class = "data.frame",
row.names = c(NA, -2L))
Google 的 API returns JSON 中的响应。其中,当翻译成 R 自然形成嵌套列表。在 googleway
内部,这是通过 jsonlite::fromJSON()
完成的
在 googleway
中,我通过使用 simplify
参数让您选择返回原始 JSON 或列表。
我特意从 Google 的响应中返回了所有数据,并留给用户通过通常的列表子集操作提取他们感兴趣的元素。
话虽如此,在 googleway 的开发版本中,我编写了一些函数来帮助访问各种 API 调用的元素。以下是其中的三个,可能对你有用
## Install the development version
# devtools::install_github("SymbolixAU/googleway")
res <- google_reverse_geocode(
location = c(df[1, 'Front.lat'], df[1, 'Front.long']),
key = apiKey
)
geocode_address(res)
# [1] "45 Clarke St, Southbank VIC 3006, Australia"
# [2] "Bank Apartments, 275-283 City Rd, Southbank VIC 3006, Australia"
# [3] "Southbank VIC 3006, Australia"
# [4] "Melbourne VIC, Australia"
# [5] "South Wharf VIC 3006, Australia"
# [6] "Melbourne, VIC, Australia"
# [7] "CBD & South Melbourne, VIC, Australia"
# [8] "Melbourne Metropolitan Area, VIC, Australia"
# [9] "Victoria, Australia"
# [10] "Australia"
geocode_address_components(res)
# long_name short_name types
# 1 45 45 street_number
# 2 Clarke Street Clarke St route
# 3 Southbank Southbank locality, political
# 4 Melbourne City Melbourne administrative_area_level_2, political
# 5 Victoria VIC administrative_area_level_1, political
# 6 Australia AU country, political
# 7 3006 3006 postal_code
geocode_type(res)
# [[1]]
# [1] "street_address"
#
# [[2]]
# [1] "establishment" "general_contractor" "point_of_interest"
#
# [[3]]
# [1] "locality" "political"
#
# [[4]]
# [1] "colloquial_area" "locality" "political"
在对 newframe$address 进行反向地理编码后,地址组件可以进一步提取如下:
# Make a boolean array of the valid ("OK" status) responses (other statuses may be "NO_RESULTS", "REQUEST_DENIED" etc).
sel <- sapply(c(1: nrow(newframe)), function(x){
newframe$address[[x]]$status == 'OK'
})
# Get the address_components of the first result (i.e. best match) returned per geocoded coordinate.
address.components <- sapply(c(1: nrow(newframe[sel,])), function(x){
newframe$address[[x]]$results[1,]$address_components
})
# Get all possible component types.
all.types <- unique(unlist(sapply(c(1: length(address.components)), function(x){
unlist(lapply(address.components[[x]]$types, function(l) l[[1]]))
})))
# Get "long_name" values of the address_components for each type present (the other option is "short_name").
all.values <- lapply(c(1: length(address.components)), function(x){
types <- unlist(lapply(address.components[[x]]$types, function(l) l[[1]]))
matches <- match(all.types, types)
values <- address.components[[x]]$long_name[matches]
})
# Bind results into a dataframe.
all.values <- do.call("rbind", all.values)
all.values <- as.data.frame(all.values)
names(all.values) <- all.types
# Add columns and update original data frame.
newframe[, all.types] <- NA
newframe[sel,][, all.types] <- all.values
请注意,我只保留了每个组件给出的第一个类型,有效地跳过了 "political" 类型,因为它出现在多个组件中并且可能是多余的,例如"administrative_area_level_1, political".
我正在尝试使用 R 进行反向地理编码。我第一次使用 ggmap 但无法使用我的 API 密钥。现在我正在尝试使用 googleway。
newframe[,c("Front.lat","Front.long")]
Front.lat Front.long
1 -37.82681 144.9592
2 -37.82681 145.9592
newframe$address <- apply(newframe, 1, function(x){
google_reverse_geocode(location = as.numeric(c(x["Front.lat"],
x["Front.long"])),
key = "xxxx")
})
这将变量提取为列表,但我无法弄清楚结构。
我正在努力弄清楚如何提取下面列出的地址组件作为 newframe 中的变量
postal_code
、administrative_area_level_1
、administrative_area_level_2
、locality
、route
、street_number
我希望每个地址组件都作为一个单独的变量。
您可以轻松使用ggmap:revgeocode
;看下面:
library(ggmap)
df <- cbind(df,do.call(rbind,
lapply(1:nrow(df),
function(i)
revgeocode(as.numeric(
df[i,2:1]), output = "more")
[c("administrative_area_level_1","locality","postal_code","address")])))
#output:
df
# Front.lat Front.long administrative_area_level_1 locality
# 1 -37.82681 144.9592 Victoria Southbank
# 2 -37.82681 145.9592 Victoria Noojee
# postal_code address
# 1 3006 45 Clarke St, Southbank VIC 3006, Australia
# 2 3833 Cec Dunns Track, Noojee VIC 3833, Australia
您可以将 "route"
和 "street_number"
添加到要提取的变量中,但正如您所见,第二个地址没有街道号码,这将导致错误。
注意:您也可以使用sub
从地址中提取信息。
数据:
df <- structure(list(Front.lat = c(-37.82681, -37.82681), Front.long =
c(144.9592, 145.9592)), .Names = c("Front.lat", "Front.long"), class = "data.frame",
row.names = c(NA, -2L))
Google 的 API returns JSON 中的响应。其中,当翻译成 R 自然形成嵌套列表。在 googleway
内部,这是通过 jsonlite::fromJSON()
在 googleway
中,我通过使用 simplify
参数让您选择返回原始 JSON 或列表。
我特意从 Google 的响应中返回了所有数据,并留给用户通过通常的列表子集操作提取他们感兴趣的元素。
话虽如此,在 googleway 的开发版本中,我编写了一些函数来帮助访问各种 API 调用的元素。以下是其中的三个,可能对你有用
## Install the development version
# devtools::install_github("SymbolixAU/googleway")
res <- google_reverse_geocode(
location = c(df[1, 'Front.lat'], df[1, 'Front.long']),
key = apiKey
)
geocode_address(res)
# [1] "45 Clarke St, Southbank VIC 3006, Australia"
# [2] "Bank Apartments, 275-283 City Rd, Southbank VIC 3006, Australia"
# [3] "Southbank VIC 3006, Australia"
# [4] "Melbourne VIC, Australia"
# [5] "South Wharf VIC 3006, Australia"
# [6] "Melbourne, VIC, Australia"
# [7] "CBD & South Melbourne, VIC, Australia"
# [8] "Melbourne Metropolitan Area, VIC, Australia"
# [9] "Victoria, Australia"
# [10] "Australia"
geocode_address_components(res)
# long_name short_name types
# 1 45 45 street_number
# 2 Clarke Street Clarke St route
# 3 Southbank Southbank locality, political
# 4 Melbourne City Melbourne administrative_area_level_2, political
# 5 Victoria VIC administrative_area_level_1, political
# 6 Australia AU country, political
# 7 3006 3006 postal_code
geocode_type(res)
# [[1]]
# [1] "street_address"
#
# [[2]]
# [1] "establishment" "general_contractor" "point_of_interest"
#
# [[3]]
# [1] "locality" "political"
#
# [[4]]
# [1] "colloquial_area" "locality" "political"
在对 newframe$address 进行反向地理编码后,地址组件可以进一步提取如下:
# Make a boolean array of the valid ("OK" status) responses (other statuses may be "NO_RESULTS", "REQUEST_DENIED" etc).
sel <- sapply(c(1: nrow(newframe)), function(x){
newframe$address[[x]]$status == 'OK'
})
# Get the address_components of the first result (i.e. best match) returned per geocoded coordinate.
address.components <- sapply(c(1: nrow(newframe[sel,])), function(x){
newframe$address[[x]]$results[1,]$address_components
})
# Get all possible component types.
all.types <- unique(unlist(sapply(c(1: length(address.components)), function(x){
unlist(lapply(address.components[[x]]$types, function(l) l[[1]]))
})))
# Get "long_name" values of the address_components for each type present (the other option is "short_name").
all.values <- lapply(c(1: length(address.components)), function(x){
types <- unlist(lapply(address.components[[x]]$types, function(l) l[[1]]))
matches <- match(all.types, types)
values <- address.components[[x]]$long_name[matches]
})
# Bind results into a dataframe.
all.values <- do.call("rbind", all.values)
all.values <- as.data.frame(all.values)
names(all.values) <- all.types
# Add columns and update original data frame.
newframe[, all.types] <- NA
newframe[sel,][, all.types] <- all.values
请注意,我只保留了每个组件给出的第一个类型,有效地跳过了 "political" 类型,因为它出现在多个组件中并且可能是多余的,例如"administrative_area_level_1, political".