从纬度和经度检索国家坐标
Retrieving country coordinates from latitude and longitude
我正在尝试使用 RDSTK
包中的 coordinates2politics()
检索给定坐标(纬度、经度)所在国家的名称:
# Packages used below: dplyr (%>%), plyr (ldply), rjson (fromJSON),
# RDSTK (coordinates2politics).
# NOTE(review): plyr is attached after dplyr, so mutate() below presumably
# resolves to plyr::mutate rather than dplyr::mutate -- confirm.
library(dplyr)
library(plyr)
library(rjson)
library(RDSTK)
# Add two columns to the input data frame: the raw JSON response and the
# country name extracted from it.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# `df1` data shown further down -- confirm.
df2 <- df %>%
mutate(politics = coordinates2politics(place_lat, place_lon),
# NOTE(review): the lookup is issued a second time here instead of reusing
# `politics`, and fromJSON() receives the whole vector of JSON strings at
# once rather than one string per row -- presumably why every row ends up
# with the same country in the dput(df2) output; confirm.
country = ldply(fromJSON(coordinates2politics(place_lat, place_lon)),
data.frame)[["politics.name"]])
在这里,coordinates2politics(latitude, longitude)
返回一个 JSON 字符串,我将其转换为数据帧以提取 politics.name
在 df2
中,我得到 politics
的正确值(这是整个 JSON 字符串)但 country
的值错误
- 使用此方法(转换为数据帧),我如何
从 JSON 字符串中检索元素?
- 是否有更有效的方法从 JSON 字符串中提取元素?
- 是否有更好的方法从给定的纬度/经度检索国家名称(除了使用
RDSTK
包)?
df1
> dput(head(df1, 10L))
structure(list(place_lat = c(-23.682803, 30.109684, 36.232855,
26.674996, 40.655138, 40.00134, 44.0752271, 32.230987, -9.5333295,
38.3045585), place_lon = c(-46.5955455, -93.767675, -115.223125,
-81.816602, -73.9487755, -74.1880345, -103.2334107, -90.1580165,
-35.6871125, -92.4367735), location = c("South West", "North West",
"North West", "North West", "North West", "North West", "North West",
"North West", "South West", "North West"), sentiment = c("positive",
"positive", "neutral", "positive", "neutral", "positive", "positive",
"neutral", "positive", "neutral"), id = 1:10), .Names = c("place_lat",
"place_lon", "location", "sentiment", "id"), row.names = c(NA,
10L), class = "data.frame")
df2
> dput(head(df2, n=2L))
structure(list(place_lat = c(-23.682803, 30.109684), place_lon = c(-46.5955455,
-93.767675), location = c("South West", "North West"), sentiment = c("positive",
"positive"), id = 1:2, politics = structure(c("[\n {\n \"politics\": [\n {\n \"type\": \"admin2\",\n \"friendly_type\": \"country\",\n \"name\": \"Brazil\",\n \"code\": \"bra\"\n },\n {\n \"type\": \"admin4\",\n \"friendly_type\": \"state\",\n \"name\": \"São Paulo\",\n \"code\": \"br32\"\n }\n ],\n \"location\": {\n \"latitude\": -23.682803,\n \"longitude\": -46.5955455\n }\n }\n]",
"[\n {\n \"politics\": [\n {\n \"type\": \"admin2\",\n \"friendly_type\": \"country\",\n \"name\": \"United States\",\n \"code\": \"usa\"\n },\n {\n \"type\": \"constituency\",\n \"friendly_type\": \"constituency\",\n \"name\": \"Eighth district, TX\",\n \"code\": \"48_08\"\n },\n {\n \"type\": \"admin6\",\n \"friendly_type\": \"county\",\n \"name\": \"Orange\",\n \"code\": \"48_361\"\n },\n {\n \"type\": \"admin4\",\n \"friendly_type\": \"state\",\n \"name\": \"Texas\",\n \"code\": \"us48\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"Orange\",\n \"code\": \"48_54132\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"Pinehurst\",\n \"code\": \"48_57608\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"\",\n \"code\": \"_\"\n }\n ],\n \"location\": {\n \"latitude\": 30.109684,\n \"longitude\": -93.767675\n }\n }\n]"
), .Names = c("http://www.datasciencetoolkit.org/coordinates2politics/-23.682803%2c-46.5955455",
"http://www.datasciencetoolkit.org/coordinates2politics/30.109684%2c-93.767675"
)), country = structure(c(1L, 1L), .Label = "Brazil", class = "factor")), .Names = c("place_lat",
"place_lon", "location", "sentiment", "id", "politics", "country"
), row.names = 1:2, class = "data.frame")
这是根据纬度/经度获取国家/地区名称的另一种方法(方法不止这一种)。它不需要向服务器发起 API 调用。(实际/生产环境使用时,请将 GeoJSON 文件保存到本地):
library(rgdal)
library(magrittr)

# Country polygons, read as GeoJSON directly from the URL
# (layer name "OGRGeoJSON").
world <- readOGR(
  "https://raw.githubusercontent.com/AshKyd/geojson-regions/master/data/source/ne_50m_admin_0_countries.geo.json",
  "OGRGeoJSON"
)

# Turn the lon/lat columns into SpatialPoints that share the CRS of `world`.
pts <- SpatialPoints(
  coordinates(select(places, place_lon, place_lat)),
  CRS(proj4string(world))
)

# Overlay the points on the polygons, keep the ISO code and country name,
# and bind them onto the original rows (the result is auto-printed).
(pts %over% world) %>%
  select(iso_a2, name) %>%
  cbind(places, .)
## place_lat place_lon location sentiment id iso_a2 name
## 1 -23.68280 -46.59555 South West positive 1 BR Brazil
## 2 30.10968 -93.76767 North West positive 2 US United States
## 3 36.23286 -115.22312 North West neutral 3 US United States
## 4 26.67500 -81.81660 North West positive 4 US United States
## 5 40.65514 -73.94878 North West neutral 5 US United States
## 6 40.00134 -74.18803 North West positive 6 US United States
## 7 44.07523 -103.23341 North West positive 7 US United States
## 8 32.23099 -90.15802 North West neutral 8 US United States
## 9 -9.53333 -35.68711 South West positive 9 BR Brazil
## 10 38.30456 -92.43677 North West neutral 10 US United States
您可以使用 gadm2 shapefile 获取更精细的位置数据,但它非常庞大,加载需要一段时间(即使在我的系统上也是如此):
# Reading the full GADM shapefile is very slow.
big_world <- readOGR("gadm2.shp", "gadm2")

# The overlay is slow as well, so its result is kept in `big_res`
# rather than being recomputed.
big_pts <- SpatialPoints(
  coordinates(select(places, place_lon, place_lat)),
  CRS(proj4string(big_world))
)
big_res <- big_pts %over% big_world

# Pick and rename the administrative-level columns, then bind them onto
# the original rows (the result is auto-printed).
big_res %>%
  select(iso_a2 = ISO, name = NAME_0, name_1 = NAME_1, name_2 = NAME_2) %>%
  cbind(places, .)
## place_lat place_lon location sentiment id iso_a2 name name_1 name_2
## 1 -23.68280 -46.59555 South West positive 1 BRA Brazil São Paulo Diadema
## 2 30.10968 -93.76767 North West positive 2 USA United States Texas Orange
## 3 36.23286 -115.22312 North West neutral 3 USA United States Nevada Clark
## 4 26.67500 -81.81660 North West positive 4 USA United States Florida Lee
## 5 40.65514 -73.94878 North West neutral 5 USA United States New York Kings
## 6 40.00134 -74.18803 North West positive 6 USA United States New Jersey Ocean
## 7 44.07523 -103.23341 North West positive 7 USA United States South Dakota Pennington
## 8 32.23099 -90.15802 North West neutral 8 USA United States Mississippi Rankin
## 9 -9.53333 -35.68711 South West positive 9 BRA Brazil Alagoas Maceió (capital)
## 10 38.30456 -92.43677 North West neutral 10 USA United States Missouri Miller
如果您可以使用 geonames
包,您可以改为查询该服务。
> # library() stops with an error when geonames is not installed;
> # require() would only warn and return FALSE, so later calls would fail.
> library(geonames)
> # "myusername" is a placeholder -- presumably your own GeoNames account name.
> options(geonamesUsername = "myusername")
您需要 GNcountryCode 函数的矢量化版本:
> # Wrap GNcountryCode so it accepts whole lat/lon vectors.
> vg <- Vectorize(GNcountryCode)
然后dplyr:
> # Take the "countryCode" row of the vectorized result and flatten it into a column.
> df1 %>% mutate(cc = unlist(vg(place_lat, place_lon)["countryCode", ]))
place_lat place_lon location sentiment id cc
1 -23.68280 -46.59555 South West positive 1 BR
2 30.10968 -93.76767 North West positive 2 US
3 36.23286 -115.22312 North West neutral 3 US
4 26.67500 -81.81660 North West positive 4 US
5 40.65514 -73.94878 North West neutral 5 US
6 40.00134 -74.18803 North West positive 6 US
7 44.07523 -103.23341 North West positive 7 US
8 32.23099 -90.15802 North West neutral 8 US
9 -9.53333 -35.68711 South West positive 9 BR
10 38.30456 -92.43677 North West neutral 10 US
如果你想要国家名称,请改用 "countryName";但要注意,对于大家通常称为 "Brasil"(或 "Brazil")的国家,你得到的会是 "Federative Republic of Brazil"。
我正在尝试使用 RDSTK
包中的 coordinates2politics()
检索给定坐标(纬度、经度)所在国家的名称:
# Packages used below: dplyr (%>%), plyr (ldply), rjson (fromJSON),
# RDSTK (coordinates2politics).
# NOTE(review): plyr is attached after dplyr, so mutate() below presumably
# resolves to plyr::mutate rather than dplyr::mutate -- confirm.
library(dplyr)
library(plyr)
library(rjson)
library(RDSTK)
# Add two columns to the input data frame: the raw JSON response and the
# country name extracted from it.
# NOTE(review): `df` is not defined in this snippet; presumably it is the
# `df1` data shown further down -- confirm.
df2 <- df %>%
mutate(politics = coordinates2politics(place_lat, place_lon),
# NOTE(review): the lookup is issued a second time here instead of reusing
# `politics`, and fromJSON() receives the whole vector of JSON strings at
# once rather than one string per row -- presumably why every row ends up
# with the same country in the dput(df2) output; confirm.
country = ldply(fromJSON(coordinates2politics(place_lat, place_lon)),
data.frame)[["politics.name"]])
在这里,coordinates2politics(latitude, longitude)
返回一个 JSON 字符串,我将其转换为数据帧以提取 politics.name
在 df2
中,我得到 politics
的正确值(这是整个 JSON 字符串),但 country 的值错误
- 使用此方法(转换为数据帧),我如何从 JSON 字符串中检索元素?
- 是否有更有效的方法从 JSON 字符串中提取元素?
- 是否有更好的方法从给定的纬度/经度检索国家名称(除了使用
RDSTK
包)?
df1
> dput(head(df1, 10L))
structure(list(place_lat = c(-23.682803, 30.109684, 36.232855,
26.674996, 40.655138, 40.00134, 44.0752271, 32.230987, -9.5333295,
38.3045585), place_lon = c(-46.5955455, -93.767675, -115.223125,
-81.816602, -73.9487755, -74.1880345, -103.2334107, -90.1580165,
-35.6871125, -92.4367735), location = c("South West", "North West",
"North West", "North West", "North West", "North West", "North West",
"North West", "South West", "North West"), sentiment = c("positive",
"positive", "neutral", "positive", "neutral", "positive", "positive",
"neutral", "positive", "neutral"), id = 1:10), .Names = c("place_lat",
"place_lon", "location", "sentiment", "id"), row.names = c(NA,
10L), class = "data.frame")
df2
> dput(head(df2, n=2L))
structure(list(place_lat = c(-23.682803, 30.109684), place_lon = c(-46.5955455,
-93.767675), location = c("South West", "North West"), sentiment = c("positive",
"positive"), id = 1:2, politics = structure(c("[\n {\n \"politics\": [\n {\n \"type\": \"admin2\",\n \"friendly_type\": \"country\",\n \"name\": \"Brazil\",\n \"code\": \"bra\"\n },\n {\n \"type\": \"admin4\",\n \"friendly_type\": \"state\",\n \"name\": \"São Paulo\",\n \"code\": \"br32\"\n }\n ],\n \"location\": {\n \"latitude\": -23.682803,\n \"longitude\": -46.5955455\n }\n }\n]",
"[\n {\n \"politics\": [\n {\n \"type\": \"admin2\",\n \"friendly_type\": \"country\",\n \"name\": \"United States\",\n \"code\": \"usa\"\n },\n {\n \"type\": \"constituency\",\n \"friendly_type\": \"constituency\",\n \"name\": \"Eighth district, TX\",\n \"code\": \"48_08\"\n },\n {\n \"type\": \"admin6\",\n \"friendly_type\": \"county\",\n \"name\": \"Orange\",\n \"code\": \"48_361\"\n },\n {\n \"type\": \"admin4\",\n \"friendly_type\": \"state\",\n \"name\": \"Texas\",\n \"code\": \"us48\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"Orange\",\n \"code\": \"48_54132\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"Pinehurst\",\n \"code\": \"48_57608\"\n },\n {\n \"type\": \"admin5\",\n \"friendly_type\": \"city\",\n \"name\": \"\",\n \"code\": \"_\"\n }\n ],\n \"location\": {\n \"latitude\": 30.109684,\n \"longitude\": -93.767675\n }\n }\n]"
), .Names = c("http://www.datasciencetoolkit.org/coordinates2politics/-23.682803%2c-46.5955455",
"http://www.datasciencetoolkit.org/coordinates2politics/30.109684%2c-93.767675"
)), country = structure(c(1L, 1L), .Label = "Brazil", class = "factor")), .Names = c("place_lat",
"place_lon", "location", "sentiment", "id", "politics", "country"
), row.names = 1:2, class = "data.frame")
这是根据纬度/经度获取国家/地区名称的另一种方法(方法不止这一种)。它不需要向服务器发起 API 调用。(实际/生产环境使用时,请将 GeoJSON 文件保存到本地):
library(rgdal)
library(magrittr)

# Country polygons, read as GeoJSON directly from the URL
# (layer name "OGRGeoJSON").
world <- readOGR(
  "https://raw.githubusercontent.com/AshKyd/geojson-regions/master/data/source/ne_50m_admin_0_countries.geo.json",
  "OGRGeoJSON"
)

# Turn the lon/lat columns into SpatialPoints that share the CRS of `world`.
pts <- SpatialPoints(
  coordinates(select(places, place_lon, place_lat)),
  CRS(proj4string(world))
)

# Overlay the points on the polygons, keep the ISO code and country name,
# and bind them onto the original rows (the result is auto-printed).
(pts %over% world) %>%
  select(iso_a2, name) %>%
  cbind(places, .)
## place_lat place_lon location sentiment id iso_a2 name
## 1 -23.68280 -46.59555 South West positive 1 BR Brazil
## 2 30.10968 -93.76767 North West positive 2 US United States
## 3 36.23286 -115.22312 North West neutral 3 US United States
## 4 26.67500 -81.81660 North West positive 4 US United States
## 5 40.65514 -73.94878 North West neutral 5 US United States
## 6 40.00134 -74.18803 North West positive 6 US United States
## 7 44.07523 -103.23341 North West positive 7 US United States
## 8 32.23099 -90.15802 North West neutral 8 US United States
## 9 -9.53333 -35.68711 South West positive 9 BR Brazil
## 10 38.30456 -92.43677 North West neutral 10 US United States
您可以使用 gadm2 shapefile 获取更精细的位置数据,但它非常庞大,加载需要一段时间(即使在我的系统上也是如此):
# Reading the full GADM shapefile is very slow.
big_world <- readOGR("gadm2.shp", "gadm2")

# The overlay is slow as well, so its result is kept in `big_res`
# rather than being recomputed.
big_pts <- SpatialPoints(
  coordinates(select(places, place_lon, place_lat)),
  CRS(proj4string(big_world))
)
big_res <- big_pts %over% big_world

# Pick and rename the administrative-level columns, then bind them onto
# the original rows (the result is auto-printed).
big_res %>%
  select(iso_a2 = ISO, name = NAME_0, name_1 = NAME_1, name_2 = NAME_2) %>%
  cbind(places, .)
## place_lat place_lon location sentiment id iso_a2 name name_1 name_2
## 1 -23.68280 -46.59555 South West positive 1 BRA Brazil São Paulo Diadema
## 2 30.10968 -93.76767 North West positive 2 USA United States Texas Orange
## 3 36.23286 -115.22312 North West neutral 3 USA United States Nevada Clark
## 4 26.67500 -81.81660 North West positive 4 USA United States Florida Lee
## 5 40.65514 -73.94878 North West neutral 5 USA United States New York Kings
## 6 40.00134 -74.18803 North West positive 6 USA United States New Jersey Ocean
## 7 44.07523 -103.23341 North West positive 7 USA United States South Dakota Pennington
## 8 32.23099 -90.15802 North West neutral 8 USA United States Mississippi Rankin
## 9 -9.53333 -35.68711 South West positive 9 BRA Brazil Alagoas Maceió (capital)
## 10 38.30456 -92.43677 North West neutral 10 USA United States Missouri Miller
如果您可以使用 geonames
包,您可以改为查询该服务。
> # library() stops with an error when geonames is not installed;
> # require() would only warn and return FALSE, so later calls would fail.
> library(geonames)
> # "myusername" is a placeholder -- presumably your own GeoNames account name.
> options(geonamesUsername = "myusername")
您需要 GNcountryCode 函数的矢量化版本:
> # Wrap GNcountryCode so it accepts whole lat/lon vectors.
> vg <- Vectorize(GNcountryCode)
然后dplyr:
> # Take the "countryCode" row of the vectorized result and flatten it into a column.
> df1 %>% mutate(cc = unlist(vg(place_lat, place_lon)["countryCode", ]))
place_lat place_lon location sentiment id cc
1 -23.68280 -46.59555 South West positive 1 BR
2 30.10968 -93.76767 North West positive 2 US
3 36.23286 -115.22312 North West neutral 3 US
4 26.67500 -81.81660 North West positive 4 US
5 40.65514 -73.94878 North West neutral 5 US
6 40.00134 -74.18803 North West positive 6 US
7 44.07523 -103.23341 North West positive 7 US
8 32.23099 -90.15802 North West neutral 8 US
9 -9.53333 -35.68711 South West positive 9 BR
10 38.30456 -92.43677 North West neutral 10 US
如果你想要国家名称,请改用 "countryName";但要注意,对于大家通常称为 "Brasil"(或 "Brazil")的国家,你得到的会是 "Federative Republic of Brazil"。