如何使用 KML 多边形过滤坐标数据框?
How to filter a dataframe of geocoordinates using a KML polygon?
我有一个覆盖整个地球 (the US Geological Service's earthquake feed) 的数据点 CSV,我只想过滤美国境内的地震。
我从 U.S 中提取的 KML 文件。人口普查局:
https://www.census.gov/geo/maps-data/data/kml/kml_nation.html
在 R 中,rgdal
库可以加载 KML 文件:
library(rgdal)
kml = readOGR("kmls/cb_2014_us_nation_20m.kml", 'cb_2014_us_nation_20m')
如何使用 dplyr
/plyr
/等。过滤 data.frame(地理数据的列为 latitude
和 longitude
)仅针对位于 KML 指定边界内的行?
编辑,post-答案:
这是我在 中用来快速可视化的内容:
library(sp)
library(rgdal)
# download earthquakes
url <- "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv"
fil <- "all_week.csv"
if (!file.exists(fil)) download.file(url, fil)
quakes <- read.csv("all_week.csv", stringsAsFactors=FALSE)
# create spatial object
sp::coordinates(quakes) <- ~longitude+latitude
# download nation KML
url <- "http://www2.census.gov/geo/tiger/GENZ2014/kml/cb_2014_us_nation_20m.zip"
fil <- "uskml.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="uskml")
# read KML file
us <- rgdal::readOGR("./uskml/cb_2014_us_nation_20m.kml", "cb_2014_us_nation_20m")
sp::proj4string(quakes) <- sp::proj4string(us)
length(quakes)
# 1514
usquakes = quakes[us,]
length(usquakes)
# 1260
### visualize
plot(us)
# plot all quakes
points(quakes$longitude, quakes$latitude)
# plot just US
points(usquakes$longitude, usquakes$latitude)
结果图像:
谢谢@hrbrmstr!
这将以 cpl 方式进行:
library(sp)
library(maptools)
library(rgeos) # not entirely necessary
library(rgdal) # not entirely necessary
url <- "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv"
fil <- "all_week.csv"
if (!file.exists(fil)) download.file(url, fil)
quakes <- read.csv("all_week.csv", stringsAsFactors=FALSE)
coordinates(quakes) <- ~longitude+latitude
url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_nation_20m.zip"
fil <- "us.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="us")
us <- readShapePoly("us/cb_2014_us_nation_20m.shp")
# alternatively
# us <- rgdal::readOGR("us/cbcb_2014_us_nation_20m.shp", "cb_2014_us_nation_20m")
# TRUE if in
in_us_rgeos <- rgeos::gIntersects(quakes, us, byid=TRUE)
# <NA> if in
in_us_over <- quakes %over% us
gIntersects
需要更长的时间。 rgdal
和 rgeos
可以 在某些系统上工作。 YMMV
使用美国 KML 需要(大部分)rgdal
:
# you wanted KML shapefile tho
url <- "http://www2.census.gov/geo/tiger/GENZ2014/kml/cb_2014_us_nation_20m.zip"
fil <- "uskml.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="uskml")
us <- rgdal::readOGR("uskml/cb_2014_us_nation_20m.kml", "cb_2014_us_nation_20m")
proj4string(quakes) <- proj4string(us)
rgeos::gIntersects(quakes, us, byid=TRUE)
head(quakes %over% us)
我有一个覆盖整个地球 (the US Geological Service's earthquake feed) 的数据点 CSV,我只想过滤美国境内的地震。
我从 U.S 中提取的 KML 文件。人口普查局:
https://www.census.gov/geo/maps-data/data/kml/kml_nation.html
在 R 中,rgdal
库可以加载 KML 文件:
library(rgdal)
kml = readOGR("kmls/cb_2014_us_nation_20m.kml", 'cb_2014_us_nation_20m')
如何使用 dplyr
/plyr
/等。过滤 data.frame(地理数据的列为 latitude
和 longitude
)仅针对位于 KML 指定边界内的行?
编辑,post-答案:
这是我在
library(sp)
library(rgdal)
# download earthquakes
url <- "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv"
fil <- "all_week.csv"
if (!file.exists(fil)) download.file(url, fil)
quakes <- read.csv("all_week.csv", stringsAsFactors=FALSE)
# create spatial object
sp::coordinates(quakes) <- ~longitude+latitude
# download nation KML
url <- "http://www2.census.gov/geo/tiger/GENZ2014/kml/cb_2014_us_nation_20m.zip"
fil <- "uskml.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="uskml")
# read KML file
us <- rgdal::readOGR("./uskml/cb_2014_us_nation_20m.kml", "cb_2014_us_nation_20m")
sp::proj4string(quakes) <- sp::proj4string(us)
length(quakes)
# 1514
usquakes = quakes[us,]
length(usquakes)
# 1260
### visualize
plot(us)
# plot all quakes
points(quakes$longitude, quakes$latitude)
# plot just US
points(usquakes$longitude, usquakes$latitude)
结果图像:
谢谢@hrbrmstr!
这将以 cpl 方式进行:
library(sp)
library(maptools)
library(rgeos) # not entirely necessary
library(rgdal) # not entirely necessary
url <- "http://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.csv"
fil <- "all_week.csv"
if (!file.exists(fil)) download.file(url, fil)
quakes <- read.csv("all_week.csv", stringsAsFactors=FALSE)
coordinates(quakes) <- ~longitude+latitude
url <- "http://www2.census.gov/geo/tiger/GENZ2014/shp/cb_2014_us_nation_20m.zip"
fil <- "us.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="us")
us <- readShapePoly("us/cb_2014_us_nation_20m.shp")
# alternatively
# us <- rgdal::readOGR("us/cbcb_2014_us_nation_20m.shp", "cb_2014_us_nation_20m")
# TRUE if in
in_us_rgeos <- rgeos::gIntersects(quakes, us, byid=TRUE)
# <NA> if in
in_us_over <- quakes %over% us
gIntersects
需要更长的时间。 rgdal
和 rgeos
可以 在某些系统上工作。 YMMV
使用美国 KML 需要(大部分)rgdal
:
# you wanted KML shapefile tho
url <- "http://www2.census.gov/geo/tiger/GENZ2014/kml/cb_2014_us_nation_20m.zip"
fil <- "uskml.zip"
if (!file.exists(fil)) download.file(url, fil)
unzip(fil, exdir="uskml")
us <- rgdal::readOGR("uskml/cb_2014_us_nation_20m.kml", "cb_2014_us_nation_20m")
proj4string(quakes) <- proj4string(us)
rgeos::gIntersects(quakes, us, byid=TRUE)
head(quakes %over% us)