如何根据 shapefile 将 latitude/longitude 数据分组到不同的组中?
How to group latitude/longitude data into different groups based on a shapefile?
原始问题:我有一个数据集,其中每一行的纬度和经度都在纽约范围内。现在我需要将每一行分组到纽约的一个邮政编码中。我有包含 https://gis.ny.gov/gisdata/inventories/details.cfm?DSID=934.
中所有可用边界的 shapefile
添加纬度、经度的样本数据http://pastebin.com/mXntxhK2
这里有一个方法:
library(raster)
library(rgeos)
# example data
filename <- system.file("external/lux.shp", package="raster")
zip <- shapefile(filename)
set.seed(0)
xy <- coordinates(spsample(zip, 10, 'random'))
plot(zip, col='gray')
points(xy, pch=20, col='red', cex=2)
#
extract(zip, xy)
你也可以使用sp::over
over
/ %over%
正如@RobertH 建议的那样工作得很好。
library(sp)
library(raster)
library(rgdal)
library(dplyr)
# get the shapefile without wasting bandwidth
URL <- "http://gis.ny.gov/gisdata/data/ds_934/zip_codes_shp.zip"
fil <- "nyzips.zip"
if (!file.exists(fil)) download.file(URL, fil)
shp <- grep("shp$", unzip(fil), value=TRUE)
ny <- readOGR(shp[2], ogrListLayers(shp[2])[1], stringsAsFactors=FALSE)
# you didn't give us data so we have to create some by random sampling
# within the bounding box
ny_area <- as(extent(bbox(ny)), "SpatialPolygons")
set.seed(1492) # reproducible
pts <- spsample(ny_area, 3000, "random")
proj4string(pts) <- proj4string(ny)
# this does the lon/lat to zip mapping
zip_where <- pts %over% ny
# since we fabricated data, not all will be in a zip code since
# ny isn't a rectangle, so we remove the "bad" data
zip_where <- zip_where[complete.cases(zip_where),]
arrange(count(zip_where, POSTAL), desc(n))
## Source: local data frame [602 x 2]
##
## POSTAL n
## (chr) (int)
## 1 12847 16
## 2 12980 14
## 3 13367 14
## 4 13625 10
## 5 12843 9
## 6 12986 9
## 7 12134 8
## 8 12852 7
## 9 13324 7
## 10 13331 7
## .. ... ...
由于您提供了坐标示例,下面介绍了如何读取它们并将它们转换为 NY shapefile 的投影以便进行聚合:
pts <- read.csv("http://pastebin.com/raw.php?i=mXntxhK2", na.strings="null")
pts <- pts[complete.cases(pts),]
coordinates(pts) <- ~longitude+latitude
proj4string(pts) <- CRS("+proj=longlat +datum=WGS84")
pts <- spTransform(pts, proj4string(ny))
# this does the lon/lat to zip mapping
zip_where <- pts %over% ny
# but since we fabricated data, not all will be in a zip code since
# ny isn't a rectangle, so we remove the "bad" data
zip_where <- zip_where[complete.cases(zip_where),]
arrange(count(zip_where, POSTAL), desc(n))
## Source: local data frame [158 x 2]
##
## POSTAL n
## (chr) (int)
## 1 11238 28
## 2 11208 25
## 3 11230 20
## 4 10027 19
## 5 11229 17
## 6 11219 16
## 7 11385 16
## 8 11206 15
## 9 11211 15
## 10 11214 14
## .. ... ...
原始问题:我有一个数据集,其中每一行的纬度和经度都在纽约范围内。现在我需要将每一行分组到纽约的一个邮政编码中。我有包含 https://gis.ny.gov/gisdata/inventories/details.cfm?DSID=934.
中所有可用边界的 shapefile添加纬度、经度的样本数据http://pastebin.com/mXntxhK2
这里有一个方法:
library(raster)
library(rgeos)
# example data
filename <- system.file("external/lux.shp", package="raster")
zip <- shapefile(filename)
set.seed(0)
xy <- coordinates(spsample(zip, 10, 'random'))
plot(zip, col='gray')
points(xy, pch=20, col='red', cex=2)
#
extract(zip, xy)
你也可以使用sp::over
over
/ %over%
正如@RobertH 建议的那样工作得很好。
library(sp)
library(raster)
library(rgdal)
library(dplyr)
# get the shapefile without wasting bandwidth
URL <- "http://gis.ny.gov/gisdata/data/ds_934/zip_codes_shp.zip"
fil <- "nyzips.zip"
if (!file.exists(fil)) download.file(URL, fil)
shp <- grep("shp$", unzip(fil), value=TRUE)
ny <- readOGR(shp[2], ogrListLayers(shp[2])[1], stringsAsFactors=FALSE)
# you didn't give us data so we have to create some by random sampling
# within the bounding box
ny_area <- as(extent(bbox(ny)), "SpatialPolygons")
set.seed(1492) # reproducible
pts <- spsample(ny_area, 3000, "random")
proj4string(pts) <- proj4string(ny)
# this does the lon/lat to zip mapping
zip_where <- pts %over% ny
# since we fabricated data, not all will be in a zip code since
# ny isn't a rectangle, so we remove the "bad" data
zip_where <- zip_where[complete.cases(zip_where),]
arrange(count(zip_where, POSTAL), desc(n))
## Source: local data frame [602 x 2]
##
## POSTAL n
## (chr) (int)
## 1 12847 16
## 2 12980 14
## 3 13367 14
## 4 13625 10
## 5 12843 9
## 6 12986 9
## 7 12134 8
## 8 12852 7
## 9 13324 7
## 10 13331 7
## .. ... ...
由于您提供了坐标示例,下面介绍了如何读取它们并将它们转换为 NY shapefile 的投影以便进行聚合:
pts <- read.csv("http://pastebin.com/raw.php?i=mXntxhK2", na.strings="null")
pts <- pts[complete.cases(pts),]
coordinates(pts) <- ~longitude+latitude
proj4string(pts) <- CRS("+proj=longlat +datum=WGS84")
pts <- spTransform(pts, proj4string(ny))
# this does the lon/lat to zip mapping
zip_where <- pts %over% ny
# but since we fabricated data, not all will be in a zip code since
# ny isn't a rectangle, so we remove the "bad" data
zip_where <- zip_where[complete.cases(zip_where),]
arrange(count(zip_where, POSTAL), desc(n))
## Source: local data frame [158 x 2]
##
## POSTAL n
## (chr) (int)
## 1 11238 28
## 2 11208 25
## 3 11230 20
## 4 10027 19
## 5 11229 17
## 6 11219 16
## 7 11385 16
## 8 11206 15
## 9 11211 15
## 10 11214 14
## .. ... ...