用 R 中的另一个变量填充 sp 地图对象的多边形

Question

我的目标是生成一个由另一个变量填充的洛杉矶邮政编码地图，比如使用 ggplot 的犯罪事件数量。由于sp对象中原本没有存储的填充变量，here建议的方法可能不能直接使用

首先，我有一个名为 ztca 的巨大数据集，它是一个 SpatialPolygonDataFrame 并包含所有多边形（其中一个图层称为 ZCTA5CE10，包含我处理的邮政编码） of USA from the US Census Bureau, primary sp object 的编写主要基于.

library(sp)
library(maptools)
library(zipcode)
data(zipcode) # load the zipcode as a data frame
library(ggmap)



# grab the zip code boundaries based on 2017 US Census Bureau
url <- "http://www2.census.gov/geo/tiger/GENZ2017/shp/cb_2017_us_zcta510_500k.zip"
fil <- "ztca.zip"

# If we have the file in the working directory, don't need to download again
if (!file.exists(fil)) { download.file(url, fil) }
unzip(fil, exdir="ztca")

# read them in (this takes a bit)
ztca <- readShapePoly("ztca/cb_2017_us_zcta510_500k.shp", verbose=TRUE)
class(ztca)

然后，我 select 感兴趣的一些邮政编码

zip.interest <- c(91306, 90008, 90014, 90094, 91326, 90212, 90001, 90006, 90301, 91401, 90095, 90021, 90089, 90057, 90029, 90502, 90036, 90745, 91330, 91411, 91601, 91607, 90302, 90073, 91340, 91602, 90071, 90039, 90011, 90248, 91303, 90020, 90732, 90405, 91342, 90077, 90061, 90015, 90002, 90041, 90211, 90059, 90066, 90210, 91343, 90062, 91606, 91403, 90063, 90012, 91307, 90293, 91402, 91364, 90007, 91331, 90005, 91040, 90031, 91324, 90043, 91316, 91352, 90069, 91325, 91344, 90019, 90056, 90032, 90291, 90027, 90247, 90232, 90013, 90042, 90035, 90024, 90026, 90034, 90016, 90018, 90004, 90067, 90010, 90025, 90047, 90037, 91605, 90049, 90744, 90028, 90272, 90275, 90731, 90710, 91042, 91345, 90003, 91335, 91304, 91311, 91214, 91604, 91608, 91405, 91367, 91406, 91436, 91504, 91423, 91505, 91356, 90292, 90402, 90065, 90501, 90230, 90048, 90046, 90038, 90044, 90810, 90023, 90068, 90045, 90717, 90064, 90058, 90033, 90017, 90090)

LA <- ztca[as.character(ztca$ZCTA5CE10) %in% as.character(zip.interest),]
class(LA)

# My data (want to be used as filling variable)
data <- matrix(c(90001, 90002, 90003, 90004, 90005, 90006, 90007, 90008, 90010, 90011, 90012, 90013, 90014, 90015, 90016, 90017, 90018, 90019, 90020, 90021, 90023, 90024, 90025, 90026, 90027, 90028, 90029, 90031, 90032, 90033, 90034, 90035, 90036, 90037, 90038, 90039, 90041, 90042, 90043, 90044, 90045, 90046, 90047, 90048, 90049, 90056, 90057, 90058, 90059, 90061, 90062, 90063, 90064, 90065, 90066, 90067, 90068, 90069, 90071, 90073, 90077, 90089, 90090, 90094, 90095, 90210, 90211, 90212, 90230, 90232, 90247, 90248, 90272, 90275, 90291, 90292, 90293, 90301, 90302, 90402, 90405, 90501, 90502, 90710, 90717, 90731, 90732, 90744, 90745, 90810, 91040, 91042, 91214, 91303, 91304, 91306, 91307, 91311, 91316, 91324, 91325, 91326, 91330, 91331, 91335, 91340, 91342, 91343, 91344, 91345, 91352, 91356, 91364, 91367, 91401, 91402, 91403, 91405, 91406, 91411, 91423, 91436, 91504, 91505, 91601, 91602, 91604, 91605, 91606, 91607, 91608, 7086, 20731, 45814, 24883, 15295, 23099, 22163, 24461, 6351, 40047, 16028, 22822, 9500, 22859, 23201, 19959, 21000, 25806, 12917, 11767, 12791, 9221, 14967, 26850, 21314, 36956, 14556, 12233, 12986, 19864, 17239, 10843, 17399, 35476, 13786, 10083, 9214, 18613, 22267, 46012, 26956, 14627, 24949, 10958, 10238,  294, 21961, 3037, 15441, 12432, 20470, 3673, 12165, 14869, 14516, 2299, 10013, 2458, 1862,  473, 1755, 3772, 691, 1418, 200, 1689, 813,  181, 3553, 912, 3807, 2693, 5017, 558, 18841, 5872, 3890, 573,  322, 592,  331, 5795, 1119, 7323,  667, 29879, 5318, 22950, 110, 144, 5919, 7465, 135, 17871, 14904, 16814, 6811, 14521, 9806, 17144, 9857, 7175,  647, 32723, 25985, 3182, 29040, 23947, 18500, 7194, 19031, 12226, 11075, 16189, 17455, 26382, 9818, 22774, 22124, 12000, 14122, 4965,  566,  522, 20601, 7204, 14051, 24116, 18162, 11174, 478),ncol=2)

colnames(data) <- c("ZCTA5CE10", "cnt" )
data <- as.data.frame(data)

LA@data <- cbind(LA@data, cnt=data$cnt)

# Suppose plot by
ggplot(data = LA, aes(x = long, y = lat, fill = cnt, group = group)) +
geom_polygon(colour = "black") +
coord_equal() + coord_fixed(1.3)

但是我得到了“为每个多边形定义的区域错误：美学必须是长度 1 或与数据相同 (6877)：x、y、填充、组。我想当我 select 那些多边形 and/or 添加 cnt.

感谢您的帮助和评论！！！

Answer 1

为此，您可能需要从 GitHub 安装 ggplot2。可能不是，但我一直在使用它的开发版本，不能真正降级这个例子。

切换到sf：

library(sf)
library(tidyverse)

sf::st_read(
  path.expand("~/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp"),
  stringsAsFactors = FALSE
) -> ztca
## Reading layer `cb_2017_us_zcta510_500k' from data source `/Users/hrbrmstr/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33144 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs

ztca
## Simple feature collection with 33144 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
##    ZCTA5CE10     AFFGEOID10 GEOID10    ALAND10  AWATER10                       geometry
## 1      35442 8600000US35442   35442  610213891  10838694 MULTIPOLYGON (((-88.25262 3...
## 2      85365 8600000US85365   85365 3545016067   9766486 MULTIPOLYGON (((-114.6847 3...
## 3      71973 8600000US71973   71973  204670474   1264709 MULTIPOLYGON (((-94.46643 3...
## 4      95445 8600000US95445   95445  221559097   7363179 MULTIPOLYGON (((-123.6431 3...
## 5      06870 8600000US06870   06870    5945321   3841130 MULTIPOLYGON (((-73.58766 4...
## 6      19964 8600000US19964   19964   24601672    124382 MULTIPOLYGON (((-75.74767 3...
## 7      32233 8600000US32233   32233   26427514   8163517 MULTIPOLYGON (((-81.45931 3...
## 8      90401 8600000US90401   90401    2199165    439378 MULTIPOLYGON (((-118.5028 3...
## 9      30817 8600000US30817   30817  498860155 105853904 MULTIPOLYGON (((-82.5951 33...
## 10     30458 8600000US30458   30458  378422575  12351325 MULTIPOLYGON (((-81.73032 3...

它应该读得更快。

现在我们需要您的数据。首先是拉链：

c(
  91306, 90008, 90014, 90094, 91326, 90212, 90001, 90006, 90301, 91401, 90095,
  90021, 90089, 90057, 90029, 90502, 90036, 90745, 91330, 91411, 91601, 91607,
  90302, 90073, 91340, 91602, 90071, 90039, 90011, 90248, 91303, 90020, 90732,
  90405, 91342, 90077, 90061, 90015, 90002, 90041, 90211, 90059, 90066, 90210,
  91343, 90062, 91606, 91403, 90063, 90012, 91307, 90293, 91402, 91364, 90007,
  91331, 90005, 91040, 90031, 91324, 90043, 91316, 91352, 90069, 91325, 91344,
  90019, 90056, 90032, 90291, 90027, 90247, 90232, 90013, 90042, 90035, 90024,
  90026, 90034, 90016, 90018, 90004, 90067, 90010, 90025, 90047, 90037, 91605,
  90049, 90744, 90028, 90272, 90275, 90731, 90710, 91042, 91345, 90003, 91335,
  91304, 91311, 91214, 91604, 91608, 91405, 91367, 91406, 91436, 91504, 91423,
  91505, 91356, 90292, 90402, 90065, 90501, 90230, 90048, 90046, 90038, 90044,
  90810, 90023, 90068, 90045, 90717, 90064, 90058, 90033, 90017, 90090
) %>% as.character() -> zip_interest

ZCTA5CE10 字段是 zip (IIRC)，因此我们将其过滤掉：

la_df <- filter(ztca, ZCTA5CE10 %in% zip_interest)

然后，我们制作您的压缩填充数据：

data_frame(
  zipcode = c(
    90001, 90002, 90003, 90004, 90005, 90006,
    90007, 90008, 90010, 90011, 90012, 90013, 90014, 90015, 90016,
    90017, 90018, 90019, 90020, 90021, 90023, 90024, 90025, 90026,
    90027, 90028, 90029, 90031, 90032, 90033, 90034, 90035, 90036,
    90037, 90038, 90039, 90041, 90042, 90043, 90044, 90045, 90046,
    90047, 90048, 90049, 90056, 90057, 90058, 90059, 90061, 90062,
    90063, 90064, 90065, 90066, 90067, 90068, 90069, 90071, 90073,
    90077, 90089, 90090, 90094, 90095, 90210, 90211, 90212, 90230,
    90232, 90247, 90248, 90272, 90275, 90291, 90292, 90293, 90301,
    90302, 90402, 90405, 90501, 90502, 90710, 90717, 90731, 90732,
    90744, 90745, 90810, 91040, 91042, 91214, 91303, 91304, 91306,
    91307, 91311, 91316, 91324, 91325, 91326, 91330, 91331, 91335,
    91340, 91342, 91343, 91344, 91345, 91352, 91356, 91364, 91367,
    91401, 91402, 91403, 91405, 91406, 91411, 91423, 91436, 91504,
    91505, 91601, 91602, 91604, 91605, 91606, 91607, 91608) %>%
    as.character(),
  fillvar = c(
    7086,
    20731, 45814, 24883, 15295, 23099, 22163, 24461, 6351, 40047,
    16028, 22822, 9500, 22859, 23201, 19959, 21000, 25806, 12917,
    11767, 12791, 9221, 14967, 26850, 21314, 36956, 14556, 12233,
    12986, 19864, 17239, 10843, 17399, 35476, 13786, 10083, 9214,
    18613, 22267, 46012, 26956, 14627, 24949, 10958, 10238, 294,
    21961, 3037, 15441, 12432, 20470, 3673, 12165, 14869, 14516,
    2299, 10013, 2458, 1862, 473, 1755, 3772, 691, 1418, 200, 1689,
    813, 181, 3553, 912, 3807, 2693, 5017, 558, 18841, 5872, 3890,
    573, 322, 592, 331, 5795, 1119, 7323, 667, 29879, 5318, 22950,
    110, 144, 5919, 7465, 135, 17871, 14904, 16814, 6811, 14521,
    9806, 17144, 9857, 7175, 647, 32723, 25985, 3182, 29040, 23947,
    18500, 7194, 19031, 12226, 11075, 16189, 17455, 26382, 9818,
    22774, 22124, 12000, 14122, 4965, 566, 522, 20601, 7204, 14051,
    24116, 18162, 11174, 478)
) -> zip_df

现在，只需将该数据连接到 sf 对象并绘制它即可：

left_join(zip_df, by=c("ZCTA5CE10"="zipcode")) %>%
  ggplot() +
  geom_sf(aes(fill=fillvar), size=0.125, color="#b2b2b2") + # thinner lines
  coord_sf(datum = NA) + # this gets rid of the graticule
  viridis::scale_fill_viridis(name="Better legend name:", labels=scales::comma) + # color-blind friendly color map
  ggthemes::theme_map() + # clean map
  theme(legend.position="top") + # legend on top
  theme(legend.key.width = unit(3, "lines")) # wider legend

用 R 中的另一个变量填充 sp 地图对象的多边形

Filling polygons of a sp map object by another variable in R

r

polygon

ggplot2

sp