用 R 中的另一个变量填充 sp 地图对象的多边形

Filling polygons of a sp map object by another variable in R

我的目标是生成一个由另一个变量填充的洛杉矶邮政编码地图,比如使用 ggplot 的犯罪事件数量。由于sp对象中原本没有存储的填充变量,here建议的方法可能不能直接使用

首先,我有一个名为 ztca 的巨大数据集,它是一个 SpatialPolygonDataFrame 并包含所有多边形(其中一个图层称为 ZCTA5CE10,包含我处理的邮政编码) of USA from the US Census Bureau, primary sp object 的编写主要基于.

library(sp)
library(maptools)
library(zipcode)
data(zipcode) # load the zipcode as a data frame
library(ggmap)



# grab the zip code boundaries based on 2017 US Census Bureau
url <- "http://www2.census.gov/geo/tiger/GENZ2017/shp/cb_2017_us_zcta510_500k.zip"
fil <- "ztca.zip"

# If we have the file in the working directory, don't need to download again
if (!file.exists(fil)) { download.file(url, fil) }
unzip(fil, exdir="ztca")

# read them in (this takes a bit)
ztca <- readShapePoly("ztca/cb_2017_us_zcta510_500k.shp", verbose=TRUE)
class(ztca)

然后,我 select 感兴趣的一些邮政编码

zip.interest <- c(91306, 90008, 90014, 90094, 91326, 90212, 90001, 90006, 90301, 91401, 90095, 90021, 90089, 90057, 90029, 90502, 90036, 90745, 91330, 91411, 91601, 91607, 90302, 90073, 91340, 91602, 90071, 90039, 90011, 90248, 91303, 90020, 90732, 90405, 91342, 90077, 90061, 90015, 90002, 90041, 90211, 90059, 90066, 90210, 91343, 90062, 91606, 91403, 90063, 90012, 91307, 90293, 91402, 91364, 90007, 91331, 90005, 91040, 90031, 91324, 90043, 91316, 91352, 90069, 91325, 91344, 90019, 90056, 90032, 90291, 90027, 90247, 90232, 90013, 90042, 90035, 90024, 90026, 90034, 90016, 90018, 90004, 90067, 90010, 90025, 90047, 90037, 91605, 90049, 90744, 90028, 90272, 90275, 90731, 90710, 91042, 91345, 90003, 91335, 91304, 91311, 91214, 91604, 91608, 91405, 91367, 91406, 91436, 91504, 91423, 91505, 91356, 90292, 90402, 90065, 90501, 90230, 90048, 90046, 90038, 90044, 90810, 90023, 90068, 90045, 90717, 90064, 90058, 90033, 90017, 90090)

LA <- ztca[as.character(ztca$ZCTA5CE10) %in% as.character(zip.interest),]
class(LA)

# My data (want to be used as filling variable)
data <- matrix(c(90001, 90002, 90003, 90004, 90005, 90006, 90007, 90008, 90010, 90011, 90012, 90013, 90014, 90015, 90016, 90017, 90018, 90019, 90020, 90021, 90023, 90024, 90025, 90026, 90027, 90028, 90029, 90031, 90032, 90033, 90034, 90035, 90036, 90037, 90038, 90039, 90041, 90042, 90043, 90044, 90045, 90046, 90047, 90048, 90049, 90056, 90057, 90058, 90059, 90061, 90062, 90063, 90064, 90065, 90066, 90067, 90068, 90069, 90071, 90073, 90077, 90089, 90090, 90094, 90095, 90210, 90211, 90212, 90230, 90232, 90247, 90248, 90272, 90275, 90291, 90292, 90293, 90301, 90302, 90402, 90405, 90501, 90502, 90710, 90717, 90731, 90732, 90744, 90745, 90810, 91040, 91042, 91214, 91303, 91304, 91306, 91307, 91311, 91316, 91324, 91325, 91326, 91330, 91331, 91335, 91340, 91342, 91343, 91344, 91345, 91352, 91356, 91364, 91367, 91401, 91402, 91403, 91405, 91406, 91411, 91423, 91436, 91504, 91505, 91601, 91602, 91604, 91605, 91606, 91607, 91608, 7086, 20731, 45814, 24883, 15295, 23099, 22163, 24461, 6351, 40047, 16028, 22822, 9500, 22859, 23201, 19959, 21000, 25806, 12917, 11767, 12791, 9221, 14967, 26850, 21314, 36956, 14556, 12233, 12986, 19864, 17239, 10843, 17399, 35476, 13786, 10083, 9214, 18613, 22267, 46012, 26956, 14627, 24949, 10958, 10238,  294, 21961, 3037, 15441, 12432, 20470, 3673, 12165, 14869, 14516, 2299, 10013, 2458, 1862,  473, 1755, 3772, 691, 1418, 200, 1689, 813,  181, 3553, 912, 3807, 2693, 5017, 558, 18841, 5872, 3890, 573,  322, 592,  331, 5795, 1119, 7323,  667, 29879, 5318, 22950, 110, 144, 5919, 7465, 135, 17871, 14904, 16814, 6811, 14521, 9806, 17144, 9857, 7175,  647, 32723, 25985, 3182, 29040, 23947, 18500, 7194, 19031, 12226, 11075, 16189, 17455, 26382, 9818, 22774, 22124, 12000, 14122, 4965,  566,  522, 20601, 7204, 14051, 24116, 18162, 11174, 478),ncol=2)

colnames(data) <- c("ZCTA5CE10", "cnt" )
data <- as.data.frame(data)

LA@data <- cbind(LA@data, cnt=data$cnt)

# Suppose plot by
ggplot(data = LA, aes(x = long, y = lat, fill = cnt, group = group)) +
geom_polygon(colour = "black") +
coord_equal() + coord_fixed(1.3)

但是我得到了“为每个多边形定义的区域 错误:美学必须是长度 1 或与数据相同 (6877):x、y、填充、组。我想当我 select 那些多边形 and/or 添加 cnt.

感谢您的帮助和评论!!!

为此,您可能需要从 GitHub 安装 ggplot2。可能不是,但我一直在使用它的开发版本,不能真正降级这个例子。

切换到sf

library(sf)
library(tidyverse)

sf::st_read(
  path.expand("~/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp"),
  stringsAsFactors = FALSE
) -> ztca
## Reading layer `cb_2017_us_zcta510_500k' from data source `/Users/hrbrmstr/Data/cb_2017_us_zcta510_500k/cb_2017_us_zcta510_500k.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33144 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs

ztca
## Simple feature collection with 33144 features and 5 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -176.6847 ymin: -14.37374 xmax: 145.8304 ymax: 71.34122
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
## First 10 features:
##    ZCTA5CE10     AFFGEOID10 GEOID10    ALAND10  AWATER10                       geometry
## 1      35442 8600000US35442   35442  610213891  10838694 MULTIPOLYGON (((-88.25262 3...
## 2      85365 8600000US85365   85365 3545016067   9766486 MULTIPOLYGON (((-114.6847 3...
## 3      71973 8600000US71973   71973  204670474   1264709 MULTIPOLYGON (((-94.46643 3...
## 4      95445 8600000US95445   95445  221559097   7363179 MULTIPOLYGON (((-123.6431 3...
## 5      06870 8600000US06870   06870    5945321   3841130 MULTIPOLYGON (((-73.58766 4...
## 6      19964 8600000US19964   19964   24601672    124382 MULTIPOLYGON (((-75.74767 3...
## 7      32233 8600000US32233   32233   26427514   8163517 MULTIPOLYGON (((-81.45931 3...
## 8      90401 8600000US90401   90401    2199165    439378 MULTIPOLYGON (((-118.5028 3...
## 9      30817 8600000US30817   30817  498860155 105853904 MULTIPOLYGON (((-82.5951 33...
## 10     30458 8600000US30458   30458  378422575  12351325 MULTIPOLYGON (((-81.73032 3...

它应该读得更快。

现在我们需要您的数据。首先是拉链:

c(
  91306, 90008, 90014, 90094, 91326, 90212, 90001, 90006, 90301, 91401, 90095,
  90021, 90089, 90057, 90029, 90502, 90036, 90745, 91330, 91411, 91601, 91607,
  90302, 90073, 91340, 91602, 90071, 90039, 90011, 90248, 91303, 90020, 90732,
  90405, 91342, 90077, 90061, 90015, 90002, 90041, 90211, 90059, 90066, 90210,
  91343, 90062, 91606, 91403, 90063, 90012, 91307, 90293, 91402, 91364, 90007,
  91331, 90005, 91040, 90031, 91324, 90043, 91316, 91352, 90069, 91325, 91344,
  90019, 90056, 90032, 90291, 90027, 90247, 90232, 90013, 90042, 90035, 90024,
  90026, 90034, 90016, 90018, 90004, 90067, 90010, 90025, 90047, 90037, 91605,
  90049, 90744, 90028, 90272, 90275, 90731, 90710, 91042, 91345, 90003, 91335,
  91304, 91311, 91214, 91604, 91608, 91405, 91367, 91406, 91436, 91504, 91423,
  91505, 91356, 90292, 90402, 90065, 90501, 90230, 90048, 90046, 90038, 90044,
  90810, 90023, 90068, 90045, 90717, 90064, 90058, 90033, 90017, 90090
) %>% as.character() -> zip_interest

ZCTA5CE10 字段是 zip (IIRC),因此我们将其过滤掉:

la_df <- filter(ztca, ZCTA5CE10 %in% zip_interest)

然后,我们制作您的压缩填充数据:

data_frame(
  zipcode = c(
    90001, 90002, 90003, 90004, 90005, 90006,
    90007, 90008, 90010, 90011, 90012, 90013, 90014, 90015, 90016,
    90017, 90018, 90019, 90020, 90021, 90023, 90024, 90025, 90026,
    90027, 90028, 90029, 90031, 90032, 90033, 90034, 90035, 90036,
    90037, 90038, 90039, 90041, 90042, 90043, 90044, 90045, 90046,
    90047, 90048, 90049, 90056, 90057, 90058, 90059, 90061, 90062,
    90063, 90064, 90065, 90066, 90067, 90068, 90069, 90071, 90073,
    90077, 90089, 90090, 90094, 90095, 90210, 90211, 90212, 90230,
    90232, 90247, 90248, 90272, 90275, 90291, 90292, 90293, 90301,
    90302, 90402, 90405, 90501, 90502, 90710, 90717, 90731, 90732,
    90744, 90745, 90810, 91040, 91042, 91214, 91303, 91304, 91306,
    91307, 91311, 91316, 91324, 91325, 91326, 91330, 91331, 91335,
    91340, 91342, 91343, 91344, 91345, 91352, 91356, 91364, 91367,
    91401, 91402, 91403, 91405, 91406, 91411, 91423, 91436, 91504,
    91505, 91601, 91602, 91604, 91605, 91606, 91607, 91608) %>%
    as.character(),
  fillvar = c(
    7086,
    20731, 45814, 24883, 15295, 23099, 22163, 24461, 6351, 40047,
    16028, 22822, 9500, 22859, 23201, 19959, 21000, 25806, 12917,
    11767, 12791, 9221, 14967, 26850, 21314, 36956, 14556, 12233,
    12986, 19864, 17239, 10843, 17399, 35476, 13786, 10083, 9214,
    18613, 22267, 46012, 26956, 14627, 24949, 10958, 10238, 294,
    21961, 3037, 15441, 12432, 20470, 3673, 12165, 14869, 14516,
    2299, 10013, 2458, 1862, 473, 1755, 3772, 691, 1418, 200, 1689,
    813, 181, 3553, 912, 3807, 2693, 5017, 558, 18841, 5872, 3890,
    573, 322, 592, 331, 5795, 1119, 7323, 667, 29879, 5318, 22950,
    110, 144, 5919, 7465, 135, 17871, 14904, 16814, 6811, 14521,
    9806, 17144, 9857, 7175, 647, 32723, 25985, 3182, 29040, 23947,
    18500, 7194, 19031, 12226, 11075, 16189, 17455, 26382, 9818,
    22774, 22124, 12000, 14122, 4965, 566, 522, 20601, 7204, 14051,
    24116, 18162, 11174, 478)
) -> zip_df

现在,只需将该数据连接到 sf 对象并绘制它即可:

left_join(zip_df, by=c("ZCTA5CE10"="zipcode")) %>%
  ggplot() +
  geom_sf(aes(fill=fillvar), size=0.125, color="#b2b2b2") + # thinner lines
  coord_sf(datum = NA) + # this gets rid of the graticule
  viridis::scale_fill_viridis(name="Better legend name:", labels=scales::comma) + # color-blind friendly color map
  ggthemes::theme_map() + # clean map
  theme(legend.position="top") + # legend on top
  theme(legend.key.width = unit(3, "lines")) # wider legend