为什么我在 R 中的经纬度交点不与正确的邮政编码对齐?

Why is my intersection of lat lon points in R not aligning with the correct zip codes?

这让我抓狂了一整天。我正在将 lat/lon 坐标映射到北卡罗来纳州的邮政编码。 6000 个点中约有 5000 个映射得很好,但是,军事基地(布拉格堡)周围的点没有映射到任何邮政编码。我编写了代码以获取到每个点最近的邮政编码以将其映射到那些邮政编码,但是当我返回检查以确保它正常工作时,它正在将这些点映射到邮政编码甚至不接近 lat/lon坐标是.

这是 shapefile 的 link https://www.nconemap.gov/datasets/nconemap::zip-code-tabulation-areas/about

library(sf)
library(tidyverse)

### Sample data from 3 points that did not work.

POINT_LAT = c(35.18, 35.181, 35.182)
POINT_LON = c(-79.19, -79.272, -79.24)

all_points = cbind(POINT_LAT, POINT_LON)

zipcode_nc = read.sf(NC_zipcodes.shp)
zipmap = st_transform(zipcode_nc, crs = 4326)

locations_zip = st_as_sf(all_points, coords = "POINT_LON", "POINT_LAT"), crs = st_crs(zipmap))

point_zips = locations_zip %>%
    mutate(intersection = as.integer(st_intersects(geometry, zipmap)), area = if_else(is.na(intersection), ' ', zipmap$GEOID10,[intersection]))

## Try to map missing points to closest zip code.

points_zips_external_nearest = point_zips %>%
    filter(is.na(intersection)) %>%
    st_nearest_feature(zipmap)

points_zips_external = points_zips %>%
    filter(is.na(intersection)) %>%
    mutate(zip = point_zips$ZIP[points_zips_external_nearest])

这给出了错误的点邮政编码。

首先,您的示例代码导致了我的一些错误:

  • 找不到 read.sf() 函数 - 你是说 read_sf() 吗?
  • 我无法使用 st_as_sf() 制作 locations_zip 而不是先制作 all_points 一个 tibble。
  • 当制作 points_zips 时,特别是 mutateing area - zipmap$GEOID10,[intersection] 中的逗号不需要并且会导致错误。

除了确保您的代码正常工作之外,如果您包括了您得到的“错误”结果以及您期望的“正确”结果,这将很有帮助。这样其他人就可以看到他们自己是否获得了 right/wrong 结果。


我稍微简化了一些代码,我想我得到了正确的邮政编码。

首先,加载库和数据。我使用 st_read() 从下载时附带的文件夹的名称导入 shapefile。

library(sf)
library(tidyverse)
library(tmap)

# sample points
POINT_LAT = c(35.18, 35.181, 35.182)
POINT_LON = c(-79.19, -79.272, -79.24)
all_points = cbind(POINT_LAT, POINT_LON)

# zipcode_nc = read.sf(NC_zipcodes.shp)
zipcode_nc <- st_read('ZIP_Code_Tabulation_Areas/ZIP_Code_Tabulation_Areas.shp')

zipmap = st_transform(zipcode_nc, crs = 4326)

zipmap

# Simple feature collection with 808 features and 8 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32186 ymin: 33.84231 xmax: -75.46062 ymax: 36.58811
# Geodetic CRS:  WGS 84
# First 10 features:
#    OBJECTID ZCTA5CE10     AFFGEOID10 GEOID10   ALAND10 AWATER10   ShapeSTAre  ShapeSTLen                       geometry
# 1         1     28306 8600000US28306   28306 177888344  2457841 180219025.11 155040.4114 MULTIPOLYGON (((-79.06381 3...
# 2         2     28334 8600000US28334   28334 414866754  3998968 418736167.65 161264.1606 MULTIPOLYGON (((-78.73546 3...
# 3         3     28169 8600000US28169   28169   1450978        0   1413700.15   6504.1990 MULTIPOLYGON (((-81.43786 3...
# 4         4     27278 8600000US27278   27278 262556156  2234207 264733578.27 138272.0708 MULTIPOLYGON (((-79.20634 3...
# 5         5     28325 8600000US28325   28325   2203868        0   2175834.07   7088.4067 MULTIPOLYGON (((-78.11489 3...
# 6         6     28472 8600000US28472   28472 469545124  1646985 471198707.46 225630.0920 MULTIPOLYGON (((-78.85764 3...
# 7         7     27841 8600000US27841   27841    684051        0    669653.06   4155.6886 MULTIPOLYGON (((-77.28181 3...
# 8         8     28280 8600000US28280   28280     19577        0     19572.48    560.1441 MULTIPOLYGON (((-80.8442 35...
# 9         9     28560 8600000US28560   28560 304195338 65656182 314791215.62 206410.4354 MULTIPOLYGON (((-77.15578 3...
# 10       10     27881 8600000US27881   27881   1760084        0   1765449.38   8334.0644 MULTIPOLYGON (((-77.44656 3...



# locations_zip = st_as_sf(all_points, coords = c("POINT_LON", "POINT_LAT"), crs = st_crs(zipmap))
locations_zip = st_as_sf(all_points %>% as_tibble, coords = c("POINT_LON", "POINT_LAT"), crs = st_crs(zipmap))

locations_zip

# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: -79.272 ymin: 35.18 xmax: -79.19 ymax: 35.182
# Geodetic CRS:  WGS 84
# # A tibble: 3 x 1
# geometry
# *      <POINT [°]>
# 1   (-79.19 35.18)
# 2 (-79.272 35.181)
# 3  (-79.24 35.182)

首先,我尝试使用 tmap 包在地图上绘制数据:

# a map
tm_shape(zipmap)+
  tm_polygons()
# Error: Shape contains invalid polygons. Please fix it or set tmap_options(check.and.fix = TRUE) and rerun the plot

该错误表明某些多边形无效,并且是由于自相交等各种原因造成的。有关详细信息,请参阅?st_make_valid

因此,我使用 st_make_valid() 使多边形有效,然后使用 st_cast() 分隔每个单独的多边形。如果没有这个,一些邮政编码有多个多边形,因此在下一步中绘制标签很棘手。

# make the polygons valid
zipmap %>% 
  st_make_valid() %>% 
  st_cast('POLYGON') %>% 
  {. ->> zipmap_2}

zipmap_2

# Simple feature collection with 966 features and 8 fields
# Geometry type: POLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32186 ymin: 33.84231 xmax: -75.46062 ymax: 36.58811
# Geodetic CRS:  WGS 84
# First 10 features:
#     OBJECTID ZCTA5CE10     AFFGEOID10 GEOID10   ALAND10 AWATER10 ShapeSTAre ShapeSTLen                       geometry
# 1          1     28306 8600000US28306   28306 177888344  2457841  180219025 155040.411 POLYGON ((-79.06394 34.9998...
# 1.1        1     28306 8600000US28306   28306 177888344  2457841  180219025 155040.411 POLYGON ((-78.8688 34.89214...
# 1.2        1     28306 8600000US28306   28306 177888344  2457841  180219025 155040.411 POLYGON ((-78.86443 34.9142...
# 1.3        1     28306 8600000US28306   28306 177888344  2457841  180219025 155040.411 POLYGON ((-78.8571 34.90978...
# 1.4        1     28306 8600000US28306   28306 177888344  2457841  180219025 155040.411 POLYGON ((-79.05672 34.9803...
# 2          2     28334 8600000US28334   28334 414866754  3998968  418736168 161264.161 POLYGON ((-78.73424 35.1739...
# 3          3     28169 8600000US28169   28169   1450978        0    1413700   6504.199 POLYGON ((-81.43807 35.3547...
# 4          4     27278 8600000US27278   27278 262556156  2234207  264733578 138272.071 POLYGON ((-79.20665 35.9857...
# 5          5     28325 8600000US28325   28325   2203868        0    2175834   7088.407 POLYGON ((-78.11554 35.1526...
# 6          6     28472 8600000US28472   28472 469545124  1646985  471198707 225630.092 POLYGON ((-78.85624 34.4161...

同时为这三个点创建一个 id 列,这样我们就可以在地图上绘制一个标签,以查看哪个是哪个以及哪个邮政编码最近。

# make point ID for the 3 points
locations_zip %>% 
  mutate(
    id = row_number()
  ) %>% 
  {. ->> locations_zip_2}

locations_zip_2
# 
# Simple feature collection with 3 features and 1 field
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: -79.272 ymin: 35.18 xmax: -79.19 ymax: 35.182
# Geodetic CRS:  WGS 84
# # A tibble: 3 x 2
#           geometry    id
# *      <POINT [°]> <int>
# 1   (-79.19 35.18)     1
# 2 (-79.272 35.181)     2
# 3  (-79.24 35.182)     3

现在我们可以绘制邮政编码和三个点的地图。我们也为每个功能添加了标签。我在 tm_shape() 里面设置了一个 bbox ,所以它专注于我们的三个点,并添加邮政编码标签(我假设 ZCTA5CE10 列中的值是邮政编码?)。我使用 st_centroid() 找到每个多边形的中心,作为放置邮政编码标签的坐标。

# now make the map again
tmap_mode('view')

tm_shape(zipmap_2, bbox = locations_zip_2 %>% st_buffer(3000) %>% st_bbox)+
  tm_polygons()+
  tm_shape(zipmap_2 %>% st_centroid)+
  tm_text(text = 'ZCTA5CE10', size = 2)+
  tm_shape(locations_zip_2)+
  tm_dots(col = 'red')+
  tm_text(text = 'id', ymod = -3, col = 'red', size = 2)

然后,我们使用st_nearest_feature()通过索引找到最近的多边形,他们提取索引的邮政编码。

# which is the closest zip code to each point?
locations_zip_2 %>% 
  mutate(
    nearest_index = st_nearest_feature(., zipmap_2),
    nearest_zip_code = zipmap_2$ZCTA5CE10[nearest_index]
  )

# Simple feature collection with 3 features and 3 fields
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: -79.272 ymin: 35.18 xmax: -79.19 ymax: 35.182
# Geodetic CRS:  WGS 84
# # A tibble: 3 x 4
#           geometry    id nearest_index nearest_zip_code
# *      <POINT [°]> <int>         <int> <chr>           
# 1   (-79.19 35.18)     1            76 28315           
# 2 (-79.272 35.181)     2           117 28394           
# 3  (-79.24 35.182)     3            76 28315   

据我所知,这些邮政编码与地图相符,所以我认为它们是“正确的”。如果没有,请随时根据您得到的输出和您的期望编辑问题。