为什么我的空间连接使用 sp 包与 sf 包返回不同的结果?

Why is my spatial join returning different results using the sp versus the sf package?

在下面的 reprex 中,我 运行 对一些点和多边形数据进行了空间连接,但在使用 sp 包时意外地得到了与我使用 sf 包时不同的结果包裹。这是为什么?

我正在尝试计算 prio 方格内的 acled 点,但如下所示,即使 运行 加入 st_covers,我的包之间的计数也不同来自 sf,据我所知在功能上应该与使用来自 spover 方法相同。

library(sp) # packageVersion("sp") #> [1] ‘1.2.7’
library(sf) # packageVersion("sf") #> [1] ‘0.6.3’
library(rgdal)
library(maptools)
library(dplyr); library(tibble)

这是我正在使用的示例数据:

# prio (polygon squares) and acled (points); in both sp and sf objects:

# prio sf polygons object
priosf <- structure(list(
  CELL_ID = c(180365, 176783, 150830, 145866, 140055), 
  gwno = c(615L, 616L, 432L, 626L, 475L), 
  POP = c(111983.7, 107369.7, 12169.35, 23005.76, 527012.1), 
  prio_country = c("Algeria", "Tunisia", "Mali", "South Sudan", "Nigeria"), 
  geometry = structure(list(structure(list(structure(c(2, 2, 2.5, 2.5, 2, 35, 35.5, 35.5, 35, 35), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(11, 11, 11.5, 11.5, 11, 32.5, 33, 33, 32.5, 32.5), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(-5.5, -5.5, -5, -5, -5.5, 14.5, 15, 15, 14.5, 14.5), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(32.5, 32.5, 33, 33, 32.5, 11, 11.5, 11.5, 11, 11),
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg")), 
                            structure(list(structure(c(7, 7, 7.5, 7.5, 7, 7, 7.5, 7.5, 7, 7), 
                                                     .Dim = c(5L, 2L))), class = c("XY", "POLYGON", "sfg"))), 
                       class = c("sfc_POLYGON", "sfc"), precision = 0, 
                       bbox = structure(c(-5.5, 7, 33, 35.5), 
                                        .Names = c("xmin", "ymin", "xmax", "ymax"), 
                                        class = "bbox"), 
                       crs = structure(list(epsg = 4326L, proj4string = "+proj=longlat +datum=WGS84 +no_defs"), 
                                       .Names = c("epsg", "proj4string"), class = "crs"), n_empty = 0L)), 
              .Names = c("CELL_ID", "gwno", "POP", "prio_country", "geometry"),
  row.names = c(NA, -5L), class = c("sf", "tbl_df", "tbl", "data.frame"),
  sf_column = "geometry", agr = structure(c(NA_integer_, NA_integer_, NA_integer_, NA_integer_), 
                                          class = "factor", .Label = c("constant", "aggregate", "identity"), 
                                          .Names = c("CELL_ID", "gwno", "POP", "prio_country")))

# prio sp polygons object
priosp <- as(priosf, 'Spatial')

# acled data
acled <- structure(list(
  EVENT_ID_CNTY = c("ALG3195", "ALG3316", "ALG4228", 
      "ALG4824", "MLI1050", "MLI1144", "MLI1423", "MLI1672", "NIG4606", 
      "NIG4951", "NIG6196", "NIG7661", "NIG9100", "SSD1216", "SSD1504", 
      "SSD3232", "SSD3234", "SSD3231", "SSD3239", "TUN1376", "TUN2597", 
      "TUN3217", "TUN3633"), 
  COUNTRY = c("Algeria", "Algeria", "Algeria", 
              "Algeria", "Mali", "Mali", "Mali", "Mali", "Nigeria", "Nigeria", 
              "Nigeria", "Nigeria", "Nigeria", "South Sudan", "South Sudan", 
              "South Sudan", "South Sudan", "South Sudan", "South Sudan", "Tunisia", 
              "Tunisia", "Tunisia", "Tunisia"), 
  LATITUDE = c(35.2122, 35.4343, 35.2122, 35.2122, 14.8252, 14.8252, 14.7414, 14.8252, 7.3028, 
               7.3028, 7.3028, 7.3028, 7.3588, 11.05, 11.05, 11.05, 11.05, 11.05, 11.05, 32.8487, 32.7149, 32.7149, 32.7149), 
  LONGITUDE = c(2.3189, 2.2166, 2.3189, 2.3189, -5.2547, -5.2547, -5.3282, -5.2547, 7.0382, 7.0382, 7.0382, 7.0382, 7.0994, 32.7, 32.7, 32.7, 32.7, 32.7, 32.7, 11.4309, 11.012, 11.012, 11.012)), 
  row.names = c(NA, -23L), 
  class = c("tbl_df", "tbl", "data.frame"), 
  .Names = c("EVENT_ID_CNTY", "COUNTRY", "LATITUDE", "LONGITUDE"))

# acled sf points object
acledsf <- st_as_sf(
  acled,
  coords = c('LATITUDE', 'LONGITUDE'),
  crs = 4326
)

# acled sp points object
coordinates(acled) <- ~LONGITUDE+LATITUDE
  proj4string(acled) <- proj4string(priosp)
acledsp <- acled; rm(acled)

sp 打包空间连接结果。我绑定与每个点相交的多边形,将结果连接到点,然后计算 CELL_IDs (多边形)的数量:

# sp spatial join:
addPolyDataToPts <- function (points, poly) {
  polysByPoint <- over(points, poly)
  points <- spCbind(points, polysByPoint)
}

acj <- addPolyDataToPts(acledsp, priosp)

(acled_count_sp <- acj@data %>% filter(!is.na(CELL_ID)) %>%
  group_by(CELL_ID, prio_country, POP) %>%
  summarize(acled_sp = n()) %>% arrange(CELL_ID) %>%
  rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups:   CELL_ID, prio_country_sp [5]
#>   CELL_ID prio_country_sp     POP acled_sp
#>     <dbl> <chr>             <dbl>    <int>
#> 1 140055. Nigeria         527012.        5
#> 2 145866. South Sudan      23006.        6
#> 3 150830. Mali             12169.        4
#> 4 176783. Tunisia         107370.        4
#> 5 180365. Algeria         111984.        4

类似的 sf 包空间连接结果,其中我的计数列 acled_sf 与上面的 acled_sp 列不同,除了一个多边形正方形。 (140055;尼日利亚):

# sf spatial join:
(acled_count_sf <- 
  st_join(priosf, acledsf, join = st_covers) %>%
  group_by(CELL_ID, POP, prio_country) %>%
  summarize(acled_sf = n()) %>% ungroup %>% 
  arrange(CELL_ID) %>%
  rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type:  POLYGON
#> dimension:      XY
#> bbox:           xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#>   CELL_ID     POP prio_country_sf acled_sf                        geometry
#>     <dbl>   <dbl> <chr>              <int>                   <POLYGON [°]>
#> 1 140055. 527012. Nigeria                5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2 145866.  23006. South Sudan            4 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3 150830.  12169. Mali                   1 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4 176783. 107370. Tunisia                6 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5 180365. 111984. Algeria                1 ((2 35, 2 35.5, 2.5 35.5, 2.5 …

我的 运行ning 理论是一种方法以错误的顺序绑定值,但我不确定是哪种方法。在我较大的样本中,我得到相似的值但绑定到不同的多边形,即“2706”点与 sf 连接的单元格 1 和 sp 连接的单元格 2 匹配。

(并且,在某些情况下,sf 联接中完全缺少某些值)

任何关于我的结果如何或为什么以这种方式不同的见解将不胜感激。

所以我花了 mapview 中的数据来弄清楚这里发生了什么,但至少在你给定的 reprex 中,你的问题是因为你在创建 [=11 时向后指定了经度和纬度=] 对象。以正确的顺序创建并且连接输出匹配:

# acled sf points object
acledsf <- st_as_sf(
  acled,
  coords = c('LONGITUDE', 'LATITUDE'),  ###notice the correct order here
  crs = 4326
) 

# acled sp points object
coordinates(acled) <- c("LONGITUDE", "LATITUDE")
proj4string(acled) <- proj4string(priosp)
acledsp <- acled; rm(acled)


addPolyDataToPts <- function (points, poly) {
  polysByPoint <- over(points, poly)
  points <- spCbind(points, polysByPoint)
}

acj <- addPolyDataToPts(acledsp, priosp)

(acled_count_sp <- acj@data %>% filter(!is.na(CELL_ID)) %>%
    group_by(CELL_ID, prio_country, POP) %>%
    summarize(acled_sp = n()) %>% arrange(CELL_ID) %>%
    rename(prio_country_sp = prio_country))
#> # A tibble: 5 x 4
#> # Groups:   CELL_ID, prio_country_sp [5]
#>   CELL_ID prio_country_sp     POP acled_sp
#>     <dbl> <chr>             <dbl>    <int>
#> 1  140055 Nigeria         527012.        5
#> 2  145866 South Sudan      23006.        6
#> 3  150830 Mali             12169.        4
#> 4  176783 Tunisia         107370.        4
#> 5  180365 Algeria         111984.        4


### sf
(acled_count_sf <- 
    st_join(priosf, acledsf, join = st_covers) %>%
    group_by(CELL_ID, prio_country,  POP) %>%
    summarize(acled_sf = n()) %>% ungroup %>% 
    arrange(CELL_ID) %>%
    rename(prio_country_sf = prio_country))
#> although coordinates are longitude/latitude, st_covers assumes that they are planar
#> Simple feature collection with 5 features and 4 fields
#> geometry type:  POLYGON
#> dimension:      XY
#> bbox:           xmin: -5.5 ymin: 7 xmax: 33 ymax: 35.5
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#> # A tibble: 5 x 5
#>   CELL_ID prio_country_sf     POP acled_sf                        geometry
#>     <dbl> <chr>             <dbl>    <int>                   <POLYGON [°]>
#> 1  140055 Nigeria         527012.        5 ((7 7, 7 7.5, 7.5 7.5, 7.5 7, …
#> 2  145866 South Sudan      23006.        6 ((32.5 11, 32.5 11.5, 33 11.5,…
#> 3  150830 Mali             12169.        4 ((-5.5 14.5, -5.5 15, -5 15, -…
#> 4  176783 Tunisia         107370.        4 ((11 32.5, 11 33, 11.5 33, 11.…
#> 5  180365 Algeria         111984.        4 ((2 35, 2 35.5, 2.5 35.5, 2.5 …