缓冲区重叠时的计数点

Question

我在下面包含了我的所有代码和一个 link 示例数据。

简要说明：我有重叠的缓冲区；我想统计某学校一定米范围内的店铺数量

我特别想知道一个学校1000米以内有多少家店，一个学校2000米以内有多少家店，比较一下区别。当然，其中一些学校缓冲区是重叠的。因此，虽然一家商店可能距离 A 学校 1500 米，但距离学校 B 仅 750 米。因此，它算作距离学校 1000 米以内，对于学校 B 应该只算在 1000 米以内，而不应该算作在 1000 米以内计入学校 A。如果一家商店距离两所学校 2000 米以内（但不在 1000 米以内），则需要计入距离最近的学校。

理想情况下，我希望数据集看起来像：

School	Stores1000m	Stores2000m
School A	3	6
School B	2	7

所以我使用了sf中的st_union函数来合并缓冲区。这对于制作漂亮的地图效果很好，但是当我使用长度和 st_intersects 来计算缓冲区内的商店时，它只为每种类型的区域返回一个数字（1000 米对 2000 米）

示例数据：Sample data

county.sf <- get_acs(state = "MO",
                     county = c( "St. Louis City"),
                     geography = "tract",
                     variables = "B03002_001", 
                     output="wide", 
                     geometry = TRUE) %>%
  sf::st_transform(crs = "ESRI:102003")
  
class(county.sf)

# School data
school <- read.csv("C:\myfile1.csv")
school.sf <- st_as_sf(school, coords = c("long", "lat"), crs = "epsg:4326") 
school.sf.utm <- st_transform(school.sf, crs = "ESRI:102003")


# Store data
store <- import("C:\myfile2.csv")
store.sf <- st_as_sf(store, coords = c("XCoord", "YCoord"), crs = "ESRI:102696") 
store.sf.utm <- st_transform(store.sf, crs = "ESRI:102003")


elem.buff <-st_buffer(school.sf.utm, 1000)     
elem.buff2 <-st_buffer(school.sf.utm, 2000) 

pts_com<-st_union(elem.buff)
pts_pol<-st_cast(pts_com, "POLYGON")

pts_com2<-st_union(elem.buff2)
pts_pol2<-st_cast(pts_com2, "POLYGON")


#unmerged zone map
ex.map<- tm_shape(county.sf) +
  tm_polygons() + 
  
  tm_shape(elem.buff) +
  tm_borders(col="red") +  
  
  tm_shape(school.sf.utm) +
  tm_dots(col = "red") +
  
  tm_shape(elem.buff2) +
  tm_borders(col="blue") + 
    
  tm_shape(pts_pol) +
  tm_borders(col="black") +
  
  tm_shape(store.sf.utm) +
  tm_dots() 
ex.map




#merged zones map

ex.map<- tm_shape(county.sf) +
  tm_polygons() + 
  
  #(elem.buff) +
  #tm_borders(col="red") +  
  
  tm_shape(school.sf.utm) +
  tm_dots(col = "red") +
  
  #tm_shape(elem.buff2) +
  #tm_borders(col="blue") + 
  
  tm_shape(pts_pol) +
  tm_borders(col="red") +
  
  tm_shape(store.sf.utm) +
  tm_dots() +

  tm_shape(pts_pol2) +
  tm_borders(col="blue")
ex.map



(school$pt_count <- lengths(st_intersects(elem.buff, store.sf.utm))) #gives per school but ignores overlapping
(school$pt_count <- lengths(st_intersects(pts_com, store.sf.utm)))

(school$pt_count <- lengths(st_intersects(elem.buff2, store.sf.utm)))
(school$pt_count <- lengths(st_intersects(pts_com2, store.sf.utm)))

Answer 1

如果我正确地解释了问题，我想我有答案了。

根据我从问题中得到的信息，您想知道每所学校 1000 和 2000 米范围内有多少家商店，但商店只计入距离它们最近的学校 - 这样对吗？

最少的代码设置，将示例数据保存为工作目录中的 .xlsx 文件：

library(readxl)
library(tidyverse)
library(sf)

read_xlsx('Schools and Stores.xlsx', sheet = 1) %>% 
  st_as_sf(., coords = c("long", "lat"), crs = "epsg:4326") %>% 
  st_transform(crs = "ESRI:102003") %>% 
  {. ->> school.sf.utm}

read_xlsx('Schools and Stores.xlsx', sheet = 2) %>% 
  st_as_sf(., coords = c("XCoord", "YCoord"), crs = "ESRI:102696") %>% 
  st_transform(crs = "ESRI:102003") %>% 
  {. ->> store.sf.utm}

首先，为了减少数据集中的商店数量，我们只保留所有学校 2 公里缓冲区内的商店（这可能是您在 st_buffer() 之后使用 st_union() 所做的） .这将商店数量从 2603 家减少到 191 家。

# step 1 - keep only stores within a 2km buffer of all schools, to reduce number of stores to work with
stores.sf.utm %>% 
  filter(
    st_intersects(stores.sf.utm, school.sf.utm %>% st_buffer(2000), sparse = FALSE)
  ) %>% 
  rename(
    geometry_stores = geometry
  ) %>% 
  {. ->> stores_2000}

stores_2000

# Simple feature collection with 191 features and 0 fields
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 496820.2 ymin: 138115.8 xmax: 500484.2 ymax: 141987.8
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 191 x 1
#       geometry_stores
#           <POINT [m]>
# 1   (496820.2 139441)
# 2 (496848.1 140725.7)
# 3 (496987.8 138959.5)
# 4 (497052.2 139815.4)
# 5   (497030 140286.7)
# 6 (497122.5 138900.1)
# 7 (497033.2 140646.1)
# 8 (497099.8 140279.6)
# 9 (497199.7 138687.5)
# 10 (497154.4 139805.9)
# # ... with 181 more rows

接下来，我们生成学校和剩余商店的所有可能组合。我分配了一个 store_id 这样我们就可以知道哪个商店是哪个（不使用它是 geometry）。

# generate all schools~stores combos
stores_2000 %>% 
  mutate(
    store_id = row_number(),
    schools = list(school.sf.utm)
  ) %>% 
  unnest(cols = c('schools')) %>% 
  rename(
    geometry_school = geometry
  ) %>% 
  {. ->> all_combos}

all_combos

# Simple feature collection with 3438 features and 2 fields
# Active geometry column: geometry_stores
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 496820.2 ymin: 138115.8 xmax: 500484.2 ymax: 141987.8
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 3,438 x 4
#      geometry_stores store_id School                                    geometry_school
#          <POINT [m]>    <int> <chr>                                         <POINT [m]>
#  1 (496820.2 139441)        1 AcademyOf Envt Sci/math Middle School (498610.1 140067.7)
#  2 (496820.2 139441)        1 Collegiate School Of Med/bio          (496797.7 140597.6)
#  3 (496820.2 139441)        1 Dewey Sch.-internat'l. Studies        (499626.5 139130.3)
#  4 (496820.2 139441)        1 Eagle Fox Park                        (498015.9 139324.1)
#  5 (496820.2 139441)        1 Education Therap Support At Madison   (476270.1 131682.7)
#  6 (496820.2 139441)        1 Hodgen Elementary School              (497853.4 140290.1)
#  7 (496820.2 139441)        1 Humboldt Academy Of Higher Lrning     (499410.4 138707.3)
#  8 (496820.2 139441)        1 Lafayette Preparatory Academy           (498812.6 140006)
#  9 (496820.2 139441)        1 Lift For Life Academy                 (500025.8 139526.4)
# 10 (496820.2 139441)        1 Lift For Life Academy High School     (500025.8 139526.4)
# # ... with 3,428 more rows

这意味着我们可以算出每家商店到每所学校的距离。然后，我们只保留彼此相距 2000 米以内的组合（这些组合由位于原始 2 公里缓冲区两侧的商店和学校组成，这就是它们的距离超过 2 公里的原因）。

# calculate distance from each store to each school
all_combos %>% 
  mutate(
    distance = as.numeric(st_distance(geometry_stores, geometry_school, by_element = TRUE))
  ) %>% 
  filter(
    distance <= 2000
  ) %>% 
  {. ->> all_combos_2}

all_combos_2

# Simple feature collection with 2231 features and 3 fields
# Active geometry column: geometry_stores
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 496820.2 ymin: 138115.8 xmax: 500484.2 ymax: 141987.8
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 2,231 x 5
#        geometry_stores store_id School                                    geometry_school distance
# *          <POINT [m]>    <int> <chr>                                         <POINT [m]>    <dbl>
# 1    (496820.2 139441)        1 AcademyOf Envt Sci/math Middle School (498610.1 140067.7)   1896. 
# 2    (496820.2 139441)        1 Collegiate School Of Med/bio          (496797.7 140597.6)   1157. 
# 3    (496820.2 139441)        1 Eagle Fox Park                        (498015.9 139324.1)   1201. 
# 4    (496820.2 139441)        1 Hodgen Elementary School              (497853.4 140290.1)   1337. 
# 5    (496820.2 139441)        1 Mckinley Class. Leadership Ac.        (498355.8 139560.4)   1540. 
# 6    (496820.2 139441)        1 Nahed Chapman New American Academy    (496615.8 140605.6)   1182. 
# 7    (496820.2 139441)        1 Shenandoah Elementary School            (496821 139360.4)     80.6
# 8    (496820.2 139441)        1 Sigel Elementary Comm. Ed. Center     (498603.2 139613.7)   1791. 
# 9    (496820.2 139441)        1 St. Louis Christian Academy           (497245.5 140196.9)    867. 
# 10 (496848.1 140725.7)        2 AcademyOf Envt Sci/math Middle School (498610.1 140067.7)   1881. 
# # ... with 2,221 more rows

现在如果我的理解是正确的，每家商店只计入离它最近的学校。因此，我们只保留每家商店距离最近的学校 filter():

# first, keep only the closest school to each store
all_combos_2 %>% 
  arrange(store_id, distance) %>% 
  group_by(store_id) %>% 
  filter(
    distance == min(distance)
  ) %>% 
  {. ->> all_combos_3}
# so now we have the closest school to each store

all_combos_3

# Simple feature collection with 223 features and 3 fields
# Active geometry column: geometry_stores
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 496820.2 ymin: 138115.8 xmax: 500484.2 ymax: 141987.8
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 223 x 5
# # Groups:   store_id [191]
#        geometry_stores store_id School                           geometry_school distance
# *          <POINT [m]>    <int> <chr>                                <POINT [m]>    <dbl>
# 1    (496820.2 139441)        1 Shenandoah Elementary School   (496821 139360.4)     80.6
# 2  (496848.1 140725.7)        2 Collegiate School Of Med/bio (496797.7 140597.6)    138. 
# 3  (496987.8 138959.5)        3 Shenandoah Elementary School   (496821 139360.4)    434. 
# 4  (497052.2 139815.4)        4 St. Louis Christian Academy  (497245.5 140196.9)    428. 
# 5    (497030 140286.7)        5 St. Louis Christian Academy  (497245.5 140196.9)    233. 
# 6  (497122.5 138900.1)        6 Shenandoah Elementary School   (496821 139360.4)    550. 
# 7  (497033.2 140646.1)        7 Collegiate School Of Med/bio (496797.7 140597.6)    240. 
# 8  (497099.8 140279.6)        8 St. Louis Christian Academy  (497245.5 140196.9)    168. 
# 9  (497199.7 138687.5)        9 Shenandoah Elementary School   (496821 139360.4)    772. 
# 10 (497154.4 139805.9)       10 St. Louis Christian Academy  (497245.5 140196.9)    402. 
# # ... with 213 more rows

请注意，我们现在有 223 行。这意味着有 32 个重复项 (223 - 191)；其中有两所（或更多）最近的学校，并且它们与商店的距离相同（在此示例中，最大重复数 = 2）。您选择如何处理这些取决于您。在这个例子中，我将把它们留在数据中，但如果你只想要一所学校，你可以选择第一个字母顺序或随机选择等。

所以现在，我们可以计算出（最近的）学校 1000 米内有多少商店：

# now, how many closest stores are within 1000 m of each school
all_combos_3 %>% 
  filter(
    distance <= 1000
  ) %>% 
  group_by(School) %>% 
  summarise(
    Stores1000m = n()
  ) %>% 
  st_drop_geometry %>% 
  {. ->> combo_sum_1000}

combo_sum_1000

# # A tibble: 16 x 2
#    School                                Stores1000m
#  * <chr>                                       <int>
#  1 AcademyOf Envt Sci/math Middle School           2
#  2 Collegiate School Of Med/bio                    4
#  3 Dewey Sch.-internat'l. Studies                  6
#  4 Eagle Fox Park                                 37
#  5 Hodgen Elementary School                       17
#  6 Humboldt Academy Of Higher Lrning              10
#  7 Lafayette Preparatory Academy                   1
#  8 Lift For Life Academy                           8
#  9 Lift For Life Academy High School               8
# 10 Mckinley Class. Leadership Ac.                  7
# 11 Peabody Elementary School                      48
# 12 Shenandoah Elementary School                    6
# 13 Sigel Elementary Comm. Ed. Center               7
# 14 St. Louis Christian Academy                     7
# 15 St. Louis College Prep High School             14
# 16 St. Louis College Prep Middle School           14

对于 2000 米以内的商店也采用相同的方法：

# 2000 m
all_combos_3 %>% 
  filter(
    distance <= 2000
  ) %>% 
  group_by(School) %>% 
  summarise(
    Stores2000m = n()
  ) %>% 
  st_drop_geometry %>% 
  {. ->> combo_sum_2000}

combo_sum_2000

# # A tibble: 16 x 2
#    School                                Stores2000m
#  * <chr>                                       <int>
#  1 AcademyOf Envt Sci/math Middle School           2
#  2 Collegiate School Of Med/bio                    4
#  3 Dewey Sch.-internat'l. Studies                  6
#  4 Eagle Fox Park                                 37
#  5 Hodgen Elementary School                       18
#  6 Humboldt Academy Of Higher Lrning              10
#  7 Lafayette Preparatory Academy                   1
#  8 Lift For Life Academy                           8
#  9 Lift For Life Academy High School               8
# 10 Mckinley Class. Leadership Ac.                  7
# 11 Peabody Elementary School                      53
# 12 Shenandoah Elementary School                    7
# 13 Sigel Elementary Comm. Ed. Center               7
# 14 St. Louis Christian Academy                     7
# 15 St. Louis College Prep High School             24
# 16 St. Louis College Prep Middle School           24

当然，我们可以加入这两个数据集以匹配您想要的输出。

combo_sum_1000 %>% 
  full_join(combo_sum_2000) %>% 
  {. ->> combo_sum_joined}

combo_sum_joined

# # A tibble: 16 x 3
#    School                                Stores1000m Stores2000m
#    <chr>                                       <int>       <int>
#  1 AcademyOf Envt Sci/math Middle School           2           2
#  2 Collegiate School Of Med/bio                    4           4
#  3 Dewey Sch.-internat'l. Studies                  6           6
#  4 Eagle Fox Park                                 37          37
#  5 Hodgen Elementary School                       17          18
#  6 Humboldt Academy Of Higher Lrning              10          10
#  7 Lafayette Preparatory Academy                   1           1
#  8 Lift For Life Academy                           8           8
#  9 Lift For Life Academy High School               8           8
# 10 Mckinley Class. Leadership Ac.                  7           7
# 11 Peabody Elementary School                      48          53
# 12 Shenandoah Elementary School                    6           7
# 13 Sigel Elementary Comm. Ed. Center               7           7
# 14 St. Louis Christian Academy                     7           7
# 15 St. Louis College Prep High School             14          24
# 16 St. Louis College Prep Middle School           14          24

我希望我对问题的解释是正确的，我承认这有点混乱，因为我们在按商店分组和按学校等分组之间切换。但我认为这可行。

Answer 2

针对更大数据的更新、更有效的答案：

我承认，之前的答案靠的是做一个list所有学校的列，然后用unnest()找每一个组合，不适合大数据。

正如@JindraLacko 在评论中所建议的，st_nearest_feature() 是你的朋友；毫不奇怪，它比我提出的 'manual' 方法更有效。

同上，加载库和数据

library(readxl)
library(tidyverse)
library(sf)

library(tmap)
tmap_mode('view')


read_xlsx('Schools and Stores_all.xlsx', sheet = 1) %>% 
  st_as_sf(., coords = c("long", "lat"), crs = "epsg:4326") %>% 
  st_transform(crs = "ESRI:102003") %>% 
  {. ->> school.sf.utm}

read_xlsx('Schools and Stores_all.xlsx', sheet = 2) %>% 
  st_as_sf(., coords = c("XCoord", "YCoord"), crs = "ESRI:102696") %>% 
  st_transform(crs = "ESRI:102003") %>% 
  {. ->> store.sf.utm}

然后，我们使用 st_join() 来连接商店和学校数据，并指定 join = st_nearest_feature 以便它连接每家商店最近的学校（的名称）。然后我们用left_join()加入每个学校的几何。有关详细信息，请参阅 ?st_join。所以最终，这给了我们每家商店最近的学校。

# find the closest school to each store (this is the school it counts towards)
store.sf.utm %>% 
  rename(
    store_geometry = geometry
  ) %>% 
  st_join(
    school.sf.utm, 
    join = st_nearest_feature
  ) %>% 
  left_join(
    school.sf.utm %>% 
      as_tibble %>% 
      rename(
        school_geometry = geometry
      )
  ) %>% 
  {. ->> all_combos}

all_combos

# Simple feature collection with 2603 features and 1 field
# Active geometry column: store_geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 489948.3 ymin: 131719.1 xmax: 501438.8 ymax: 157382.1
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 2,603 x 3
#        store_geometry School                               school_geometry
#           <POINT [m]> <chr>                                    <POINT [m]>
# 1  (489948.3 137420.8) Community Access Job Training    (491117.8 136616.5)
# 2  (490119.7 136712.7) Community Access Job Training    (491117.8 136616.5)
# 3  (490171.8 138758.2) Gateway Science Acad/st Louis    (491307.4 138787.2)
# 4  (490370.2 139681.3) Wilkinson Early Childhood Center (490930.4 140461.2)
# 5  (490568.3 137056.8) Community Access Job Training    (491117.8 136616.5)
# 6    (490475 139013.4) Gateway Science Acad/st Louis    (491307.4 138787.2)
# 7  (490527.6 139633.1) Wilkinson Early Childhood Center (490930.4 140461.2)
# 8  (490715.3 136690.1) Community Access Job Training    (491117.8 136616.5)
# 9  (490552.5 139805.9) Wilkinson Early Childhood Center (490930.4 140461.2)
# 10   (490790 138069.5) Gateway Science Acad/st Louis    (491307.4 138787.2)
# # ... with 2,593 more rows

有趣的是，我们从 156 所学校减少到 134 所。我认为这意味着有 22 所学校不是离商店最近的。

# how many schools in all_combos?
all_combos %>% 
  summarise(
    n_schools = n_distinct(School)
  ) %>% 
  pull(n_schools)

# [1] 134

现在我们知道哪所学校最近，计算每家商店与最近学校之间的距离。

# calculate distance from each store to each school
all_combos %>% 
  mutate(
    distance = as.numeric(st_distance(store_geometry, school_geometry, by_element = TRUE))
  ) %>% 
  filter(
    distance <= 2000
  ) %>% 
  {. ->> all_combos_2}

all_combos_2

# Simple feature collection with 2595 features and 2 fields
# Active geometry column: store_geometry
# Geometry type: POINT
# Dimension:     XY
# Bounding box:  xmin: 489948.3 ymin: 131719.1 xmax: 501438.8 ymax: 152889.7
# Projected CRS: USA_Contiguous_Albers_Equal_Area_Conic
# # A tibble: 2,595 x 4
#        store_geometry School                               school_geometry distance
# *          <POINT [m]> <chr>                                    <POINT [m]>    <dbl>
# 1  (489948.3 137420.8) Community Access Job Training    (491117.8 136616.5)    1419.
# 2  (490119.7 136712.7) Community Access Job Training    (491117.8 136616.5)    1003.
# 3  (490171.8 138758.2) Gateway Science Acad/st Louis    (491307.4 138787.2)    1136.
# 4  (490370.2 139681.3) Wilkinson Early Childhood Center (490930.4 140461.2)     960.
# 5  (490568.3 137056.8) Community Access Job Training    (491117.8 136616.5)     704.
# 6    (490475 139013.4) Gateway Science Acad/st Louis    (491307.4 138787.2)     863.
# 7  (490527.6 139633.1) Wilkinson Early Childhood Center (490930.4 140461.2)     921.
# 8  (490715.3 136690.1) Community Access Job Training    (491117.8 136616.5)     409.
# 9  (490552.5 139805.9) Wilkinson Early Childhood Center (490930.4 140461.2)     756.
# 10   (490790 138069.5) Gateway Science Acad/st Louis    (491307.4 138787.2)     885.
# # ... with 2,585 more rows

算出离他们最近的学校 1000 米以内有多少家商店。

all_combos_2 %>%
  filter(
    distance <= 1000
  ) %>% 
  group_by(School) %>% 
  summarise(
    Stores1000m = n()
  ) %>% 
  st_drop_geometry %>% 
  {. ->> combo_sum_1000}

combo_sum_1000

# # A tibble: 134 x 2
#   School                                    Stores1000m
# * <chr>                                           <int>
# 1 Academy At Boys & Girls Town                       24
# 2 AcademyOf Envt Sci/math Elementary School          18
# 3 AcademyOf Envt Sci/math Middle School               2
# 4 Adams Elementary School                            12
# 5 Ames Visual/perf. Arts                             25
# 6 Ashland Elementary And Br.                         49
# 7 Aspire Academy                                     26
# 8 Beaumont Cte High School                           46
# 9 Bishop DuBourg High School                          4
# 10 Bryan Hill Elementary School                       19
# # ... with 124 more rows

2000 米也一样。

all_combos_2 %>%
  filter(
    distance <= 2000
  ) %>% 
  group_by(School) %>% 
  summarise(
    Stores2000m = n()
  ) %>% 
  st_drop_geometry %>% 
  {. ->> combo_sum_2000}

combo_sum_2000

# # A tibble: 134 x 2
#   School                                    Stores2000m
# * <chr>                                           <int>
# 1 Academy At Boys & Girls Town                       24
# 2 AcademyOf Envt Sci/math Elementary School          18
# 3 AcademyOf Envt Sci/math Middle School               2
# 4 Adams Elementary School                            12
# 5 Ames Visual/perf. Arts                             25
# 6 Ashland Elementary And Br.                         49
# 7 Aspire Academy                                     28
# 8 Beaumont Cte High School                           52
# 9 Bishop DuBourg High School                          4
# 10 Bryan Hill Elementary School                       19
# # ... with 124 more rows

然后将两个表连接在一起。

combo_sum_1000 %>% 
  full_join(combo_sum_2000) %>% 
  {. ->> combo_sum_joined}

combo_sum_joined

# # A tibble: 134 x 3
#    School                                    Stores1000m Stores2000m
#    <chr>                                           <int>       <int>
# 1  Academy At Boys & Girls Town                       24          24
# 2  AcademyOf Envt Sci/math Elementary School          18          18
# 3  AcademyOf Envt Sci/math Middle School               2           2
# 4  Adams Elementary School                            12          12
# 5  Ames Visual/perf. Arts                             25          25
# 6  Ashland Elementary And Br.                         49          49
# 7  Aspire Academy                                     26          28
# 8  Beaumont Cte High School                           46          52
# 9  Bishop DuBourg High School                          4           4
# 10 Bryan Hill Elementary School                       19          19
# # ... with 124 more rows

缓冲区重叠时的计数点

Counting points when buffers are overlapping

r

geospatial

geo

sf

针对更大数据的更新、更有效的答案：