如何保留空间特征的唯一交集并移除边界外的所有内容?
How to keep the only intersection of the spatial features & remove everything outside of a boundary?
我正试图摆脱落在我读取的 shapefile 边界之外的空间几何。如果没有像 Photoshop 这样的手动软件,是否可以做到这一点?或者我手动删除跨越城市边界的区域。比如我取出了14张,结果是这样的:
我已经提供了所有数据子集和密钥,您可以自己测试。代码脚本如下,数据集为https://github.com/THsTestingGround/SO_geoSpatial_crop_Quest.
我在将 Geomtry
转换为 sf
后完成了 st_intersection(gainsville_df$Geomtry$x, gnv_poly$geometry)
,但我不知道下一步该怎么做才能摆脱这些部分。
library(sf)
library(tigris)
library(tidyverse)
library(tidycensus)
library(readr)
library(data.table)
#reading the shapefile
gnv_poly <- sf::st_read("PATH\GIS_cgbound\cgbound.shp") %>%
sf::st_transform(crs = 4326) %>%
sf::st_polygonize() %>%
sf::st_union()
#I have taken the "geometry" of latitude and longitude because it was corrupting my csv, but we can rebuild like so
gnv_latlon <- readr::read_csv("new_dataframe_data.csv") %>%
dplyr::select(ID,
Latitude,
Longitude,
Location) %>%
dplyr::mutate(Location = gsub(x= Location, pattern = "POINT \(|\)", replacement = "")) %>%
tidyr::separate(col = "Location", into = c("lon", "lat"), sep = " ") %>%
sf::st_as_sf(coords = c(4,5)) %>%
sf::st_set_crs(4326)
#then you can match the ID from gnv_latlon to
gainsville_df <- fread("new_dataframe_data.csv", drop = c("Latitude","Longitude", "Census Code"))
gainsville_df <- merge(gnv_latlon, gainsville_df, by = "ID")
#remove latitude and longitude points that fall outside of the polygon
dplyr::mutate(gainsville_df, check = as.vector(sf::st_intersects(x = gnv_latlon, y = gnv_poly, sparse = FALSE))) -> outliers_before
sf::st_filter(x= outliers_before, y= gnv_poly, predicate= st_intersects) -> gainsville_df
#Took out my census api key because of a feed back from a SO member. Please add a comment
#if you would like my census key.
#I use this function from tidycensus to retrieve the country shapfiles.
alachua <- tidycensus::get_acs(state = "FL", county = "Alachua", geography = "tract", geometry = T, variables = "B01003_001")
gainsville_df$Geomtry <- NULL
gainsville_df$Geomtry <- alachua$geometry[match(as.character(gainsville_df$`Geo ID`), alachua$GEOID)]
#gets us the first graph with bounry
ggplot() +
geom_sf(data = gainsville_df,aes(geometry= Geomtry, fill= Population), alpha= 0.2) +
coord_sf(crs = "+init=epsg:4326")+
geom_sf(data= gnv_poly) #with alpha added, we get the transparent boundary
现在我想获取第二张图像,而无需进行任何未来的手动操作。
由此.....
对此,可能吗?
找到这个 但这里的人只想从一个 shapefile 中删除边界。我试图将其操纵为零。
EDIT 这是我在 SymbolixAU 方向之后尝试过的,但是我的 idx
变量是来自 1:7
的数字
fl <- sf::st_read("PATH\GIS_cgbound\cgbound.shp") %>% sf::st_transform(crs = 4326)
gainsville_df$Geomtry <- sf::st_as_sf(gainsville_df$Geomtry) %>% sf::st_transform(crs= 4326)
#normal boundry plot
plot( fl[, "geometry"] )
# And we can make a boundary by selecting some of the goemetries and union-ing them
boundary <- fl[ gnv_poly$geometry %in% gainsville_df$Geomtry, ]
boundary <- sf::st_union( fl ) %>% sf::st_as_sf()
## So now 'boundary' represents the area you want to cut out of your total shapes
## So you can find the intersection by an appropriate method
## st_contains will tell you all the shapes from 'fl' contained within the boundary
idx <- sf::st_contains(x = boundary, y = fl)
#doesn't work, thus no way of knowing the overlaps
#plot( fl[ idx[[1]], "geometry" ] )
#several more plots which i can't make sense of
plot( fl[ st_intersection(gainsville_df$Geomtry, gnv_poly$geometry), ])
plot(gainsville_df$Geomtry) #this just plots tracts
我打算用library(mapdeck)
来绘制一切,主要是因为它是我开发的一个库,所以我非常熟悉它。它使用 Mapbox 地图,因此您需要一个 Mapbox 令牌才能使用它。
首先,获取数据
library(sf)
library(data.table)
fl <- sf::st_read("~/Documents/github/SO_geoSpatial_crop_Quest/GIS_cgbound/cgbound.shp") %>% sf::st_transform(crs = 4326)
gainsville_df <- fread("~/Documents/github/SO_geoSpatial_crop_Quest/new_dataframe_data.csv")
sf_gainsville <- sf::st_as_sf(gainsville_df, wkt = "Location")
## no need to transform, because it's already in Lon / Lat (?)
sf::st_crs( sf_gainsville ) <- 4326
#install.packages("tidycensus")
library(tidycensus)
tidycensus::census_api_key("21adc0b3d6e900378af9b7910d04110cdd38cd75", install = T, overwrite = T)
alachua <- tidycensus::get_acs(state = "FL", county = "Alachua", geography = "tract", geometry = T, variables = "B01003_001")
alachua <- sf::st_transform( alachua, crs = 4326 )
这就是我们正在处理的问题。我正在绘制多边形和边界路径
library(mapdeck)
set_token( secret::get_secret("MAPBOX") )
## this is what the polygons and the Alachua boundary looks like
mapdeck() %>%
add_polygon(
data = alachua
, fill_colour = "NAME"
) %>%
add_path(
data = fl
, stroke_width = 50
)
首先我要制作边界的多边形
boundary_poly <- sf::st_cast(fl, "POLYGON")
然后我们可以得到那些完全在边界内的多边形
idx <- sf::st_contains(
x = boundary_poly
, y = alachua
)
idx <- unlist( sapply( idx, `[`) )
sf_contain <- alachua[ idx, ]
mapdeck() %>%
add_polygon(
data = sf_contain
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
以及'touch'边界
idx <- sf::st_crosses(
x = fl
, y = alachua
)
idx <- unlist( idx )
sf_crosses <- alachua[ idx, ]
mapdeck() %>%
add_polygon(
data = sf_crosses
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
完全在外面的多边形既不接触边界也不在边界内
sf_outside <- sf::st_difference(
x = alachua
, y = sf::st_union( sf_crosses )
)
sf_outside <- sf::st_difference(
x = sf_outside
, y= sf::st_union( sf_contain )
)
mapdeck() %>%
add_polygon(
data = sf_outside
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
我们需要的是 'cut' 那些触及边界(sf_crosses
)的方法,因此每个多边形都有一个 'inside' 和一个 'outside' 部分
我们需要一次对每个多边形进行操作,'split' 通过与它相交的线。
可能有一种方法可以用 lwgeom::st_split
做到这一点,但我一直收到错误消息
为了解决这个问题,我使用了我的 sfheaders
库的开发版本
# devtools::install_github("dcooley/sfheaders")
res <- lapply( 1:nrow( sf_crosses ), function(x) {
## get the intersection of the polygon and the boundary
sf_int <- sf::st_intersection(
x = sf_crosses[x, ]
, y = fl
)
## we only need lines, not MULTILINES
sf_lines <- sfheaders::sf_cast(
sf_int, "LINESTRING"
)
## put a small buffer around the lines to make them polygons
sf_polys <- sf::st_buffer( sf_lines, dist = 0.0005 )
## Find the difference of these buffers and the polygon
sf_diff <- sf::st_difference(
sf_crosses[x, ]
, sf::st_union( sf_polys )
)
## this result is a MULTIPOLYGON, which is the original polygon from
## sf_crosses[x, ], split by the lines which cross it
sf_diff
})
## The result of this is all the polygons which touch the boundary path have been split
sf_res <- do.call(rbind, res)
所以 sf_res
现在应该是 'touch' 路径中的所有多边形,但在路径穿过它们的地方拆分
mapdeck() %>%
add_polygon(
data = sf_res
, stroke_colour = "#FFFFFF"
, stroke_width = 100
) %>%
add_path(
data = fl
, stroke_colour = "#FF00FF"
)
我们可以通过放大看到这个
现在我们可以找到哪些是在路径内部和路径外部
sf_in <- sf::st_join(
x = sf_res
, y = boundary_poly
, left = FALSE
)
sf_out <- sf::st_difference(
x = sf_res
, y = sf::st_union( boundary_poly )
)
mapdeck() %>%
add_path(
data = fl
, stroke_width = 50
, stroke_colour = "#000000"
) %>%
add_polygon(
data = sf_in
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "in"
) %>%
add_polygon(
data = sf_out
, fill_colour = "NAME"
, palette = "plasma"
, layer_id = "out"
)
现在我们关心的对象都有了
sf_contain
- 完全在边界内的所有多边形
sf_in
- 所有接触内部边界的多边形
sf_out
- 所有接触外部边界的多边形
sf_outside
- 所有其他多边形
mapdeck() %>%
add_path(
data = fl
, stroke_width = 50
, stroke_colour = "#000000"
) %>%
add_polygon(
data = sf_contain
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "contained_within_boundary"
) %>%
add_polygon(
data = sf_in
, fill_colour = "NAME"
, palette = "cividis"
, layer_id = "touching_boundary_inside"
) %>%
add_polygon(
data = sf_out
, fill_colour = "NAME"
, palette = "plasma"
, layer_id = "touching_boundary_outside"
) %>%
add_polygon(
data = sf_outside
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "outside_boundary"
)
我正试图摆脱落在我读取的 shapefile 边界之外的空间几何。如果没有像 Photoshop 这样的手动软件,是否可以做到这一点?或者我手动删除跨越城市边界的区域。比如我取出了14张,结果是这样的:
我已经提供了所有数据子集和密钥,您可以自己测试。代码脚本如下,数据集为https://github.com/THsTestingGround/SO_geoSpatial_crop_Quest.
我在将 Geomtry
转换为 sf
后完成了 st_intersection(gainsville_df$Geomtry$x, gnv_poly$geometry)
,但我不知道下一步该怎么做才能摆脱这些部分。
library(sf)
library(tigris)
library(tidyverse)
library(tidycensus)
library(readr)
library(data.table)
#reading the shapefile
gnv_poly <- sf::st_read("PATH\GIS_cgbound\cgbound.shp") %>%
sf::st_transform(crs = 4326) %>%
sf::st_polygonize() %>%
sf::st_union()
#I have taken the "geometry" of latitude and longitude because it was corrupting my csv, but we can rebuild like so
gnv_latlon <- readr::read_csv("new_dataframe_data.csv") %>%
dplyr::select(ID,
Latitude,
Longitude,
Location) %>%
dplyr::mutate(Location = gsub(x= Location, pattern = "POINT \(|\)", replacement = "")) %>%
tidyr::separate(col = "Location", into = c("lon", "lat"), sep = " ") %>%
sf::st_as_sf(coords = c(4,5)) %>%
sf::st_set_crs(4326)
#then you can match the ID from gnv_latlon to
gainsville_df <- fread("new_dataframe_data.csv", drop = c("Latitude","Longitude", "Census Code"))
gainsville_df <- merge(gnv_latlon, gainsville_df, by = "ID")
#remove latitude and longitude points that fall outside of the polygon
dplyr::mutate(gainsville_df, check = as.vector(sf::st_intersects(x = gnv_latlon, y = gnv_poly, sparse = FALSE))) -> outliers_before
sf::st_filter(x= outliers_before, y= gnv_poly, predicate= st_intersects) -> gainsville_df
#Took out my census api key because of a feed back from a SO member. Please add a comment
#if you would like my census key.
#I use this function from tidycensus to retrieve the country shapfiles.
alachua <- tidycensus::get_acs(state = "FL", county = "Alachua", geography = "tract", geometry = T, variables = "B01003_001")
gainsville_df$Geomtry <- NULL
gainsville_df$Geomtry <- alachua$geometry[match(as.character(gainsville_df$`Geo ID`), alachua$GEOID)]
#gets us the first graph with bounry
ggplot() +
geom_sf(data = gainsville_df,aes(geometry= Geomtry, fill= Population), alpha= 0.2) +
coord_sf(crs = "+init=epsg:4326")+
geom_sf(data= gnv_poly) #with alpha added, we get the transparent boundary
现在我想获取第二张图像,而无需进行任何未来的手动操作。
由此.....
对此,可能吗?
找到这个
EDIT 这是我在 SymbolixAU 方向之后尝试过的,但是我的 idx
变量是来自 1:7
fl <- sf::st_read("PATH\GIS_cgbound\cgbound.shp") %>% sf::st_transform(crs = 4326)
gainsville_df$Geomtry <- sf::st_as_sf(gainsville_df$Geomtry) %>% sf::st_transform(crs= 4326)
#normal boundry plot
plot( fl[, "geometry"] )
# And we can make a boundary by selecting some of the goemetries and union-ing them
boundary <- fl[ gnv_poly$geometry %in% gainsville_df$Geomtry, ]
boundary <- sf::st_union( fl ) %>% sf::st_as_sf()
## So now 'boundary' represents the area you want to cut out of your total shapes
## So you can find the intersection by an appropriate method
## st_contains will tell you all the shapes from 'fl' contained within the boundary
idx <- sf::st_contains(x = boundary, y = fl)
#doesn't work, thus no way of knowing the overlaps
#plot( fl[ idx[[1]], "geometry" ] )
#several more plots which i can't make sense of
plot( fl[ st_intersection(gainsville_df$Geomtry, gnv_poly$geometry), ])
plot(gainsville_df$Geomtry) #this just plots tracts
我打算用library(mapdeck)
来绘制一切,主要是因为它是我开发的一个库,所以我非常熟悉它。它使用 Mapbox 地图,因此您需要一个 Mapbox 令牌才能使用它。
首先,获取数据
library(sf)
library(data.table)
fl <- sf::st_read("~/Documents/github/SO_geoSpatial_crop_Quest/GIS_cgbound/cgbound.shp") %>% sf::st_transform(crs = 4326)
gainsville_df <- fread("~/Documents/github/SO_geoSpatial_crop_Quest/new_dataframe_data.csv")
sf_gainsville <- sf::st_as_sf(gainsville_df, wkt = "Location")
## no need to transform, because it's already in Lon / Lat (?)
sf::st_crs( sf_gainsville ) <- 4326
#install.packages("tidycensus")
library(tidycensus)
tidycensus::census_api_key("21adc0b3d6e900378af9b7910d04110cdd38cd75", install = T, overwrite = T)
alachua <- tidycensus::get_acs(state = "FL", county = "Alachua", geography = "tract", geometry = T, variables = "B01003_001")
alachua <- sf::st_transform( alachua, crs = 4326 )
这就是我们正在处理的问题。我正在绘制多边形和边界路径
library(mapdeck)
set_token( secret::get_secret("MAPBOX") )
## this is what the polygons and the Alachua boundary looks like
mapdeck() %>%
add_polygon(
data = alachua
, fill_colour = "NAME"
) %>%
add_path(
data = fl
, stroke_width = 50
)
首先我要制作边界的多边形
boundary_poly <- sf::st_cast(fl, "POLYGON")
然后我们可以得到那些完全在边界内的多边形
idx <- sf::st_contains(
x = boundary_poly
, y = alachua
)
idx <- unlist( sapply( idx, `[`) )
sf_contain <- alachua[ idx, ]
mapdeck() %>%
add_polygon(
data = sf_contain
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
以及'touch'边界
idx <- sf::st_crosses(
x = fl
, y = alachua
)
idx <- unlist( idx )
sf_crosses <- alachua[ idx, ]
mapdeck() %>%
add_polygon(
data = sf_crosses
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
完全在外面的多边形既不接触边界也不在边界内
sf_outside <- sf::st_difference(
x = alachua
, y = sf::st_union( sf_crosses )
)
sf_outside <- sf::st_difference(
x = sf_outside
, y= sf::st_union( sf_contain )
)
mapdeck() %>%
add_polygon(
data = sf_outside
, fill_colour = "NAME"
) %>%
add_path(
data = fl
)
我们需要的是 'cut' 那些触及边界(sf_crosses
)的方法,因此每个多边形都有一个 'inside' 和一个 'outside' 部分
我们需要一次对每个多边形进行操作,'split' 通过与它相交的线。
可能有一种方法可以用 lwgeom::st_split
做到这一点,但我一直收到错误消息
为了解决这个问题,我使用了我的 sfheaders
库的开发版本
# devtools::install_github("dcooley/sfheaders")
res <- lapply( 1:nrow( sf_crosses ), function(x) {
## get the intersection of the polygon and the boundary
sf_int <- sf::st_intersection(
x = sf_crosses[x, ]
, y = fl
)
## we only need lines, not MULTILINES
sf_lines <- sfheaders::sf_cast(
sf_int, "LINESTRING"
)
## put a small buffer around the lines to make them polygons
sf_polys <- sf::st_buffer( sf_lines, dist = 0.0005 )
## Find the difference of these buffers and the polygon
sf_diff <- sf::st_difference(
sf_crosses[x, ]
, sf::st_union( sf_polys )
)
## this result is a MULTIPOLYGON, which is the original polygon from
## sf_crosses[x, ], split by the lines which cross it
sf_diff
})
## The result of this is all the polygons which touch the boundary path have been split
sf_res <- do.call(rbind, res)
所以 sf_res
现在应该是 'touch' 路径中的所有多边形,但在路径穿过它们的地方拆分
mapdeck() %>%
add_polygon(
data = sf_res
, stroke_colour = "#FFFFFF"
, stroke_width = 100
) %>%
add_path(
data = fl
, stroke_colour = "#FF00FF"
)
我们可以通过放大看到这个
现在我们可以找到哪些是在路径内部和路径外部
sf_in <- sf::st_join(
x = sf_res
, y = boundary_poly
, left = FALSE
)
sf_out <- sf::st_difference(
x = sf_res
, y = sf::st_union( boundary_poly )
)
mapdeck() %>%
add_path(
data = fl
, stroke_width = 50
, stroke_colour = "#000000"
) %>%
add_polygon(
data = sf_in
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "in"
) %>%
add_polygon(
data = sf_out
, fill_colour = "NAME"
, palette = "plasma"
, layer_id = "out"
)
现在我们关心的对象都有了
sf_contain
- 完全在边界内的所有多边形sf_in
- 所有接触内部边界的多边形sf_out
- 所有接触外部边界的多边形sf_outside
- 所有其他多边形
mapdeck() %>%
add_path(
data = fl
, stroke_width = 50
, stroke_colour = "#000000"
) %>%
add_polygon(
data = sf_contain
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "contained_within_boundary"
) %>%
add_polygon(
data = sf_in
, fill_colour = "NAME"
, palette = "cividis"
, layer_id = "touching_boundary_inside"
) %>%
add_polygon(
data = sf_out
, fill_colour = "NAME"
, palette = "plasma"
, layer_id = "touching_boundary_outside"
) %>%
add_polygon(
data = sf_outside
, fill_colour = "NAME"
, palette = "viridis"
, layer_id = "outside_boundary"
)