在 R 中使用 shapefile 过滤经纬度点
Filter lat-lon points using shapefile in R
我正在尝试使用 shapefile 过滤热带气旋的经纬度点:
我正在做与之前 post 类似的问题:
但在这种情况下,我使用的是不规则形状文件(不是方框)。
我只想过滤具有相同标识符的点(下面示例数据中的 "SN" 列)通过 shapefile。因此,只会过滤唯一的 TC。
这是数据的一个子集:
dat <- structure(list(SN = c(200608L, 200608L, 200608L, 200608L, 200608L,
200608L, 200608L, 200608L, 200608L, 200608L, 200608L, 200608L,
200610L, 200610L, 200610L, 200612L, 200612L, 200612L, 200612L,
200612L, 200612L, 200612L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L), CY = c(8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L), Year = c(2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L), Month = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), Day = c(9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 9L, 9L, 9L, 14L,
14L, 14L, 15L, 15L, 15L, 15L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L), Hour = c(0L,
6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L,
6L, 12L, 18L, 0L, 6L, 12L, 18L, 6L, 12L, 18L, 0L, 6L, 12L, 18L,
0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L), Lat = c(23.7, 24.7,
25.3, 25.9, 26.4, 27, 27.2, 27.4, 27.7, 28.1, 28.5, 28.9, 22.8,
22.9, 22.4, 18.7, 19.8, 20.2, 21, 22.4, 23.9, 26.9, 17.4, 17,
16.7, 16.6, 16.5, 16.4, 16.3, 16.2, 16, 15.8, 15.5, 15.6, 15.9,
16.1, 16.6, 17.2, 17.9, 18.7, 19.4, 20.2, 21, 21.9, 22.7, 23.4,
24, 24.4, 24.8, 25.2, 25.6, 26), Lon = c(128.4, 126.9, 125.3,
123.9, 122.5, 121.1, 119.9, 118.7, 117.4, 115.8, 115, 114.4,
119.8, 118.6, 117.7, 131.3, 132.4, 133.9, 135.6, 137.2, 139.1,
140.4, 135.4, 135.2, 134.8, 134.3, 133.7, 133, 132.3, 131.5,
130.7, 129.9, 129.2, 128.5, 128, 127.6, 127.1, 126.6, 126.1,
125.6, 124.8, 124.2, 123.5, 122.8, 122, 121.2, 119.9, 119.4,
119, 118.6, 118.3, 118)), class = "data.frame", row.names = c(NA,
-52L))
预期结果:
我的预期结果与上面的 link 相似。 csv 文件中的另外两个列指示 TC 是否在 shapefile 中。
每个 TC 都有一个唯一的标识符:SN 列。因此,如果具有相同 SN 编号的经纬度点在 shapefile 中,它们将被标记为 TRUE(所有具有相同 SN 编号的经纬度点)。
dat[SN %in% dat[inBounds == TRUE, unique(SN)], passesThroughBox := T ]
dat[is.na(passesThroughBox), passesThroughBox := F]
关于我如何在 R 中执行此操作的任何建议?
真诚的,
林兹
不是最好的样本数据,因为似乎没有一个观察结果通过 shapefile 的 bbox。
不过,她是我解决问题的方法..
library( tidyverse )
library( sf )
sf 和 tidyverse 解决方案
#first, get the boundaries of the shapefile
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox()
# xmin ymin xmax ymax
# 122.29929 11.71056 124.42607 14.50061
dat %>%
#check if measurement is withing boundaries (yes, or not (no))
mutate( passes_through_box = ifelse( Lat >= bbox[2] & Lat <= bbox[4] & Lon >= bbox[1] & Lon <= bbox[3], "yes", "no" ) ) %>%
#group by SN
group_by( SN ) %>%
#check if any value of 'passes_through_box' in a group is "yes", if so 'yes', else 'no'
mutate( passes_through_box_anyime = ifelse( any( passes_through_box == "yes"), "yes", "no" ) )
# # A tibble: 52 x 10
# # Groups: SN [4]
# SN CY Year Month Day Hour Lat Lon passes_through_box passes_through_box_anyime
# <int> <int> <int> <int> <int> <int> <dbl> <dbl> <chr> <chr>
# 1 200608 8 2006 8 9 0 23.7 128. no no
# 2 200608 8 2006 8 9 6 24.7 127. no no
# 3 200608 8 2006 8 9 12 25.3 125. no no
# 4 200608 8 2006 8 9 18 25.9 124. no no
# 5 200608 8 2006 8 10 0 26.4 122. no no
# 6 200608 8 2006 8 10 6 27 121. no no
所有sf解决方案
library( tidyverse )
library( sf )
#first, get the boundaries of the shapefile
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox() %>% st_as_sfc( crs = "+proj=longlat +datum=WGS84" )
# xmin ymin xmax ymax
# 122.29929 11.71056 124.42607 14.50061
#create aspatial data.frame
spdf <- st_as_sf( x = dat,
coords = c("Lon", "Lat"),
crs = "+proj=longlat +datum=WGS84" )
spdf %>%
#check if a line the spatial df intersecgt with the defined boundary-box
mutate( passes_through_box = as.numeric( st_intersects(spdf, bbox) ) ) %>%
group_by( SN ) %>%
mutate( passes_through_box_anyime = ifelse( any( passes_through_box == 1), "yes", "no" ) )
为了完整起见,另一种 sf 方法
library( tidyverse )
library( sf )
#first, get the boundaries of the shapefile and create a box
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox() %>% st_as_sfc( crs = "+proj=longlat +datum=WGS84" )
#create a spatial points data.frame using the sample data provided
spdf <- st_as_sf( x = dat,
coords = c("Lon", "Lat"),
crs = "+proj=longlat +datum=WGS84" )
#create a spatial lines data.frame, bases on lat-lon groupes by SN
sldf <- spdf %>%
group_by( SN ) %>%
summarise( m = mean( Year ) ) %>%
st_cast( "LINESTRING" ) %>%
select( -m )
#let's see the printed results
library(mapview)
mapview( list(bbox, sldf) )
#any intersections? ... #nope, no intersections
st_intersects( bbox, sldf )
# although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Sparse geometry binary predicate list of length 1, where the predicate was `intersects'
# 1: (empty)
我正在尝试使用 shapefile 过滤热带气旋的经纬度点:
我正在做与之前 post 类似的问题:
但在这种情况下,我使用的是不规则形状文件(不是方框)。
我只想过滤具有相同标识符的点(下面示例数据中的 "SN" 列)通过 shapefile。因此,只会过滤唯一的 TC。
这是数据的一个子集:
dat <- structure(list(SN = c(200608L, 200608L, 200608L, 200608L, 200608L,
200608L, 200608L, 200608L, 200608L, 200608L, 200608L, 200608L,
200610L, 200610L, 200610L, 200612L, 200612L, 200612L, 200612L,
200612L, 200612L, 200612L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L, 200709L, 200709L,
200709L, 200709L, 200709L, 200709L, 200709L), CY = c(8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 10L, 10L, 10L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L), Year = c(2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L), Month = c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), Day = c(9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 9L, 9L, 9L, 14L,
14L, 14L, 15L, 15L, 15L, 15L, 12L, 12L, 12L, 13L, 13L, 13L, 13L,
14L, 14L, 14L, 14L, 15L, 15L, 15L, 15L, 16L, 16L, 16L, 16L, 17L,
17L, 17L, 17L, 18L, 18L, 18L, 18L, 19L, 19L, 19L), Hour = c(0L,
6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L,
6L, 12L, 18L, 0L, 6L, 12L, 18L, 6L, 12L, 18L, 0L, 6L, 12L, 18L,
0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L, 12L), Lat = c(23.7, 24.7,
25.3, 25.9, 26.4, 27, 27.2, 27.4, 27.7, 28.1, 28.5, 28.9, 22.8,
22.9, 22.4, 18.7, 19.8, 20.2, 21, 22.4, 23.9, 26.9, 17.4, 17,
16.7, 16.6, 16.5, 16.4, 16.3, 16.2, 16, 15.8, 15.5, 15.6, 15.9,
16.1, 16.6, 17.2, 17.9, 18.7, 19.4, 20.2, 21, 21.9, 22.7, 23.4,
24, 24.4, 24.8, 25.2, 25.6, 26), Lon = c(128.4, 126.9, 125.3,
123.9, 122.5, 121.1, 119.9, 118.7, 117.4, 115.8, 115, 114.4,
119.8, 118.6, 117.7, 131.3, 132.4, 133.9, 135.6, 137.2, 139.1,
140.4, 135.4, 135.2, 134.8, 134.3, 133.7, 133, 132.3, 131.5,
130.7, 129.9, 129.2, 128.5, 128, 127.6, 127.1, 126.6, 126.1,
125.6, 124.8, 124.2, 123.5, 122.8, 122, 121.2, 119.9, 119.4,
119, 118.6, 118.3, 118)), class = "data.frame", row.names = c(NA,
-52L))
预期结果:
我的预期结果与上面的 link 相似。 csv 文件中的另外两个列指示 TC 是否在 shapefile 中。 每个 TC 都有一个唯一的标识符:SN 列。因此,如果具有相同 SN 编号的经纬度点在 shapefile 中,它们将被标记为 TRUE(所有具有相同 SN 编号的经纬度点)。
dat[SN %in% dat[inBounds == TRUE, unique(SN)], passesThroughBox := T ]
dat[is.na(passesThroughBox), passesThroughBox := F]
关于我如何在 R 中执行此操作的任何建议?
真诚的, 林兹
不是最好的样本数据,因为似乎没有一个观察结果通过 shapefile 的 bbox。 不过,她是我解决问题的方法..
library( tidyverse )
library( sf )
sf 和 tidyverse 解决方案
#first, get the boundaries of the shapefile
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox()
# xmin ymin xmax ymax
# 122.29929 11.71056 124.42607 14.50061
dat %>%
#check if measurement is withing boundaries (yes, or not (no))
mutate( passes_through_box = ifelse( Lat >= bbox[2] & Lat <= bbox[4] & Lon >= bbox[1] & Lon <= bbox[3], "yes", "no" ) ) %>%
#group by SN
group_by( SN ) %>%
#check if any value of 'passes_through_box' in a group is "yes", if so 'yes', else 'no'
mutate( passes_through_box_anyime = ifelse( any( passes_through_box == "yes"), "yes", "no" ) )
# # A tibble: 52 x 10
# # Groups: SN [4]
# SN CY Year Month Day Hour Lat Lon passes_through_box passes_through_box_anyime
# <int> <int> <int> <int> <int> <int> <dbl> <dbl> <chr> <chr>
# 1 200608 8 2006 8 9 0 23.7 128. no no
# 2 200608 8 2006 8 9 6 24.7 127. no no
# 3 200608 8 2006 8 9 12 25.3 125. no no
# 4 200608 8 2006 8 9 18 25.9 124. no no
# 5 200608 8 2006 8 10 0 26.4 122. no no
# 6 200608 8 2006 8 10 6 27 121. no no
所有sf解决方案
library( tidyverse )
library( sf )
#first, get the boundaries of the shapefile
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox() %>% st_as_sfc( crs = "+proj=longlat +datum=WGS84" )
# xmin ymin xmax ymax
# 122.29929 11.71056 124.42607 14.50061
#create aspatial data.frame
spdf <- st_as_sf( x = dat,
coords = c("Lon", "Lat"),
crs = "+proj=longlat +datum=WGS84" )
spdf %>%
#check if a line the spatial df intersecgt with the defined boundary-box
mutate( passes_through_box = as.numeric( st_intersects(spdf, bbox) ) ) %>%
group_by( SN ) %>%
mutate( passes_through_box_anyime = ifelse( any( passes_through_box == 1), "yes", "no" ) )
为了完整起见,另一种 sf 方法
library( tidyverse )
library( sf )
#first, get the boundaries of the shapefile and create a box
bbox <- read_sf(dsn = "./Bicol_Region/Bicol_region.shp") %>% st_bbox() %>% st_as_sfc( crs = "+proj=longlat +datum=WGS84" )
#create a spatial points data.frame using the sample data provided
spdf <- st_as_sf( x = dat,
coords = c("Lon", "Lat"),
crs = "+proj=longlat +datum=WGS84" )
#create a spatial lines data.frame, bases on lat-lon groupes by SN
sldf <- spdf %>%
group_by( SN ) %>%
summarise( m = mean( Year ) ) %>%
st_cast( "LINESTRING" ) %>%
select( -m )
#let's see the printed results
library(mapview)
mapview( list(bbox, sldf) )
#any intersections? ... #nope, no intersections
st_intersects( bbox, sldf )
# although coordinates are longitude/latitude, st_intersects assumes that they are planar
# Sparse geometry binary predicate list of length 1, where the predicate was `intersects'
# 1: (empty)