使用 sf 快速制作 SpatialPointsDataFrame
Make a SpatialPointsDataFrame with sf the fast way
我尝试使用 R 中的 sp
包来完成的任务非常简单,但我正在尝试学习 sf
因此我的问题。我正在尝试在 R 中创建点的形状。我有很多点,所以它必须高效。我在 sp
和 sf
中都成功做到了,但是 sf 方法很慢。作为 sf
的新手,我感觉我没有以最有效的方式来做这件事。
我制作了 3 个不同的功能,它们做同样的事情:
1) 100% sp
f_rgdal <- function(dat) {
coordinates(dat) <- ~x+y
}
2) 100% sf
(可能不好...)
f_sf <- function(dat) {
dat <- st_sfc(
lapply(
apply(dat[,c("x", "y")], 1, list), function(xx) st_point(xx[[1]])
)
)
}
3) 两者的混合:
f_rgdal_sp <- function(dat) {
coordinates(dat) <- ~x+y
dat <- as(dat, "sf")
}
如果我对它们进行基准测试,您会发现函数 2 和 3 都比函数 1 慢得多:
set.seed(1234)
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
library(sp)
library(sf)
library(rbenchmark)
benchmark(f_rgdal(dd), f_sf(dd), f_rgdal_sp(dd), columns = c("test", "elapsed"))
test elapsed
1 f_rgdal(dd) 0.22
3 f_rgdal_sp(dd) 4.82
2 f_sf(dd) 4.08
他们是加速 sf
的方法吗?最后,我想使用 st_write
,它比 writeOGR
更快,所以留在 sp
并不理想。
library(sf)
library(microbenchmark)
set.seed(1234)
nb_pt <- 100
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
print(st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT"))
## Geometry set for 100 features
## geometry type: POINT
## dimension: XY
## bbox: xmin: 0.9495756 ymin: 1.110341 xmax: 99.21504 ymax: 49.93704
## epsg (SRID): NA
## proj4string: NA
## First 5 geometries:
## POINT (11.3703411305323 1.77283635130152)
## POINT (62.2299404814839 28.253805602435)
## POINT (60.9274732880294 14.0128888073377)
## POINT (62.3379441676661 10.2098158211447)
## POINT (86.0915383556858 6.68694493360817)
microbenchmark(
sf=st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT")
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## sf 1.834133 1.960914 2.608143 2.0314 2.280842 39.04158 100
一个更"compact"的选择可以是:
library(sf)
set.seed(1234)
nb_pt <- 10000
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
sf <- sf::st_as_sf(dd, coords = c("x","y"))
sf
#> Simple feature collection with 10000 features and 1 field
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 0.03418126 ymin: 0.02131674 xmax: 99.95938 ymax: 49.99873
#> epsg (SRID): NA
#> proj4string: NA
#> First 10 features:
#> f1 geometry
#> 1 -1.81689753 POINT (11.3703411305323 10....
#> 2 0.62716684 POINT (62.2299404814839 24....
#> 3 0.51809210 POINT (60.9274732880294 31....
#> 4 0.14092183 POINT (62.3379441676661 47....
#> 5 1.45727195 POINT (86.0915383556858 8.9...
#> 6 -0.49359652 POINT (64.0310605289415 14....
#> 7 -2.12224406 POINT (0.94957563560456 19....
#> 8 -0.13356660 POINT (23.2550506014377 3.8...
#> 9 -0.42760035 POINT (66.6083758231252 14....
#> 10 0.08779481 POINT (51.4251141343266 23....
它具有保留数据属性的优势,并且对于较大的数据集似乎更快:
library(microbenchmark)
microbenchmark::microbenchmark(
st_cast = st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT"),
st_asf = sf::st_as_sf(dd, coords = c("x","y"))
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> st_cast 208.6751 256.8995 294.2232 284.2213 316.1777 454.6856 100
#> st_asf 157.1974 176.6357 207.9863 200.1610 226.1047 323.5700 100
我尝试使用 R 中的 sp
包来完成的任务非常简单,但我正在尝试学习 sf
因此我的问题。我正在尝试在 R 中创建点的形状。我有很多点,所以它必须高效。我在 sp
和 sf
中都成功做到了,但是 sf 方法很慢。作为 sf
的新手,我感觉我没有以最有效的方式来做这件事。
我制作了 3 个不同的功能,它们做同样的事情:
1) 100% sp
f_rgdal <- function(dat) {
coordinates(dat) <- ~x+y
}
2) 100% sf
(可能不好...)
f_sf <- function(dat) {
dat <- st_sfc(
lapply(
apply(dat[,c("x", "y")], 1, list), function(xx) st_point(xx[[1]])
)
)
}
3) 两者的混合:
f_rgdal_sp <- function(dat) {
coordinates(dat) <- ~x+y
dat <- as(dat, "sf")
}
如果我对它们进行基准测试,您会发现函数 2 和 3 都比函数 1 慢得多:
set.seed(1234)
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
library(sp)
library(sf)
library(rbenchmark)
benchmark(f_rgdal(dd), f_sf(dd), f_rgdal_sp(dd), columns = c("test", "elapsed"))
test elapsed
1 f_rgdal(dd) 0.22
3 f_rgdal_sp(dd) 4.82
2 f_sf(dd) 4.08
他们是加速 sf
的方法吗?最后,我想使用 st_write
,它比 writeOGR
更快,所以留在 sp
并不理想。
library(sf)
library(microbenchmark)
set.seed(1234)
nb_pt <- 100
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
print(st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT"))
## Geometry set for 100 features
## geometry type: POINT
## dimension: XY
## bbox: xmin: 0.9495756 ymin: 1.110341 xmax: 99.21504 ymax: 49.93704
## epsg (SRID): NA
## proj4string: NA
## First 5 geometries:
## POINT (11.3703411305323 1.77283635130152)
## POINT (62.2299404814839 28.253805602435)
## POINT (60.9274732880294 14.0128888073377)
## POINT (62.3379441676661 10.2098158211447)
## POINT (86.0915383556858 6.68694493360817)
microbenchmark(
sf=st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT")
)
## Unit: milliseconds
## expr min lq mean median uq max neval
## sf 1.834133 1.960914 2.608143 2.0314 2.280842 39.04158 100
一个更"compact"的选择可以是:
library(sf)
set.seed(1234)
nb_pt <- 10000
dd <- data.frame(x = runif(nb_pt, 0, 100),
y = runif(nb_pt, 0,50),
f1 = rnorm(nb_pt))
sf <- sf::st_as_sf(dd, coords = c("x","y"))
sf
#> Simple feature collection with 10000 features and 1 field
#> geometry type: POINT
#> dimension: XY
#> bbox: xmin: 0.03418126 ymin: 0.02131674 xmax: 99.95938 ymax: 49.99873
#> epsg (SRID): NA
#> proj4string: NA
#> First 10 features:
#> f1 geometry
#> 1 -1.81689753 POINT (11.3703411305323 10....
#> 2 0.62716684 POINT (62.2299404814839 24....
#> 3 0.51809210 POINT (60.9274732880294 31....
#> 4 0.14092183 POINT (62.3379441676661 47....
#> 5 1.45727195 POINT (86.0915383556858 8.9...
#> 6 -0.49359652 POINT (64.0310605289415 14....
#> 7 -2.12224406 POINT (0.94957563560456 19....
#> 8 -0.13356660 POINT (23.2550506014377 3.8...
#> 9 -0.42760035 POINT (66.6083758231252 14....
#> 10 0.08779481 POINT (51.4251141343266 23....
它具有保留数据属性的优势,并且对于较大的数据集似乎更快:
library(microbenchmark)
microbenchmark::microbenchmark(
st_cast = st_cast(st_sfc(st_multipoint(as.matrix(dd[,1:2]))), "POINT"),
st_asf = sf::st_as_sf(dd, coords = c("x","y"))
)
#> Unit: milliseconds
#> expr min lq mean median uq max neval
#> st_cast 208.6751 256.8995 294.2232 284.2213 316.1777 454.6856 100
#> st_asf 157.1974 176.6357 207.9863 200.1610 226.1047 323.5700 100