rgeos::gCentroid() 和 sf::st_centroid() 返回的值是否不同?
Do the values returned by rgeos::gCentroid() and sf::st_centroid() differ?
问题
由 rgeos::gCentroid()
and sf::st_centroid()
编辑的值 return 是否不同?如果是,怎么做?
上下文
阅读 relevant commands exported by rgeos
section within the r-spatial/sf
wiki, I was thrilled to see that I only needed the sf
package - and no longer needed to import the rgeos
包后 - 计算给定几何体的质心。
然而,sf::st_centroid()
的使用给了我这个警告,which is addressed here:
Warning message:
In st_centroid.sfc(x = comarea606$geometry) :
st_centroid does not give correct centroids for longitude/latitude data
那个警告让我测试了两种质心检索方法之间的相等性,只是为了确保无论使用哪种方法,坐标都是相同的。
虽然我使用 identical()
and %in%
resulted in non-identical matches, all.equal()
和绘制每个方法的质心的地图 似乎 是说这两种方法几乎相同。
为什么一种方法会 return 一组与另一种方法不同的值?
有什么原因吗?
可重现的例子
# load neccessary packages
library( sf )
library( rgeos )
# load data
comarea606 <-
read_sf(
dsn = "https://data.cityofchicago.org/api/geospatial/cauq-8yn6?method=export&format=GeoJSON"
, layer = "OGRGeoJSON"
)
# find the centroid of each polygon
comarea606$centroids <-
st_centroid( x = comarea606$geometry ) %>%
st_geometry()
# Warning message:
# In st_centroid.sfc(x = comarea606$geometry) :
# st_centroid does not give correct centroids for longitude/latitude data
# Ensure the st_centroid() method
# contains identical values to gCentroid()
sf.centroids <-
st_coordinates( x = comarea606$centroids )
rgeos.centroids <-
gCentroid(
spgeom = methods::as(
object = comarea606
, Class = "Spatial"
)
, byid = TRUE
)@coords
# ensure the colnames are the same
colnames( rgeos.centroids ) <-
colnames( sf.centroids )
# Test for equality
identical(
x = sf.centroids
, y = rgeos.centroids
) # [1] FALSE
all.equal(
target = sf.centroids
, current = rgeos.centroids
) # [1] TRUE
# View the first six results
head( sf.centroids )
# X Y
# 1 -87.61868 41.83512
# 2 -87.60322 41.82375
# 3 -87.63242 41.80909
# 4 -87.61786 41.81295
# 5 -87.59618 41.80892
# 6 -87.68752 41.97517
head( rgeos.centroids )
# X Y
# 1 -87.61868 41.83512
# 2 -87.60322 41.82375
# 3 -87.63242 41.80909
# 4 -87.61786 41.81295
# 5 -87.59618 41.80892
# 6 -87.68752 41.97517
# Identify the numbers which aren't identical
sf.centroids[ , "X" ] %in% rgeos.centroids[ , "X" ]
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sf.centroids[ , "Y" ] %in% rgeos.centroids[ , "Y" ]
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# view results
par(
mar = c( 2, 0, 3, 0 )
, bg = "black"
)
plot(
x = comarea606$geometry
, main = "`sf` v. `rgeos`:\nComparing Centroid Coordinates"
, col.main = "white"
, col = "black"
, border = "white"
)
plot(
x = comarea606$centroids
, add = TRUE
, pch = 24
, col = "#B4DDF2"
, bg = "#B4DDF2"
, cex = 1.2
)
points(
x = rgeos.centroids[ , "X" ]
, y = rgeos.centroids[ , "Y" ]
, pch = 24
, col = "#FB0D1B"
, bg = "#FB0D1B"
, cex = 0.6
)
legend(
x = "left"
, legend = c(
"Centroids from `sf::st_coordinate()`"
, "Centroids from `rgeos::gCentroid()`"
)
, col = c( "#B4DDF2", "#FB0D1B" )
, pt.bg = c( "#B4DDF2", "#FB0D1B" )
, pch = 24
, bty = "n"
, cex = 0.9
, text.col = "white"
)
mtext(
adj = 0.99
, line = 1
, side = 1
, cex = 0.9
, text = "Source: Chicago Data Portal"
, col = "white"
)
# end of script #
Session Info
R version 3.4.4 (2018-03-15)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods
[7] base
other attached packages:
[1] rgeos_0.3-26 sf_0.6-0
loaded via a namespace (and not attached):
[1] modeltools_0.2-21 kernlab_0.9-25 reshape2_1.4.3
[4] lattice_0.20-35 colorspace_1.3-2 htmltools_0.3.6
[7] stats4_3.4.4 viridisLite_0.3.0 yaml_2.1.18
[10] utf8_1.1.3 rlang_0.2.0 R.oo_1.21.0
[13] e1071_1.6-8 pillar_1.2.1 withr_2.1.2
[16] DBI_0.8 prabclus_2.2-6 R.utils_2.6.0
[19] sp_1.2-7 fpc_2.1-11 plyr_1.8.4
[22] robustbase_0.92-8 stringr_1.3.0 munsell_0.4.3
[25] gtable_0.2.0 raster_2.6-7 R.methodsS3_1.7.1
[28] devtools_1.13.5 mvtnorm_1.0-7 memoise_1.1.0
[31] evaluate_0.10.1 knitr_1.20 flexmix_2.3-14
[34] class_7.3-14 DEoptimR_1.0-8 trimcluster_0.1-2
[37] Rcpp_0.12.16 udunits2_0.13 scales_0.5.0
[40] backports_1.1.2 diptest_0.75-7 classInt_0.1-24
[43] squash_1.0.8 gridExtra_2.3 ggplot2_2.2.1
[46] digest_0.6.15 stringi_1.1.6 grid_3.4.4
[49] rprojroot_1.3-2 rgdal_1.2-18 cli_1.0.0
[52] tools_3.4.4 magrittr_1.5 lazyeval_0.2.1
[55] tibble_1.4.2 cluster_2.0.6 crayon_1.3.4
[58] whisker_0.3-2 dendextend_1.7.0 MASS_7.3-49
[61] assertthat_0.2.0 rmarkdown_1.9 rstudioapi_0.7
[64] viridis_0.5.0 mclust_5.4 units_0.5-1
[67] nnet_7.3-12 compiler_3.4.4
这个问题简化为:all.equal()
与 identical()
有何不同,我们查看这些函数的文档:
all.equal(x, y) is a utility to compare R objects x and y testing ‘near equality’.
和 identical()
The safe and reliable way to test two objects for being exactly equal. It returns TRUE in this case, FALSE in every other case.
让我们仔细看看 all.equal.numeric()
,这是对这两个对象的调用,因为 return "double"
和 typeof()
都是如此。我们看到 all.equal.numeric()
中有一个 tolerance
参数,默认设置为 sqrt(.Machine$double.eps)
。 .Machine$double.eps
是您的机器可以添加到 1
并能够将其与 1
区分开来的最小数字。这不准确,但在那个数量级上。 all.equal.numeric()
本质上是检查一个向量中的所有值是否都是 near()
另一个向量中的所有值。您可以查看源代码(主要是错误检查)以确切了解它是如何做到这一点的。
为了让自己相信它们实际上不是 identical()
,请尝试查看 sf.centroids - rgeos.centroids
的输出。
head(sf.centroids - rgeos.centroids)
# X Y
# 1 -5.056506e-10 2.623175e-09
# 2 -2.961613e-09 -4.269602e-09
# 3 4.235119e-10 4.358100e-09
# 4 -7.688925e-10 -1.051717e-09
# 5 1.226582e-09 1.668568e-10
# 6 -2.957009e-09 4.875247e-10
这两个矩阵几乎完全相同(但 none 的值完全相同)。
问题
由 rgeos::gCentroid()
and sf::st_centroid()
编辑的值 return 是否不同?如果是,怎么做?
上下文
阅读 relevant commands exported by rgeos
section within the r-spatial/sf
wiki, I was thrilled to see that I only needed the sf
package - and no longer needed to import the rgeos
包后 - 计算给定几何体的质心。
然而,sf::st_centroid()
的使用给了我这个警告,which is addressed here:
Warning message:
In st_centroid.sfc(x = comarea606$geometry) :
st_centroid does not give correct centroids for longitude/latitude data
那个警告让我测试了两种质心检索方法之间的相等性,只是为了确保无论使用哪种方法,坐标都是相同的。
虽然我使用 identical()
and %in%
resulted in non-identical matches, all.equal()
和绘制每个方法的质心的地图 似乎 是说这两种方法几乎相同。
为什么一种方法会 return 一组与另一种方法不同的值?
有什么原因吗?可重现的例子
# load neccessary packages
library( sf )
library( rgeos )
# load data
comarea606 <-
read_sf(
dsn = "https://data.cityofchicago.org/api/geospatial/cauq-8yn6?method=export&format=GeoJSON"
, layer = "OGRGeoJSON"
)
# find the centroid of each polygon
comarea606$centroids <-
st_centroid( x = comarea606$geometry ) %>%
st_geometry()
# Warning message:
# In st_centroid.sfc(x = comarea606$geometry) :
# st_centroid does not give correct centroids for longitude/latitude data
# Ensure the st_centroid() method
# contains identical values to gCentroid()
sf.centroids <-
st_coordinates( x = comarea606$centroids )
rgeos.centroids <-
gCentroid(
spgeom = methods::as(
object = comarea606
, Class = "Spatial"
)
, byid = TRUE
)@coords
# ensure the colnames are the same
colnames( rgeos.centroids ) <-
colnames( sf.centroids )
# Test for equality
identical(
x = sf.centroids
, y = rgeos.centroids
) # [1] FALSE
all.equal(
target = sf.centroids
, current = rgeos.centroids
) # [1] TRUE
# View the first six results
head( sf.centroids )
# X Y
# 1 -87.61868 41.83512
# 2 -87.60322 41.82375
# 3 -87.63242 41.80909
# 4 -87.61786 41.81295
# 5 -87.59618 41.80892
# 6 -87.68752 41.97517
head( rgeos.centroids )
# X Y
# 1 -87.61868 41.83512
# 2 -87.60322 41.82375
# 3 -87.63242 41.80909
# 4 -87.61786 41.81295
# 5 -87.59618 41.80892
# 6 -87.68752 41.97517
# Identify the numbers which aren't identical
sf.centroids[ , "X" ] %in% rgeos.centroids[ , "X" ]
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
sf.centroids[ , "Y" ] %in% rgeos.centroids[ , "Y" ]
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [11] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [21] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [31] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [41] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [51] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# [71] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# view results
par(
mar = c( 2, 0, 3, 0 )
, bg = "black"
)
plot(
x = comarea606$geometry
, main = "`sf` v. `rgeos`:\nComparing Centroid Coordinates"
, col.main = "white"
, col = "black"
, border = "white"
)
plot(
x = comarea606$centroids
, add = TRUE
, pch = 24
, col = "#B4DDF2"
, bg = "#B4DDF2"
, cex = 1.2
)
points(
x = rgeos.centroids[ , "X" ]
, y = rgeos.centroids[ , "Y" ]
, pch = 24
, col = "#FB0D1B"
, bg = "#FB0D1B"
, cex = 0.6
)
legend(
x = "left"
, legend = c(
"Centroids from `sf::st_coordinate()`"
, "Centroids from `rgeos::gCentroid()`"
)
, col = c( "#B4DDF2", "#FB0D1B" )
, pt.bg = c( "#B4DDF2", "#FB0D1B" )
, pch = 24
, bty = "n"
, cex = 0.9
, text.col = "white"
)
mtext(
adj = 0.99
, line = 1
, side = 1
, cex = 0.9
, text = "Source: Chicago Data Portal"
, col = "white"
)
# end of script #
Session Info
R version 3.4.4 (2018-03-15)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods
[7] base
other attached packages:
[1] rgeos_0.3-26 sf_0.6-0
loaded via a namespace (and not attached):
[1] modeltools_0.2-21 kernlab_0.9-25 reshape2_1.4.3
[4] lattice_0.20-35 colorspace_1.3-2 htmltools_0.3.6
[7] stats4_3.4.4 viridisLite_0.3.0 yaml_2.1.18
[10] utf8_1.1.3 rlang_0.2.0 R.oo_1.21.0
[13] e1071_1.6-8 pillar_1.2.1 withr_2.1.2
[16] DBI_0.8 prabclus_2.2-6 R.utils_2.6.0
[19] sp_1.2-7 fpc_2.1-11 plyr_1.8.4
[22] robustbase_0.92-8 stringr_1.3.0 munsell_0.4.3
[25] gtable_0.2.0 raster_2.6-7 R.methodsS3_1.7.1
[28] devtools_1.13.5 mvtnorm_1.0-7 memoise_1.1.0
[31] evaluate_0.10.1 knitr_1.20 flexmix_2.3-14
[34] class_7.3-14 DEoptimR_1.0-8 trimcluster_0.1-2
[37] Rcpp_0.12.16 udunits2_0.13 scales_0.5.0
[40] backports_1.1.2 diptest_0.75-7 classInt_0.1-24
[43] squash_1.0.8 gridExtra_2.3 ggplot2_2.2.1
[46] digest_0.6.15 stringi_1.1.6 grid_3.4.4
[49] rprojroot_1.3-2 rgdal_1.2-18 cli_1.0.0
[52] tools_3.4.4 magrittr_1.5 lazyeval_0.2.1
[55] tibble_1.4.2 cluster_2.0.6 crayon_1.3.4
[58] whisker_0.3-2 dendextend_1.7.0 MASS_7.3-49
[61] assertthat_0.2.0 rmarkdown_1.9 rstudioapi_0.7
[64] viridis_0.5.0 mclust_5.4 units_0.5-1
[67] nnet_7.3-12 compiler_3.4.4
这个问题简化为:all.equal()
与 identical()
有何不同,我们查看这些函数的文档:
all.equal(x, y) is a utility to compare R objects x and y testing ‘near equality’.
和 identical()
The safe and reliable way to test two objects for being exactly equal. It returns TRUE in this case, FALSE in every other case.
让我们仔细看看 all.equal.numeric()
,这是对这两个对象的调用,因为 return "double"
和 typeof()
都是如此。我们看到 all.equal.numeric()
中有一个 tolerance
参数,默认设置为 sqrt(.Machine$double.eps)
。 .Machine$double.eps
是您的机器可以添加到 1
并能够将其与 1
区分开来的最小数字。这不准确,但在那个数量级上。 all.equal.numeric()
本质上是检查一个向量中的所有值是否都是 near()
另一个向量中的所有值。您可以查看源代码(主要是错误检查)以确切了解它是如何做到这一点的。
为了让自己相信它们实际上不是 identical()
,请尝试查看 sf.centroids - rgeos.centroids
的输出。
head(sf.centroids - rgeos.centroids)
# X Y
# 1 -5.056506e-10 2.623175e-09
# 2 -2.961613e-09 -4.269602e-09
# 3 4.235119e-10 4.358100e-09
# 4 -7.688925e-10 -1.051717e-09
# 5 1.226582e-09 1.668568e-10
# 6 -2.957009e-09 4.875247e-10
这两个矩阵几乎完全相同(但 none 的值完全相同)。