我如何使用 sf 从数据框中的记录按行创建多边形,类似于 Postgis

How can i rowwise create polygons from records in a dataframe using sf, similar to Postgis

我在 Postgis 中有一个 table,其中包含字段 beginpunt,这是一个包含单个点的几何字段。 接下来,我将创建一个包含该点的 100*100m 正方形。 在 Postgis 中,我将使用下面的查询并且效果很好。

select 
*
,st_transform(
    ST_MakeEnvelope(floor(st_x(st_transform(beginpunt,28992))/100) *100,
                floor(st_y(st_transform(beginpunt,28992))/100) *100, 
                floor(st_x(st_transform(beginpunt,28992))/100) *100 +100,
                floor(st_y(st_transform(beginpunt,28992))/100) *100 +100, 28992),4326)  as cbs_begin          
from cbs

出于特定原因,我想在不使用 Postgis 的情况下实现相同的目标,但在 R 中完成整个练习。 这就是我卡住的地方。我可以创建一个多边形,但我的问题是在数据框中按行进行。

我在 R

中有相同的 table (cbs)

为简单起见,我创建了一些额外的字段:

    dput(cbs)  #this is the input dataframe
    
    cbs$beginpunt_rd<-st_transform(cbs$beginpunt,28992) #transform crs
    cbs$begincbsx<-floor(st_coordinates(cbs$beginpunt_rd)[,1]/100)*100 #define origin x
    cbs$begincbsy<-floor(st_coordinates(cbs$beginpunt_rd)[,2]/100)*100 #define origin y
    
    cbs$pol<-NA #create an extra empty field in the dataframe
    tbldata<-st_sfc()  #create an empty table 
    for(i in 1:nrow(cbs)){
      cbs_begin <- st_polygon(
        list(
          cbind(
            rbind(cbs$begincbsx[i],cbs$begincbsx[i]+100,cbs$begincbsx[i]+100 , cbs$begincbsx[i],cbs$begincbsx[i]),
            rbind(cbs$begincbsy[i],cbs$begincbsy[i],cbs$begincbsy[i]+100 , cbs$begincbsy[i]+100,cbs$begincbsy[i])
          )
        ))
      
      if(i<2){tbldata<-cbs[FALSE,]} #create empty dataframe
      tbldata[nrow(tbldata) + 1, ] <- c(cbs[i,1:5],st_sfc(st_polygon(cbs_begin),crs=28992)) #add row to dataframe
    }

dput(tbldata) #this is the output dataframe

上面的代码为我提供了一个(新)数据框,其中包含一个包含具有正确坐标的列表的字段“pol”, 但缺少将其变成多边形或 sf 对象的步骤。

我看过一些如何实现的示例,但我无法让它工作

编辑:输入-table cbs dput(cbs) 结果是:

structure(list(id = c(9594L, 9520L, 9492L, 83859L, 
                              9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337, 
                                                                                          51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573, 
                                                                                                                                                     51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459, 
                                                                                                                                                                                                                51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685, 
                                                                                                                                                                                                                                                                          51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874, 
                                                                                                                                                                                                                                                                                                                                     51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628, 
                                                                                                                                                                                                                                                                                                                                                                                                51.5812475), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
                                                                                                                                                                                                                                                                                                                                                                                                                                                          "sfc"), precision = 0, bbox = structure(c(xmin = 5.0129685, ymin = 51.5164541, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    xmax = 5.08459, ymax = 51.5865527), class = "bbox"), crs = structure(list(
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"), class = "crs"), n_empty = 0L)), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                6L), class = "data.frame")

和 output/result table: tbldata dput(tbldata)

structure(list(id = c(9594L, 9520L, 9492L, 83859L, 
9438L, 9490L), beginpunt = structure(list(structure(c(5.0499337, 
51.5676501), class = c("XY", "POINT", "sfg")), structure(c(5.0146573, 
51.5818484), class = c("XY", "POINT", "sfg")), structure(c(5.08459, 
51.557595), class = c("XY", "POINT", "sfg")), structure(c(5.0129685, 
51.5865527), class = c("XY", "POINT", "sfg")), structure(c(5.0548874, 
51.5164541), class = c("XY", "POINT", "sfg")), structure(c(5.0647628, 
51.5812475), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 5.0499337, 
ymin = 51.5676501, xmax = 5.0499337, ymax = 51.5676501), class = "bbox"), crs = structure(list(
    input = "EPSG:4326", wkt = "GEOGCRS[\"WGS 84\",\n    DATUM[\"World Geodetic System 1984\",\n        ELLIPSOID[\"WGS 84\",6378137,298.257223563,\n            LENGTHUNIT[\"metre\",1]]],\n    PRIMEM[\"Greenwich\",0,\n        ANGLEUNIT[\"degree\",0.0174532925199433]],\n    CS[ellipsoidal,2],\n        AXIS[\"geodetic latitude (Lat)\",north,\n            ORDER[1],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        AXIS[\"geodetic longitude (Lon)\",east,\n            ORDER[2],\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n    USAGE[\n        SCOPE[\"Horizontal component of 3D system.\"],\n        AREA[\"World.\"],\n        BBOX[-90,-180,90,180]],\n    ID[\"EPSG\",4326]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT", 
"sfc")), beginpunt_rd = structure(list(structure(c(131616.180272543, 
397688.855374183), class = c("XY", "POINT", "sfg")), structure(c(129178.446528786, 
399280.343643684), class = c("XY", "POINT", "sfg")), structure(c(134014.355256952, 
396559.663404012), class = c("XY", "POINT", "sfg")), structure(c(129064.081985984, 
399804.302704657), class = c("XY", "POINT", "sfg")), structure(c(131933.668374674, 
391991.666781811), class = c("XY", "POINT", "sfg")), structure(c(132651.016139436, 
399196.932221466), class = c("XY", "POINT", "sfg"))), precision = 0, bbox = structure(c(xmin = 131616.180272543, 
ymin = 397688.855374183, xmax = 131616.180272543, ymax = 397688.855374183
), class = "bbox"), crs = structure(list(input = "EPSG:28992", 
    wkt = "PROJCRS[\"Amersfoort / RD New\",\n    BASEGEOGCRS[\"Amersfoort\",\n        DATUM[\"Amersfoort\",\n            ELLIPSOID[\"Bessel 1841\",6377397.155,299.1528128,\n                LENGTHUNIT[\"metre\",1]]],\n        PRIMEM[\"Greenwich\",0,\n            ANGLEUNIT[\"degree\",0.0174532925199433]],\n        ID[\"EPSG\",4289]],\n    CONVERSION[\"RD New\",\n        METHOD[\"Oblique Stereographic\",\n            ID[\"EPSG\",9809]],\n        PARAMETER[\"Latitude of natural origin\",52.1561605555556,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8801]],\n        PARAMETER[\"Longitude of natural origin\",5.38763888888889,\n            ANGLEUNIT[\"degree\",0.0174532925199433],\n            ID[\"EPSG\",8802]],\n        PARAMETER[\"Scale factor at natural origin\",0.9999079,\n            SCALEUNIT[\"unity\",1],\n            ID[\"EPSG\",8805]],\n        PARAMETER[\"False easting\",155000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8806]],\n        PARAMETER[\"False northing\",463000,\n            LENGTHUNIT[\"metre\",1],\n            ID[\"EPSG\",8807]]],\n    CS[Cartesian,2],\n        AXIS[\"easting (X)\",east,\n            ORDER[1],\n            LENGTHUNIT[\"metre\",1]],\n        AXIS[\"northing (Y)\",north,\n            ORDER[2],\n            LENGTHUNIT[\"metre\",1]],\n    USAGE[\n        SCOPE[\"Engineering survey, topographic mapping.\"],\n        AREA[\"Netherlands - onshore, including Waddenzee, Dutch Wadden Islands and 12-mile offshore coastal zone.\"],\n        BBOX[50.75,3.2,53.7,7.22]],\n    ID[\"EPSG\",28992]]"), class = "crs"), classes = character(0), n_empty = 0L, class = c("sfc_POINT", 
"sfc")), begincbsx = c(131600, 129100, 134000, 129000, 131900, 
132600), begincbsy = c(397600, 399200, 396500, 399800, 391900, 
399100), pol = list(structure(c(131600, 131700, 131700, 131600, 
131600, 397600, 397600, 397700, 397700, 397600), .Dim = c(5L, 
2L)), structure(c(129100, 129200, 129200, 129100, 129100, 399200, 
399200, 399300, 399300, 399200), .Dim = c(5L, 2L)), structure(c(134000, 
134100, 134100, 134000, 134000, 396500, 396500, 396600, 396600, 
396500), .Dim = c(5L, 2L)), structure(c(129000, 129100, 129100, 
129000, 129000, 399800, 399800, 399900, 399900, 399800), .Dim = c(5L, 
2L)), structure(c(131900, 132000, 132000, 131900, 131900, 391900, 
391900, 392000, 392000, 391900), .Dim = c(5L, 2L)), structure(c(132600, 
132700, 132700, 132600, 132600, 399100, 399100, 399200, 399200, 
399100), .Dim = c(5L, 2L)))), row.names = c(NA, 6L), class = "data.frame")

预期输出为

您只需输入以下代码行,它就会起作用:

cbs <- st_sf(cbs, sf_column_name = "beginpunt", crs = 4326)

输出:

#> Simple feature collection with 6 features and 1 field
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS:  WGS 84
#>      id                 beginpunt
#> 1  9594 POINT (5.049934 51.56765)
#> 2  9520 POINT (5.014657 51.58185)
#> 3  9492  POINT (5.08459 51.55759)
#> 4 83859 POINT (5.012969 51.58655)
#> 5  9438 POINT (5.054887 51.51645)
#> 6  9490 POINT (5.064763 51.58125)

reprex package (v2.0.1)

于 2021-09-28 创建

编辑 1

根据@skaqqs 的评论,我完成了我的回答,没有考虑他的建议,这通常非常有效......但前提是这些点以正确的顺序排列以产生有效的多边形(这通常是最常见的情况)。

数据框中点的原始顺序未生成有效的多边形:保持原始顺序,我们得到相交线。因此,如果我们选择@skaqqs 提出的经典解决方案并绘制多边形,结果如下:


cbs_standard <- st_combine(cbs)
(cbs_standard <- st_cast(cbs_standard, "POLYGON"))
#> Geometry set for 1 feature 
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 5.012968 ymin: 51.51645 xmax: 5.08459 ymax: 51.58655
#> Geodetic CRS:  WGS 84
#> POLYGON ((5.049934 51.56765, 5.014657 51.58185,...
plot(cbs_standard)

为了避免这个困难,我建议安装和加载“concaveman”包的另一种解决方案。

因此,请在下面找到(非常简单的)获取拓扑有效多边形的过程。

install.packages("concaveman")
library(concaveman)

(cbs_concaveman <- concaveman(cbs))
#> Simple feature collection with 1 feature and 0 fields
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 5.013 ymin: 51.5165 xmax: 5.0846 ymax: 51.5866
#> Geodetic CRS:  WGS 84
#>                         polygons
#> 1 POLYGON ((5.0147 51.5818, 5...
plot(cbs_concaveman)

reprex package (v2.0.1)

于 2021-09-28 创建

编辑 2

O.K。我们不了解对方@GeoDude。为了跟进您的评论,这里是(我希望!)您的问题的解决方案。您只需键入以下几行代码,它就会起作用:

z <- st_polygon()
for (i in seq(length(tbldata$pol))){
  z[i] <-  st_polygon(list(tbldata$pol[[i]]))
}

sfc <- st_sfc(z, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc, row.names = 1:length(z))
tbldata$pol <- NULL

tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#>      id                 beginpunt              beginpunt_rd begincbsx begincbsy
#> 1  9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9)    131600    397600
#> 2  9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3)    129100    399200
#> 3  9492  POINT (5.08459 51.55759) POINT (134014.4 396559.7)    134000    396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3)    129000    399800
#> 5  9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7)    131900    391900
#> 6  9490 POINT (5.064763 51.58125)   POINT (132651 399196.9)    132600    399100
#>                             geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((131600 397600, 13...
#> 3 POLYGON ((131600 397600, 13...
#> 4 POLYGON ((131600 397600, 13...
#> 5 POLYGON ((131600 397600, 13...
#> 6 POLYGON ((131600 397600, 13...

reprex package (v2.0.1)

于 2021-09-29 创建

预期输出:

library(tmap)

tmap_mode("view")
#> tmap mode set to interactive viewing
tmap::tm_shape(tbldata) +
  tm_basemap(server = "OpenStreetMap")+
  tm_fill(NA) +
  tm_borders(col="black")

reprex package (v2.0.1)

于 2021-09-29 创建

编辑 3

作为您对@Geodude 的评论的跟进,请在下方找到修复方法。此外,我删除了 for 循环以考虑@nniloc 的评论(为此,我构建了函数 polygon_list(),我已使用 sapply() 应用于 tbldata$pol)。

polygon_list <-  function(x){
  st_polygon(list(x))
}

h <- st_polygon()
h <- sapply(tbldata$pol, polygon_list, simplify = FALSE)
sfc <- st_sfc(h, crs = 28992)
tbldata <- st_sf(tbldata, geom = sfc)
tbldata$pol <- NULL
tbldata
#> Simple feature collection with 6 features and 3 fields
#> Active geometry column: geom
#> Geometry type: POLYGON
#> Dimension:     XY
#> Bounding box:  xmin: 129000 ymin: 391900 xmax: 134100 ymax: 399900
#> Projected CRS: Amersfoort / RD New
#>      id                 beginpunt              beginpunt_rd begincbsx begincbsy
#> 1  9594 POINT (5.049934 51.56765) POINT (131616.2 397688.9)    131600    397600
#> 2  9520 POINT (5.014657 51.58185) POINT (129178.4 399280.3)    129100    399200
#> 3  9492  POINT (5.08459 51.55759) POINT (134014.4 396559.7)    134000    396500
#> 4 83859 POINT (5.012969 51.58655) POINT (129064.1 399804.3)    129000    399800
#> 5  9438 POINT (5.054887 51.51645) POINT (131933.7 391991.7)    131900    391900
#> 6  9490 POINT (5.064763 51.58125)   POINT (132651 399196.9)    132600    399100
#>                             geom
#> 1 POLYGON ((131600 397600, 13...
#> 2 POLYGON ((129100 399200, 12...
#> 3 POLYGON ((134000 396500, 13...
#> 4 POLYGON ((129000 399800, 12...
#> 5 POLYGON ((131900 391900, 13...
#> 6 POLYGON ((132600 399100, 13...

通过检查其中一个多边形,问题似乎已解决:

tbldata$geom[[3]]
#> POLYGON ((134000 396500, 134100 396500, 134100 396600, 134000 396600, 134000 396500))

reprex package (v2.0.1)

于 2021-10-01 创建

一个tidyverse解决方案。与@lovalery 大致相同,但跳过了 for 循环。

library(tidyverse)
library(sf)
library(ggspatial)

df_sf <- tbldata %>%
  mutate(pol2 = st_sfc(st_multipolygon(list(pol)))) %>%
  select(-beginpunt, -beginpunt_rd) %>%
  st_as_sf(crs = 28992)

# plot to verify
ggplot(df_sf) +
  annotation_map_tile(zoom = 13) +
  geom_sf(col = 'blue', fill = NA)