将几何特征向量转换为 sfc_LINESTRING
Convert character vector of geometry into sfc_LINESTRING
我有一个数据table如下,需要转换成sf对象:
library(data.table)
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15
)
每个ID的几何信息是一个字符向量,由不同数量的坐标构成线串。 ID“B”有4个坐标; ID“C”有 5.
geo <- c("c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)"
)
正在将几何信息添加到 DT:
DT$geometry <- geo
现在,我需要将 DT 转换为几何指定为 sfc_LINESTRING 的 sf 对象。我尝试使用 st_cast 首先将基于字符的几何变量转换为线串,但它产生了一个错误。
DT_sf <- st_cast(DT$geometry, "LINESTRING")
Error in UseMethod("st_cast") :
no applicable method for 'st_cast' applied to an object of class "character"
需要对近 20,000 行进行此转换。所以,我正在寻找一种计算效率高的方法来实现所需的结果。
您可以删除 c()
符号,然后将坐标拆分为列表。然后您可以创建一个数据框并使用例如上半部分的行和纬度。这可以转换为矩阵,格式 st_linestring
理解:
library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
geo <- c(
"c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)"
)
parsed_geo <-
geo %>%
map(~ {
.x %>%
str_remove_all("c[(]|[)]$") %>%
str_split(",") %>%
first() %>%
map_chr(str_trim) %>%
as_tibble() %>%
mutate(lon_lat = row_number() <= n() / 2) %>%
pivot_wider(names_from = lon_lat, values_fn = list) %>%
unnest() %>%
type_convert() %>%
as.matrix() %>%
st_linestring()
})
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15
)
DT$geometry <- parsed_geo
DT
#> ID Value geometry
#> 1: A 11 LINESTRING (-112.1165 33.37...,...
#> 2: B 12 LINESTRING (-112.2829 33.49...,...
#> 3: C 13 LINESTRING (-112.2811 33.49...,...
#> 4: D 14 LINESTRING (-112.283 33.501...,...
#> 5: E 15 LINESTRING (-112.283 33.499...,...
由 reprex package (v2.0.0)
于 2022-05-09 创建
这个有效:
library(sfheaders)
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15)
geo <- c("c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)")
geo <- geo %>%
sub('c\(', '', x = .) %>%
sub('\)', '', x = .) %>%
lapply(function(x) x %>%
str_split_fixed(",", str_count(string = ., ',')+1) %>%
matrix(ncol=2) %>%
data.frame)
geom <- list()
for (i in seq(length(geo))){
geom <- c(geom, sfheaders::sf_linestring(matrix(as.numeric(as.matrix(geo[[i]])),
ncol=2)))
}
sequ <- seq.default(from = 2, to = length(geom), by = 2)
DT$geometry <- geom %>%
do.call(rbind, .[sequ]) %>%
st_sfc()
ID Value geometry
1: A 11 LINESTRING (-112.1165 33.37...,...
2: B 12 LINESTRING (-112.2829 33.49...,...
3: C 13 LINESTRING (-112.2811 33.49...,...
4: D 14 LINESTRING (-112.283 33.501...,...
5: E 15 LINESTRING (-112.283 33.499...,...
您可以使用 eval
来“评估作为字符串给出的表达式”
(如果您的字符串未经过清理(想想 SQL 注入), 可能 会很危险))
因此,使用您的 geo
对象,您会得到
lst <- lapply(geo, function(x) { eval(parse(text = x)) })
str( lst )
List of 5
# $ : num [1:18] -112 -112 -112 -112 -112 ...
# $ : num [1:8] -112.3 -112.3 -112.3 -112.3 33.5 ...
# $ : num [1:16] -112 -112 -112 -112 -112 ...
# $ : num [1:18] -112 -112 -112 -112 -112 ...
# $ : num [1:10] -112 -112 -112 -112 -112 ...
由于我们一次评估 geo 中的每个向量(在 lapply
内),我们还可以同时将其设为 sfg 对象
lst <- lapply(geo, function(x) {
v <- eval(parse(text = x))
m <- matrix(v, ncol = 2)
sf::st_linestring(m)
})
然后只需添加正确的 class 属性
DT$geo <- lst
DT$geo <- sf::st_as_sfc( DT$geo )
DT <- sf::st_as_sf( DT )
sf::st_crs( DT ) <- 4326
DT
# Simple feature collection with 5 features and 2 fields
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: -112.283 ymin: 33.37771 xmax: -112.1165 ymax: 33.50192
# Geodetic CRS: WGS 84
# ID Value geo
# 1 A 11 LINESTRING (-112.1165 33.37...
# 2 B 12 LINESTRING (-112.2829 33.49...
# 3 C 13 LINESTRING (-112.2811 33.49...
# 4 D 14 LINESTRING (-112.283 33.501...
# 5 E 15 LINESTRING (-112.283 33.499...
我有一个数据table如下,需要转换成sf对象:
library(data.table)
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15
)
每个ID的几何信息是一个字符向量,由不同数量的坐标构成线串。 ID“B”有4个坐标; ID“C”有 5.
geo <- c("c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)"
)
正在将几何信息添加到 DT:
DT$geometry <- geo
现在,我需要将 DT 转换为几何指定为 sfc_LINESTRING 的 sf 对象。我尝试使用 st_cast 首先将基于字符的几何变量转换为线串,但它产生了一个错误。
DT_sf <- st_cast(DT$geometry, "LINESTRING")
Error in UseMethod("st_cast") :
no applicable method for 'st_cast' applied to an object of class "character"
需要对近 20,000 行进行此转换。所以,我正在寻找一种计算效率高的方法来实现所需的结果。
您可以删除 c()
符号,然后将坐标拆分为列表。然后您可以创建一个数据框并使用例如上半部分的行和纬度。这可以转换为矩阵,格式 st_linestring
理解:
library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
library(tidyverse)
library(data.table)
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
geo <- c(
"c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)"
)
parsed_geo <-
geo %>%
map(~ {
.x %>%
str_remove_all("c[(]|[)]$") %>%
str_split(",") %>%
first() %>%
map_chr(str_trim) %>%
as_tibble() %>%
mutate(lon_lat = row_number() <= n() / 2) %>%
pivot_wider(names_from = lon_lat, values_fn = list) %>%
unnest() %>%
type_convert() %>%
as.matrix() %>%
st_linestring()
})
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
#> Warning: `cols` is now required when using unnest().
#> Please use `cols = c(`TRUE`, `FALSE`)`
#>
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#> `TRUE` = col_double(),
#> `FALSE` = col_double()
#> )
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15
)
DT$geometry <- parsed_geo
DT
#> ID Value geometry
#> 1: A 11 LINESTRING (-112.1165 33.37...,...
#> 2: B 12 LINESTRING (-112.2829 33.49...,...
#> 3: C 13 LINESTRING (-112.2811 33.49...,...
#> 4: D 14 LINESTRING (-112.283 33.501...,...
#> 5: E 15 LINESTRING (-112.283 33.499...,...
由 reprex package (v2.0.0)
于 2022-05-09 创建这个有效:
library(sfheaders)
DT <- data.table(
ID = c("A", "B", "C", "D", "E"),
Value = 11:15)
geo <- c("c(-112.116492521272, -112.116492811159, -112.116492812107, -112.116491781569, -112.116482854256, -112.116482819195, -112.116476331207, -112.116476325101, -112.11647589954, 33.3777109072744, 33.377733456163, 33.377733512504, 33.377817189599, 33.3785425053239, 33.3785454379367, 33.3790725760563, 33.3790731291841, 33.3791076444333)",
"c(-112.282916223332, -112.282955145531, -112.282977080374, -112.282986066594, 33.499285198973, 33.4994146786288, 33.4995335119373, 33.4998030580162)",
"c(-112.281058674957, -112.281058522318, -112.281057917087, -112.281057356648, -112.281055594103, -112.281047371356, -112.281048086137, -112.28104821173, 33.4937123457776, 33.4937301348982, 33.4938008007847, 33.4938659107566, 33.4940708243904, 33.4950232493953, 33.4951159682343, 33.4951322463168)",
"c(-112.282978024041, -112.282977000088, -112.282975472281, -112.282975387447, -112.282974470679, -112.282974464144, -112.282974284899, -112.28297410899, -112.282974107453, 33.5011764123633, 33.5013710145493, 33.5016617311961, 33.501678000948, 33.5018530730796, 33.5018546369058, 33.5018887965849, 33.5019223852857, 33.5019226044706)",
"c(-112.282986066594, -112.282985540911, -112.282984156895, -112.282983004093, -112.282982201845, 33.4998030580162, 33.4998965204233, 33.5001425170464, 33.5003478058912, 33.5004906801949)")
geo <- geo %>%
sub('c\(', '', x = .) %>%
sub('\)', '', x = .) %>%
lapply(function(x) x %>%
str_split_fixed(",", str_count(string = ., ',')+1) %>%
matrix(ncol=2) %>%
data.frame)
geom <- list()
for (i in seq(length(geo))){
geom <- c(geom, sfheaders::sf_linestring(matrix(as.numeric(as.matrix(geo[[i]])),
ncol=2)))
}
sequ <- seq.default(from = 2, to = length(geom), by = 2)
DT$geometry <- geom %>%
do.call(rbind, .[sequ]) %>%
st_sfc()
ID Value geometry
1: A 11 LINESTRING (-112.1165 33.37...,...
2: B 12 LINESTRING (-112.2829 33.49...,...
3: C 13 LINESTRING (-112.2811 33.49...,...
4: D 14 LINESTRING (-112.283 33.501...,...
5: E 15 LINESTRING (-112.283 33.499...,...
您可以使用 eval
来“评估作为字符串给出的表达式”
(如果您的字符串未经过清理(想想 SQL 注入), 可能 会很危险))
因此,使用您的 geo
对象,您会得到
lst <- lapply(geo, function(x) { eval(parse(text = x)) })
str( lst )
List of 5
# $ : num [1:18] -112 -112 -112 -112 -112 ...
# $ : num [1:8] -112.3 -112.3 -112.3 -112.3 33.5 ...
# $ : num [1:16] -112 -112 -112 -112 -112 ...
# $ : num [1:18] -112 -112 -112 -112 -112 ...
# $ : num [1:10] -112 -112 -112 -112 -112 ...
由于我们一次评估 geo 中的每个向量(在 lapply
内),我们还可以同时将其设为 sfg 对象
lst <- lapply(geo, function(x) {
v <- eval(parse(text = x))
m <- matrix(v, ncol = 2)
sf::st_linestring(m)
})
然后只需添加正确的 class 属性
DT$geo <- lst
DT$geo <- sf::st_as_sfc( DT$geo )
DT <- sf::st_as_sf( DT )
sf::st_crs( DT ) <- 4326
DT
# Simple feature collection with 5 features and 2 fields
# Geometry type: LINESTRING
# Dimension: XY
# Bounding box: xmin: -112.283 ymin: 33.37771 xmax: -112.1165 ymax: 33.50192
# Geodetic CRS: WGS 84
# ID Value geo
# 1 A 11 LINESTRING (-112.1165 33.37...
# 2 B 12 LINESTRING (-112.2829 33.49...
# 3 C 13 LINESTRING (-112.2811 33.49...
# 4 D 14 LINESTRING (-112.283 33.501...
# 5 E 15 LINESTRING (-112.283 33.499...