基于与 sf 的距离对选择点
Selection of points based in pair of distance with sf
我想计算点位置之间的欧几里德距离 (df.dist
) 并在一对距离大于 20000 米时删除一些位置 (dplyr::filter(as.numeric(df.dist$dist) < 20000)
),但没有工作。
在我的例子中:
# Package
library(sf)
#> Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(dplyr)
# Create data
set.seed(1)
df <- data.frame(
x = rnorm(10),
y = rnorm(10)
)
df <- st_as_sf(df,coords = c("x","y"),remove = F, crs = 4326)
df.laea = st_transform(
df,
crs = "+proj=laea +x_0=4600000 +y_0=4600000 +lon_0=0.13 +lat_0=0.24 +datum=WGS84 +units=m"
)
# Calculate matrix distance and selection pair of distance < 20000 meters
df.dist <- df %>%
mutate(
dist = st_distance(geometry)
)
distance.target <- df.dist %>%
dplyr::filter(as.numeric(df.dist$dist) < 20000)
#Erro: Problem with `filter()` input `..1`.
#i Input `..1` is `as.numeric(df.dist$dist) < 20000`.
#x Input `..1` must be of size 10 or 1, not size 100.
#Run `rlang::last_error()` to see where the error occurred.
拜托,我有什么解决办法吗?
提前致谢!
st_distance
returns 矩阵在您的情况下为 10x10,它是 100 个值,因此错误 Input ..1 must be of size 10 or 1, not size 100.
。 df.dist
中的输出非常具有误导性:
对于df.dist
Simple feature collection with 10 features and 3 fields
geometry type: POINT
dimension: XY
bbox: xmin: -0.8356286 ymin: -2.2147 xmax: 1.595281 ymax: 1.511781
geographic CRS: WGS 84
x y geometry dist
1 -0.6264538 1.51178117 POINT (-0.6264538 1.511781) 0.00 [m]
2 0.1836433 0.38984324 POINT (0.1836433 0.3898432) 153363.06 [m]
3 -0.8356286 -0.62124058 POINT (-0.8356286 -0.6212406) 237004.19 [m]
4 1.5952808 -2.21469989 POINT (1.595281 -2.2147) 480555.51 [m]
5 0.3295078 1.12493092 POINT (0.3295078 1.124931) 114666.44 [m]
6 -0.8204684 -0.04493361 POINT (-0.8204684 -0.04493361) 173482.34 [m]
7 0.4874291 -0.01619026 POINT (0.4874291 -0.01619026) 209564.83 [m]
8 0.7383247 0.94383621 POINT (0.7383247 0.9438362) 164361.86 [m]
9 0.5757814 0.82122120 POINT (0.5757814 0.8212212) 154058.72 [m]
10 -0.3053884 0.59390132 POINT (-0.3053884 0.5939013) 107601.29 [m]
因为df.dist$dist
是矩阵:
Units: [m]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0.0 153363.06 237004.2 480555.5 114666.44 173482.34 209564.83 164361.86 154058.72 107601.29
[2,] 153363.1 0.00 159289.4 328063.3 82887.65 121676.58 56207.83 86974.75 64657.24 58927.70
[3,] 237004.2 159289.36 0.0 322838.3 232597.70 63747.10 161762.69 246263.77 223885.09 146756.58
[4,] 480555.5 328063.26 322838.3 0.0 395238.69 360338.42 272578.66 362043.43 354354.31 375761.40
[5,] 114666.4 82887.65 232597.7 395238.7 0.00 181986.32 127397.41 49713.21 43349.25 91879.46
[6,] 173482.3 121676.58 63747.1 360338.4 181986.32 0.00 145629.14 205089.33 182564.02 90980.32
[7,] 209564.8 56207.83 161762.7 272578.7 127397.41 145629.14 0.00 109766.72 93117.07 111084.52
[8,] 164361.9 86974.75 246263.8 362043.4 49713.21 205089.33 109766.72 0.00 22608.55 122449.40
[9,] 154058.7 64657.24 223885.1 354354.3 43349.25 182564.02 93117.07 22608.55 0.00 101253.41
[10,] 107601.3 58927.70 146756.6 375761.4 91879.46 90980.32 111084.52 122449.40 101253.41 0.00
你可能想要这样的东西:
m <- df$geometry %>% st_distance()
indices <- which(m < units::set_units(20000, "m"), arr.ind = TRUE)
df.dist <- as.data.frame(indices)
df.dist$dist <- as.numeric(m[indices])
row col dist
1 1 1 0
2 2 2 0
3 3 3 0
4 4 4 0
5 5 5 0
6 6 6 0
7 7 7 0
8 8 8 0
9 9 9 0
10 10 10 0
另一种略有不同的方法是使用 as.data.frame.table
转换矩阵,然后像通常在 dplyr
.
中那样转换 filter
另一个答案中的索引对于大型数据集会更有效。
# Packages
library(sf)
#> Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(dplyr, warn.conflicts = FALSE)
# Create data
set.seed(1)
df <- data.frame(x = rnorm(10),
y = rnorm(10)) %>%
st_as_sf(coords = c("x","y"),
remove = F,
crs = 4326)
df %>%
pull(geometry) %>%
st_distance() %>%
as.data.frame.table() %>%
filter(Freq < units::set_units(20000, 'm'))
#> Var1 Var2 Freq
#> 1 A A 0 [m]
#> 2 B B 0 [m]
#> 3 C C 0 [m]
#> 4 D D 0 [m]
#> 5 E E 0 [m]
#> 6 F F 0 [m]
#> 7 G G 0 [m]
#> 8 H H 0 [m]
#> 9 I I 0 [m]
#> 10 J J 0 [m]
由 reprex package (v2.0.0)
于 2021-08-20 创建
我想计算点位置之间的欧几里德距离 (df.dist
) 并在一对距离大于 20000 米时删除一些位置 (dplyr::filter(as.numeric(df.dist$dist) < 20000)
),但没有工作。
在我的例子中:
# Package
library(sf)
#> Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(dplyr)
# Create data
set.seed(1)
df <- data.frame(
x = rnorm(10),
y = rnorm(10)
)
df <- st_as_sf(df,coords = c("x","y"),remove = F, crs = 4326)
df.laea = st_transform(
df,
crs = "+proj=laea +x_0=4600000 +y_0=4600000 +lon_0=0.13 +lat_0=0.24 +datum=WGS84 +units=m"
)
# Calculate matrix distance and selection pair of distance < 20000 meters
df.dist <- df %>%
mutate(
dist = st_distance(geometry)
)
distance.target <- df.dist %>%
dplyr::filter(as.numeric(df.dist$dist) < 20000)
#Erro: Problem with `filter()` input `..1`.
#i Input `..1` is `as.numeric(df.dist$dist) < 20000`.
#x Input `..1` must be of size 10 or 1, not size 100.
#Run `rlang::last_error()` to see where the error occurred.
拜托,我有什么解决办法吗?
提前致谢!
st_distance
returns 矩阵在您的情况下为 10x10,它是 100 个值,因此错误 Input ..1 must be of size 10 or 1, not size 100.
。 df.dist
中的输出非常具有误导性:
对于df.dist
Simple feature collection with 10 features and 3 fields
geometry type: POINT
dimension: XY
bbox: xmin: -0.8356286 ymin: -2.2147 xmax: 1.595281 ymax: 1.511781
geographic CRS: WGS 84
x y geometry dist
1 -0.6264538 1.51178117 POINT (-0.6264538 1.511781) 0.00 [m]
2 0.1836433 0.38984324 POINT (0.1836433 0.3898432) 153363.06 [m]
3 -0.8356286 -0.62124058 POINT (-0.8356286 -0.6212406) 237004.19 [m]
4 1.5952808 -2.21469989 POINT (1.595281 -2.2147) 480555.51 [m]
5 0.3295078 1.12493092 POINT (0.3295078 1.124931) 114666.44 [m]
6 -0.8204684 -0.04493361 POINT (-0.8204684 -0.04493361) 173482.34 [m]
7 0.4874291 -0.01619026 POINT (0.4874291 -0.01619026) 209564.83 [m]
8 0.7383247 0.94383621 POINT (0.7383247 0.9438362) 164361.86 [m]
9 0.5757814 0.82122120 POINT (0.5757814 0.8212212) 154058.72 [m]
10 -0.3053884 0.59390132 POINT (-0.3053884 0.5939013) 107601.29 [m]
因为df.dist$dist
是矩阵:
Units: [m]
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
[1,] 0.0 153363.06 237004.2 480555.5 114666.44 173482.34 209564.83 164361.86 154058.72 107601.29
[2,] 153363.1 0.00 159289.4 328063.3 82887.65 121676.58 56207.83 86974.75 64657.24 58927.70
[3,] 237004.2 159289.36 0.0 322838.3 232597.70 63747.10 161762.69 246263.77 223885.09 146756.58
[4,] 480555.5 328063.26 322838.3 0.0 395238.69 360338.42 272578.66 362043.43 354354.31 375761.40
[5,] 114666.4 82887.65 232597.7 395238.7 0.00 181986.32 127397.41 49713.21 43349.25 91879.46
[6,] 173482.3 121676.58 63747.1 360338.4 181986.32 0.00 145629.14 205089.33 182564.02 90980.32
[7,] 209564.8 56207.83 161762.7 272578.7 127397.41 145629.14 0.00 109766.72 93117.07 111084.52
[8,] 164361.9 86974.75 246263.8 362043.4 49713.21 205089.33 109766.72 0.00 22608.55 122449.40
[9,] 154058.7 64657.24 223885.1 354354.3 43349.25 182564.02 93117.07 22608.55 0.00 101253.41
[10,] 107601.3 58927.70 146756.6 375761.4 91879.46 90980.32 111084.52 122449.40 101253.41 0.00
你可能想要这样的东西:
m <- df$geometry %>% st_distance()
indices <- which(m < units::set_units(20000, "m"), arr.ind = TRUE)
df.dist <- as.data.frame(indices)
df.dist$dist <- as.numeric(m[indices])
row col dist
1 1 1 0
2 2 2 0
3 3 3 0
4 4 4 0
5 5 5 0
6 6 6 0
7 7 7 0
8 8 8 0
9 9 9 0
10 10 10 0
另一种略有不同的方法是使用 as.data.frame.table
转换矩阵,然后像通常在 dplyr
.
filter
另一个答案中的索引对于大型数据集会更有效。
# Packages
library(sf)
#> Linking to GEOS 3.9.0, GDAL 3.2.1, PROJ 7.2.1
library(dplyr, warn.conflicts = FALSE)
# Create data
set.seed(1)
df <- data.frame(x = rnorm(10),
y = rnorm(10)) %>%
st_as_sf(coords = c("x","y"),
remove = F,
crs = 4326)
df %>%
pull(geometry) %>%
st_distance() %>%
as.data.frame.table() %>%
filter(Freq < units::set_units(20000, 'm'))
#> Var1 Var2 Freq
#> 1 A A 0 [m]
#> 2 B B 0 [m]
#> 3 C C 0 [m]
#> 4 D D 0 [m]
#> 5 E E 0 [m]
#> 6 F F 0 [m]
#> 7 G G 0 [m]
#> 8 H H 0 [m]
#> 9 I I 0 [m]
#> 10 J J 0 [m]
由 reprex package (v2.0.0)
于 2021-08-20 创建