在另一个 data.frame 中找到一个 data.frame 最近的点
Find the nearest points from one data.frame in another data.frame
我有两个 data.table,它们包含一些点的 x 和 y 坐标,如下所示:
# Load data.table so that data.table() below is available.
library(data.table)
# Fix the RNG seed so the sampled points are reproducible.
set.seed(123)
# `a`: 10 query points with uniform random x/y coordinates in [0, 1].
a <- data.table(x = runif(10), y = runif(10))
a
x y
1: 0.9036218 0.005233401
2: 0.1914482 0.219560561
3: 0.6089018 0.275966545
4: 0.2307832 0.665939467
5: 0.5959250 0.221657568
6: 0.2343375 0.228499024
7: 0.2620843 0.846292513
8: 0.8499895 0.888969644
9: 0.5707878 0.305334950
10: 0.4771339 0.714933915
# `b`: 30 candidate points with uniform random x/y coordinates in [0, 1].
b <- data.table(x = runif(30), y = runif(30))
b
x y
1: 0.86444548 0.26182116
2: 0.54160133 0.14384453
3: 0.84231617 0.78842707
4: 0.26452802 0.56285334
5: 0.31283576 0.63412792
6: 0.85437689 0.91124597
7: 0.71066455 0.94311426
8: 0.28285278 0.36521536
9: 0.28785318 0.95902709
10: 0.86013854 0.09544158
11: 0.64630209 0.49041844
12: 0.54615120 0.35911319
13: 0.15450425 0.13447951
14: 0.49790235 0.17264283
15: 0.08562707 0.74520138
16: 0.93701477 0.16472997
17: 0.62143442 0.36910724
18: 0.30887985 0.75412718
19: 0.64023414 0.14568618
20: 0.81968754 0.34259618
21: 0.77749145 0.20355856
22: 0.55769767 0.44691877
23: 0.39907966 0.26292129
24: 0.07789015 0.96241503
25: 0.13784013 0.33544759
26: 0.99523977 0.38647609
27: 0.49555325 0.97433126
28: 0.34634466 0.99967023
29: 0.08722789 0.56224314
30: 0.27777771 0.30246561
对于 a 中的每个点,我想在 b 中找到离它最近的点。也就是说,a 中的每个点在 b 中都会有一个最近点,并且该最近点的坐标应当添加到 a 中。
我该怎么做?
提前致谢
您可以使用 FNN 包中的 get.knnx 函数:
library(data.table)
library(FNN)

set.seed(123)
a <- data.table(x = runif(10), y = runif(10))
b <- data.table(x = runif(30), y = runif(30))

# For every row of `a` (the query), look up its single nearest neighbour
# (k = 1) among the rows of `b` (the data). The result is kept in `nn`
# rather than `c` so that base::c() is not masked.
nn <- get.knnx(b, a, k = 1)
# nn.index has one column per requested neighbour; with k = 1 the first
# column holds the row number in `b` of the closest point.
nearest_row <- nn$nn.index[, 1]

# Add the matched coordinates by reference instead of via `$<-`.
a[, `:=`(b.x = b$x[nearest_row], b.y = b$y[nearest_row])]
a
x y b.x b.y
1: 0.2875775 0.95683335 0.28915974 0.89504536
2: 0.7883051 0.45333416 0.69280341 0.44220007
3: 0.4089769 0.67757064 0.41372433 0.71018240
4: 0.8830174 0.57263340 0.96302423 0.66511519
5: 0.9404673 0.10292468 0.90229905 0.09484066
6: 0.0455565 0.89982497 0.02461368 0.81464004
7: 0.5281055 0.24608773 0.54406602 0.12753165
8: 0.8924190 0.04205953 0.88953932 0.04583117
9: 0.5514350 0.32792072 0.47779597 0.44851634
10: 0.4566147 0.95450365 0.28915974 0.89504536
这是一种使用 raster::pointDistance() 的方法:
set.seed(123)
dt1 <- data.table(x = runif(10), y = runif(10))
dt2 <- data.table(x = runif(30), y = runif(30))

# Planar (lonlat = FALSE) distances between the dt1 and dt2 points.
# NOTE(review): assumes the result has one row per dt1 point and one column
# per dt2 point, as the row-wise which.min below requires -- confirm.
dist_mat <- raster::pointDistance(
  as.matrix(dt1),
  as.matrix(dt2),
  lonlat = FALSE
)
# Store, for each dt1 row, the position of the smallest distance in that row.
dt1[, nearest_dt2 := apply(dist_mat, 1, which.min)][]
# x y nearest_dt2
# 1: 0.2875775 0.95683335 9
# 2: 0.7883051 0.45333416 2
# 3: 0.4089769 0.67757064 23
# 4: 0.8830174 0.57263340 11
# 5: 0.9404673 0.10292468 12
# 6: 0.0455565 0.89982497 15
# 7: 0.5281055 0.24608773 7
# 8: 0.8924190 0.04205953 1
# 9: 0.5514350 0.32792072 16
#10: 0.4566147 0.95450365 9
您可以试试下面的代码
> # Points are encoded as complex numbers (x + 1i * y), so abs() of the pairwise difference is the Euclidean distance.
> # max.col() breaks ties at random by default (values within a relative tolerance count as ties); "first" makes the match deterministic.
> a[, paste0("nearest.", c("x", "y")) := b[max.col(-abs(outer(.SD[, x + 1i * y], b[, x + 1i * y], `-`)), ties.method = "first")]][]
x y nearest.x nearest.y
1: 0.2875775 0.95683335 0.28915974 0.89504536
2: 0.7883051 0.45333416 0.69280341 0.44220007
3: 0.4089769 0.67757064 0.41372433 0.71018240
4: 0.8830174 0.57263340 0.96302423 0.66511519
5: 0.9404673 0.10292468 0.90229905 0.09484066
6: 0.0455565 0.89982497 0.02461368 0.81464004
7: 0.5281055 0.24608773 0.54406602 0.12753165
8: 0.8924190 0.04205953 0.88953932 0.04583117
9: 0.5514350 0.32792072 0.47779597 0.44851634
10: 0.4566147 0.95450365 0.28915974 0.89504536
另一种可能的解决方案,基于 rdist::cdist:
library(data.table)
library(rdist)

set.seed(123)
a <- data.table(x = runif(10), y = runif(10))
b <- data.table(x = runif(30), y = runif(30))

# For each row of the a-by-b distance matrix, pick the column (row of `b`)
# with the smallest distance.
nearest_idx <- apply(cdist(a, b), 1, which.min)
# Bind the matched rows of `b` next to `a`; column names are left as-is.
cbind(a, b[nearest_idx, ])
#> x y x y
#> 1: 0.2875775 0.95683335 0.28915974 0.89504536
#> 2: 0.7883051 0.45333416 0.69280341 0.44220007
#> 3: 0.4089769 0.67757064 0.41372433 0.71018240
#> 4: 0.8830174 0.57263340 0.96302423 0.66511519
#> 5: 0.9404673 0.10292468 0.90229905 0.09484066
#> 6: 0.0455565 0.89982497 0.02461368 0.81464004
#> 7: 0.5281055 0.24608773 0.54406602 0.12753165
#> 8: 0.8924190 0.04205953 0.88953932 0.04583117
#> 9: 0.5514350 0.32792072 0.47779597 0.44851634
#> 10: 0.4566147 0.95450365 0.28915974 0.89504536
我有两个 data.table,它们包含一些点的 x 和 y 坐标,如下所示:
# Load data.table so that data.table() below is available.
library(data.table)
# Fix the RNG seed so the sampled points are reproducible.
set.seed(123)
# `a`: 10 query points with uniform random x/y coordinates in [0, 1].
a <- data.table(x = runif(10), y = runif(10))
a
x y
1: 0.9036218 0.005233401
2: 0.1914482 0.219560561
3: 0.6089018 0.275966545
4: 0.2307832 0.665939467
5: 0.5959250 0.221657568
6: 0.2343375 0.228499024
7: 0.2620843 0.846292513
8: 0.8499895 0.888969644
9: 0.5707878 0.305334950
10: 0.4771339 0.714933915
# `b`: 30 candidate points with uniform random x/y coordinates in [0, 1].
b <- data.table(x = runif(30), y = runif(30))
b
x y
1: 0.86444548 0.26182116
2: 0.54160133 0.14384453
3: 0.84231617 0.78842707
4: 0.26452802 0.56285334
5: 0.31283576 0.63412792
6: 0.85437689 0.91124597
7: 0.71066455 0.94311426
8: 0.28285278 0.36521536
9: 0.28785318 0.95902709
10: 0.86013854 0.09544158
11: 0.64630209 0.49041844
12: 0.54615120 0.35911319
13: 0.15450425 0.13447951
14: 0.49790235 0.17264283
15: 0.08562707 0.74520138
16: 0.93701477 0.16472997
17: 0.62143442 0.36910724
18: 0.30887985 0.75412718
19: 0.64023414 0.14568618
20: 0.81968754 0.34259618
21: 0.77749145 0.20355856
22: 0.55769767 0.44691877
23: 0.39907966 0.26292129
24: 0.07789015 0.96241503
25: 0.13784013 0.33544759
26: 0.99523977 0.38647609
27: 0.49555325 0.97433126
28: 0.34634466 0.99967023
29: 0.08722789 0.56224314
30: 0.27777771 0.30246561
对于 a 中的每个点,我想在 b 中找到离它最近的点。也就是说,a 中的每个点在 b 中都会有一个最近点,并且该最近点的坐标应当添加到 a 中。
我该怎么做?
提前致谢
您可以使用 FNN 包中的 get.knnx 函数:
library(data.table)
library(FNN)

set.seed(123)
a <- data.table(x = runif(10), y = runif(10))
b <- data.table(x = runif(30), y = runif(30))

# For every row of `a` (the query), look up its single nearest neighbour
# (k = 1) among the rows of `b` (the data). The result is kept in `nn`
# rather than `c` so that base::c() is not masked.
nn <- get.knnx(b, a, k = 1)
# nn.index has one column per requested neighbour; with k = 1 the first
# column holds the row number in `b` of the closest point.
nearest_row <- nn$nn.index[, 1]

# Add the matched coordinates by reference instead of via `$<-`.
a[, `:=`(b.x = b$x[nearest_row], b.y = b$y[nearest_row])]
a
x y b.x b.y
1: 0.2875775 0.95683335 0.28915974 0.89504536
2: 0.7883051 0.45333416 0.69280341 0.44220007
3: 0.4089769 0.67757064 0.41372433 0.71018240
4: 0.8830174 0.57263340 0.96302423 0.66511519
5: 0.9404673 0.10292468 0.90229905 0.09484066
6: 0.0455565 0.89982497 0.02461368 0.81464004
7: 0.5281055 0.24608773 0.54406602 0.12753165
8: 0.8924190 0.04205953 0.88953932 0.04583117
9: 0.5514350 0.32792072 0.47779597 0.44851634
10: 0.4566147 0.95450365 0.28915974 0.89504536
这是一种使用 raster::pointDistance() 的方法:
set.seed(123)
dt1 <- data.table(x = runif(10), y = runif(10))
dt2 <- data.table(x = runif(30), y = runif(30))

# Planar (lonlat = FALSE) distances between the dt1 and dt2 points.
# NOTE(review): assumes the result has one row per dt1 point and one column
# per dt2 point, as the row-wise which.min below requires -- confirm.
dist_mat <- raster::pointDistance(
  as.matrix(dt1),
  as.matrix(dt2),
  lonlat = FALSE
)
# Store, for each dt1 row, the position of the smallest distance in that row.
dt1[, nearest_dt2 := apply(dist_mat, 1, which.min)][]
# x y nearest_dt2
# 1: 0.2875775 0.95683335 9
# 2: 0.7883051 0.45333416 2
# 3: 0.4089769 0.67757064 23
# 4: 0.8830174 0.57263340 11
# 5: 0.9404673 0.10292468 12
# 6: 0.0455565 0.89982497 15
# 7: 0.5281055 0.24608773 7
# 8: 0.8924190 0.04205953 1
# 9: 0.5514350 0.32792072 16
#10: 0.4566147 0.95450365 9
您可以试试下面的代码
> # Points are encoded as complex numbers (x + 1i * y), so abs() of the pairwise difference is the Euclidean distance.
> # max.col() breaks ties at random by default (values within a relative tolerance count as ties); "first" makes the match deterministic.
> a[, paste0("nearest.", c("x", "y")) := b[max.col(-abs(outer(.SD[, x + 1i * y], b[, x + 1i * y], `-`)), ties.method = "first")]][]
x y nearest.x nearest.y
1: 0.2875775 0.95683335 0.28915974 0.89504536
2: 0.7883051 0.45333416 0.69280341 0.44220007
3: 0.4089769 0.67757064 0.41372433 0.71018240
4: 0.8830174 0.57263340 0.96302423 0.66511519
5: 0.9404673 0.10292468 0.90229905 0.09484066
6: 0.0455565 0.89982497 0.02461368 0.81464004
7: 0.5281055 0.24608773 0.54406602 0.12753165
8: 0.8924190 0.04205953 0.88953932 0.04583117
9: 0.5514350 0.32792072 0.47779597 0.44851634
10: 0.4566147 0.95450365 0.28915974 0.89504536
另一种可能的解决方案,基于 rdist::cdist:
library(data.table)
library(rdist)

set.seed(123)
a <- data.table(x = runif(10), y = runif(10))
b <- data.table(x = runif(30), y = runif(30))

# For each row of the a-by-b distance matrix, pick the column (row of `b`)
# with the smallest distance.
nearest_idx <- apply(cdist(a, b), 1, which.min)
# Bind the matched rows of `b` next to `a`; column names are left as-is.
cbind(a, b[nearest_idx, ])
#> x y x y
#> 1: 0.2875775 0.95683335 0.28915974 0.89504536
#> 2: 0.7883051 0.45333416 0.69280341 0.44220007
#> 3: 0.4089769 0.67757064 0.41372433 0.71018240
#> 4: 0.8830174 0.57263340 0.96302423 0.66511519
#> 5: 0.9404673 0.10292468 0.90229905 0.09484066
#> 6: 0.0455565 0.89982497 0.02461368 0.81464004
#> 7: 0.5281055 0.24608773 0.54406602 0.12753165
#> 8: 0.8924190 0.04205953 0.88953932 0.04583117
#> 9: 0.5514350 0.32792072 0.47779597 0.44851634
#> 10: 0.4566147 0.95450365 0.28915974 0.89504536