根据最近的特征连接两个空间数据框(在每个组内)

Join two spatial data frames based on nearest features (within each group)

有两个数据框需要根据最近的特征进行拼接。正如您在下面看到的,dfUniqueIDs 是基于 well_iddf1 创建的。

由于 df 中缺少 well_id 并且无法根据此列合并两个数据帧,我尝试根据最近的特征合并它们。

> group_WP030793Ta <- df %>% 
+   dplyr::filter(group_id == "WP030793Ta")
> group_WP030793Ta
# A tibble: 4 x 4
  UniqueIDs        group_id          x        y
  <chr>            <chr>         <dbl>    <dbl>
1 GW161_WP030793Ta WP030793Ta 1894411. 5577232.
2 GW162_WP030793Ta WP030793Ta 1894914. 5577675.
3 GW163_WP030793Ta WP030793Ta 1895271. 5576725.
4 GW164_WP030793Ta WP030793Ta 1895573. 5577578. 

> group1_WP030793Ta <- df1 %>% 
+   dplyr::filter(group_id1 == "WP030793Ta")
> group1_WP030793Ta
   group_id1      x1      y1   well_id
1 WP030793Ta 1894914 5577675 Well 1880
2 WP030793Ta 1895573 5577578 Well 1452
3 WP030793Ta 1894411 5577232 Well 2043
4 WP030793Ta 1895271 5576725 Well 1881

我是按照下面的方式做的。

df_sf1 <- df1 %>% 
  st_as_sf(coords = c('x1', 'y1'), remove = F) %>%
  st_set_crs(2193)


joined_sf <- df_sf %>% 
  cbind(
    df_sf1[st_nearest_feature(df_sf, df_sf1),])

如您所见,group_idgroup_id1 不一样。

check_WP030793Ta <- joined_sf %>% 
  dplyr::filter(group_id == "WP030793Ta")

         UniqueIDs   group_id       x       y  group_id1      x1      y1   well_id                geometry              geometry.1
1 GW161_WP030793Ta WP030793Ta 1894411 5577232  WP040177T 1894411 5577232 Well 2043 POINT (1894411 5577232) POINT (1894411 5577232)
2 GW162_WP030793Ta WP030793Ta 1894914 5577675 WP030793Ta 1894914 5577675 Well 1880 POINT (1894914 5577675) POINT (1894914 5577675)
3 GW163_WP030793Ta WP030793Ta 1895271 5576725  WP040177T 1895271 5576725 Well 1881 POINT (1895271 5576725) POINT (1895271 5576725)
4 GW164_WP030793Ta WP030793Ta 1895573 5577578 WP140476Tb 1895573 5577578 Well 1452 POINT (1895573 5577578) POINT (1895573 5577578)

如何根据每个组中最近的特征加入?

更新

请注意,某些 group_idswell_id 重复了。

例如,如果用户拥有名为 WPXXXTa 的许可 group_id(从 2000 年到 2005 年),当此许可 group_id 获得更新为 WPXXXTb(从 2005 年到 2010 年),它通常具有相同的 well_ids.

数据

df

> dput(df)
structure(list(UniqueIDs = c("GW427_WP980128T", "GW428_WP980128T", 
"GW429_WP980128T", "GW430_WP980128T", "GW52_WP030680T", "GW53_WP030680T", 
"GW54_WP030680T", "GW55_WP030680T", "GW56_WP030680Ta", "GW57_WP030680Ta", 
"GW58_WP030680Ta", "GW59_WP030680Ta", "GW92_WP030710T", "GW93_WP030710T", 
"GW94_WP030710T", "GW95_WP030710T", "GW96_WP030710Ta", "GW97_WP030710Ta", 
"GW98_WP030710Ta", "GW99_WP030710Ta", "GW157_WP030793T", "GW158_WP030793T", 
"GW159_WP030793T", "GW160_WP030793T", "GW161_WP030793Ta", "GW162_WP030793Ta", 
"GW163_WP030793Ta", "GW164_WP030793Ta", "GW248_WP080553Ta", "GW249_WP080553Ta", 
"GW250_WP080553Ta", "GW251_WP080553Ta", "GW252_WP080553Tb", "GW253_WP080553Tb", 
"GW254_WP080553Tb", "GW255_WP080553Tb", "GW256_WP080553Tc", "GW257_WP080553Tc", 
"GW258_WP080553Tc", "GW259_WP080553Tc", "GW265_WP100090T", "GW266_WP100090T", 
"GW267_WP100090T", "GW268_WP100090T", "GW499_WP100090Ta", "GW500_WP100090Ta", 
"GW501_WP100090Ta", "GW502_WP100090Ta", "GW503_WP100090Tb", "GW504_WP100090Tb", 
"GW505_WP100090Tb", "GW506_WP100090Tb", "GW555_WP120385Tb", "GW556_WP120385Tb", 
"GW557_WP120385Tb", "GW558_WP120385Tb", "GW314_WP140476T", "GW315_WP140476T", 
"GW316_WP140476T", "GW317_WP140476T", "GW318_WP140476T", "GW319_WP140476T", 
"GW320_WP140476Ta", "GW321_WP140476Ta", "GW322_WP140476Ta", "GW323_WP140476Ta", 
"GW324_WP140476Ta", "GW325_WP140476Ta", "GW561_WP140476Tb", "GW562_WP140476Tb", 
"GW563_WP140476Tb", "GW564_WP140476Tb", "GW565_WP140476Tb", "GW566_WP140476Tb", 
"GW340_WP140564T", "GW341_WP140564T", "GW342_WP140564T", "GW343_WP140564T", 
"GW344_WP140564T", "GW345_WP140564T", "GW532_WP140564Ta", "GW533_WP140564Ta", 
"GW534_WP140564Ta", "GW535_WP140564Ta", "GW567_WP140564Ta", "GW568_WP140564Ta", 
"GW346_WP140575T", "GW347_WP140575T", "GW348_WP140575T", "GW349_WP140575T", 
"GW350_WP140575T", "GW544_WP140575Ta", "GW545_WP140575Ta", "GW546_WP140575Ta", 
"GW547_WP140575Ta", "GW548_WP140575Ta", "GW549_WP140575Ta"), 
    group_id = c("WP980128T", "WP980128T", "WP980128T", "WP980128T", 
    "WP030680T", "WP030680T", "WP030680T", "WP030680T", "WP030680Ta", 
    "WP030680Ta", "WP030680Ta", "WP030680Ta", "WP030710T", "WP030710T", 
    "WP030710T", "WP030710T", "WP030710Ta", "WP030710Ta", "WP030710Ta", 
    "WP030710Ta", "WP030793T", "WP030793T", "WP030793T", "WP030793T", 
    "WP030793Ta", "WP030793Ta", "WP030793Ta", "WP030793Ta", "WP080553Ta", 
    "WP080553Ta", "WP080553Ta", "WP080553Ta", "WP080553Tb", "WP080553Tb", 
    "WP080553Tb", "WP080553Tb", "WP080553Tc", "WP080553Tc", "WP080553Tc", 
    "WP080553Tc", "WP100090T", "WP100090T", "WP100090T", "WP100090T", 
    "WP100090Ta", "WP100090Ta", "WP100090Ta", "WP100090Ta", "WP100090Tb", 
    "WP100090Tb", "WP100090Tb", "WP100090Tb", "WP120385Tb", "WP120385Tb", 
    "WP120385Tb", "WP120385Tb", "WP140476T", "WP140476T", "WP140476T", 
    "WP140476T", "WP140476T", "WP140476T", "WP140476Ta", "WP140476Ta", 
    "WP140476Ta", "WP140476Ta", "WP140476Ta", "WP140476Ta", "WP140476Tb", 
    "WP140476Tb", "WP140476Tb", "WP140476Tb", "WP140476Tb", "WP140476Tb", 
    "WP140564T", "WP140564T", "WP140564T", "WP140564T", "WP140564T", 
    "WP140564T", "WP140564Ta", "WP140564Ta", "WP140564Ta", "WP140564Ta", 
    "WP140564Ta", "WP140564Ta", "WP140575T", "WP140575T", "WP140575T", 
    "WP140575T", "WP140575T", "WP140575Ta", "WP140575Ta", "WP140575Ta", 
    "WP140575Ta", "WP140575Ta", "WP140575Ta"), x = c(1895418.928, 
    1895977.206, 1896640.698, 1895417.928, 1895417.928, 1895418.928, 
    1895977.206, 1896640.698, 1895417.928, 1895418.928, 1895977.206, 
    1896640.698, 1897977.552, 1898423.744, 1898465.965, 1899160.578, 
    1897977.552, 1898423.744, 1898465.965, 1899160.578, 1895573.351, 
    1894410.616, 1894913.875, 1895271.31, 1894410.616, 1894913.875, 
    1895271.31, 1895573.351, 1896495.091, 1898542.668, 1898731.391, 
    1898948.03, 1896495.091, 1898542.668, 1898731.391, 1898948.03, 
    1896495.091, 1898542.668, 1898731.391, 1898948.03, 1889390.432, 
    1889592.342, 1890299.744, 1891064.34, 1891064.34, 1889390.432, 
    1889592.342, 1890299.744, 1890299.744, 1891064.34, 1889390.432, 
    1889592.342, 1894083.58, 1894092.36, 1896352.598, 1894878, 
    1893616.553, 1894022.731, 1894410.616, 1894913.875, 1895271.31, 
    1895573.351, 1893616.553, 1894022.731, 1894410.616, 1894913.875, 
    1895271.31, 1895573.351, 1894913.875, 1893616.553, 1895271.31, 
    1894410.616, 1894022.731, 1895573.351, 1895417.928, 1895977.206, 
    1896640.698, 1896641.699, 1897384.378, 1899529.078, 1896640.698, 
    1895977.206, 1896641.699, 1897384.378, 1895417.928, 1899529.078, 
    1888029.348, 1889592.342, 1890299.744, 1891064.34, 1891768.728, 
    1890299.744, 1891768.728, 1886805.544, 1888029.348, 1891064.34, 
    1889592.342), y = c(5583799.952, 5584310.447, 5584219.537, 
    5583795.948, 5583795.948, 5583799.952, 5584310.447, 5584219.537, 
    5583795.948, 5583799.952, 5584310.447, 5584219.537, 5580035.831, 
    5580655.371, 5579716.704, 5579134.428, 5580035.831, 5580655.371, 
    5579716.704, 5579134.428, 5577577.59, 5577232.113, 5577674.523, 
    5576724.933, 5577232.113, 5577674.523, 5576724.933, 5577577.59, 
    5586371.051, 5585936.231, 5587672.549, 5585643.113, 5586371.051, 
    5585936.231, 5587672.549, 5585643.113, 5586371.051, 5585936.231, 
    5587672.549, 5585643.113, 5570931.906, 5572174.76, 5572513.115, 
    5572010.921, 5572010.921, 5570931.906, 5572174.76, 5572513.115, 
    5572513.115, 5572010.921, 5570931.906, 5572174.76, 5576273.384, 
    5577416.176, 5579074.805, 5578463, 5579767.708, 5580255.137, 
    5577232.113, 5577674.523, 5576724.933, 5577577.59, 5579767.708, 
    5580255.137, 5577232.113, 5577674.523, 5576724.933, 5577577.59, 
    5577674.523, 5579767.708, 5576724.933, 5577232.113, 5580255.137, 
    5577577.59, 5583795.948, 5584310.447, 5584219.537, 5584216.535, 
    5583557.233, 5582842.198, 5584219.537, 5584310.447, 5584216.535, 
    5583557.233, 5583795.948, 5582842.198, 5572055.392, 5572174.76, 
    5572513.115, 5572010.921, 5572459.348, 5572513.115, 5572459.348, 
    5572145.22, 5572055.392, 5572010.921, 5572174.76)), row.names = c(NA, 
-97L), class = c("tbl_df", "tbl", "data.frame"))

df1

> dput(df1)
structure(list(group_id1 = c("WP140564Ta", "WP030793Ta", "WP040177T", 
"WP140476Tb", "WP040178T", "WP040178T", "WP030793Ta", "WP040177T", 
"WP030793Ta", "WP030609Ta", "WP140476Tb", "WP040178T", "WP030793Ta", 
"WP040177T", "WP140476Tb", "WP040177T", "WP140476Tb", "WP040178T", 
"WP030710Ta", "WP140575Ta", "WP130260T", "WP140564Ta", "WP120385Tb", 
"WP080553Tc", "WP120320T", "WP030609Ta", "WP120320T", "WP140476Tb", 
"WP030609Ta", "WP140575Ta", "WP140575Ta", "WP140564Ta", "WP140564Ta", 
"WP030775Tb", "WP981038Tb", "WP180240Ta", "WP140575Ta", "WP080553Tc", 
"WP140564Ta", "WP030775Tb", "WP180240Ta", "WP981038Tb", "WP120385Tb", 
"WP030775Tb", "WP140564Ta", "WP981038Tb", "WP030609Ta", "WP981038Tb", 
"WP981038Tb", "WP180240Ta", "WP120293Ta", "WP130260T", "WP120320T", 
"WP130260T", "WP030683Ta", "WP120320T", "WP120293Ta", "WP130260T", 
"WP180240Ta", "WP120320T", "WP030775Tb", "WP030775Tb", "WP180240Ta", 
"WP140575Ta", "WP140476Tb", "WP080553Tc", "WP030710Ta", "WP030710Ta", 
"WP030710Ta", "WP120320T", "WP140575Ta", "WP120385Tb", "WP030775Tb", 
"WP080553Tc", "WP981038Tb", "WP030775Tb", "WP120385Tb", "WP120293Ta", 
"WP120293Ta", "WP030683Ta", "WP030683Ta", "WP030683Ta"), x1 = c(1896640.698, 
1894913.875, 1894913.875, 1894913.875, 1898423.744, 1897977.552, 
1895573.351, 1895573.351, 1894410.616, 1884000.938, 1893616.553, 
1898465.965, 1895271.31, 1895271.31, 1895271.31, 1894410.616, 
1894410.616, 1899160.578, 1898465.965, 1890299.744, 1906890.973, 
1895417.928, 1894083.58, 1898731.391, 1884030.965, 1884030.965, 
1884000.938, 1894022.731, 1883999.938, 1891768.728, 1886805.544, 
1895977.206, 1896641.699, 1899959.097, 1889368, 1889624.55, 1888029.348, 
1898948.03, 1899529.078, 1899915.06, 1889388.418, 1889624.55, 
1894092.36, 1899927.07, 1897384.378, 1889388.418, 1883986.921, 
1888503.953, 1889634.556, 1889264.351, 1896007.954, 1898380.5, 
1884027.955, 1899492.203, 1900158.351, 1883720.711, 1896611.299, 
1899303.201, 1889895, 1883986.921, 1899908.862, 1899916.15, 1889357, 
1891064.34, 1895573.351, 1896495.091, 1899160.578, 1898423.744, 
1897977.552, 1883999.938, 1889592.342, 1896352.598, 1899938.084, 
1898542.668, 1889264.351, 1899924.074, 1894878, 1893939.51, 1896548.228, 
1899625.914, 1899871.286, 1899965.508), y1 = c(5584219.537, 5577674.523, 
5577674.523, 5577674.523, 5580655.371, 5580035.831, 5577577.59, 
5577577.59, 5577232.113, 5576264.299, 5579767.708, 5579716.704, 
5576724.933, 5576724.933, 5576724.933, 5577232.113, 5577232.113, 
5579134.428, 5579716.704, 5572513.115, 5568457.895, 5583795.948, 
5576273.384, 5587672.549, 5576228.283, 5576228.283, 5576264.299, 
5580255.137, 5576264.299, 5572459.348, 5572145.22, 5584310.447, 
5584216.535, 5567653.364, 5564346, 5564890.01, 5572055.392, 5585643.113, 
5582842.198, 5567693.386, 5564756.889, 5564890.01, 5577416.176, 
5567683.38, 5583557.233, 5564756.889, 5576304.322, 5564069.317, 
5564890.012, 5564669.815, 5570344.641, 5570933.436, 5576266.306, 
5571415.958, 5573380.472, 5576500.39, 5570774.037, 5570587.34, 
5564942, 5576304.322, 5567687.778, 5567676.036, 5564340, 5572010.921, 
5577577.59, 5586371.051, 5579134.428, 5580655.371, 5580035.831, 
5576264.299, 5572174.76, 5579074.805, 5567635.348, 5585936.231, 
5564669.815, 5567641.35, 5578463, 5570575.456, 5570945.145, 5573723.626, 
5572550.829, 5571596.163), well_id = c("Well 1518", "Well 1880", 
"Well 1880", "Well 1880", "Well 2246", "Well 2277", "Well 1452", 
"Well 1452", "Well 2043", "Well 2747", "Well 2219", "Well 2278", 
"Well 1881", "Well 1881", "Well 1881", "Well 2043", "Well 2043", 
"Well 2242", "Well 2278", "Well 3774", "Well 2830", "Well 4122", 
"Well 4830", "Well 4295", "Well 4411", "Well 4411", "Well 2747", 
"Well 2933", "Well 3092", "Well 4882", "Well 3434", "Well 3870", 
"Well 4593", "Well 5617", "Well 6715", "Well 6720", "Well 4489", 
"Well 4672", "Well 5167", "Well 15108", "Well 15458", "Well 6720", 
"Well 4994", "Well 15109", "Well 5497", "Well 15458", "SW 214", 
"Well 6716", "Well 6721", "Well 6723", "SW 754", "SW 854", "SW 223", 
"SW 928", "SW 962", "Well 16053", "SW 787", "SW 921", "Well 16813", 
"SW 213", "Well 16892", "Well 16893", "Well 16930", "Well 1402", 
"Well 1452", "Well 2160", "Well 2242", "Well 2246", "Well 2277", 
"Well 3092", "Well 3722", "Well 4764", "Well 5676", "Well 5723", 
"Well 6723", "Well 15107", "Well 16817", "SW 661", "SW 782", 
"SW 941", "SW 950", "SW 954")), class = "data.frame", row.names = c(NA, 
-82L))

我认为您正在寻找空间连接,而不是 cbind / left_join

library(tidyverse)
library(sf)

# Make df & df1 sf objects, and keep the coordinates as columns just in case.
df <- df %>% st_as_sf(coords = c("x", "y"), remove = FALSE) %>%
  st_set_crs(2193)
df1 <- df1 %>% st_as_sf(coords = c("x1", "y1"), remove = FALSE) %>%
  st_set_crs(2193)

# Join df with df1, based on the nearest feature:
df_near <- st_join(df, df1, join = st_nearest_feature)

df_near
Simple feature collection with 97 features and 8 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 1886806 ymin: 5570932 xmax: 1899529 ymax: 5587673
Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
# A tibble: 97 × 9
   UniqueIDs       group_id          x        y          geometry group_id1        x1       y1 well_id  
 * <chr>           <chr>         <dbl>    <dbl>       <POINT [m]> <chr>         <dbl>    <dbl> <chr>    
 1 GW427_WP980128T WP980128T  1895419. 5583800. (1895419 5583800) WP140564Ta 1895418. 5583796. Well 4122
 2 GW428_WP980128T WP980128T  1895977. 5584310. (1895977 5584310) WP140564Ta 1895977. 5584310. Well 3870
 3 GW429_WP980128T WP980128T  1896641. 5584220. (1896641 5584220) WP140564Ta 1896641. 5584220. Well 1518

df 的第 51 行似乎是唯一一个不直接位于 df1 的任何点之上的行。这是加入后的样子:

df_near[51,]
Simple feature collection with 1 feature and 8 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: 1889390 ymin: 5570932 xmax: 1889390 ymax: 5570932
Projected CRS: NZGD2000 / New Zealand Transverse Mercator 2000
# A tibble: 1 × 9
  UniqueIDs        group_id          x        y          geometry group_id1        x1       y1 well_id  
  <chr>            <chr>         <dbl>    <dbl>       <POINT [m]> <chr>         <dbl>    <dbl> <chr>    
1 GW505_WP100090Tb WP100090Tb 1889390. 5570932. (1889390 5570932) WP140575Ta 1889592. 5572175. Well 3722

以及显示最近点之间连接的图表。只有一个,因为除一个之外的所有 df 都在 df1 之上。

df 红色,df1 黑色,连接绿色。

library(nngeo)
ggplot() + 
  geom_sf(data = df, color = 'red', size = 3) + 
  geom_sf(data = df1, color = 'black', alpha = .6) + 
  theme_void() + 
  geom_sf(data = st_connect(df, df1), color = 'green')