在每个组中找到最近的地理点
Finding the closest geographical point within each group
我正在尝试为每个“年”组中的每个银行找到最近点(在我的例子中是最近的银行)。我试过使用 st_distance,但没有成功。
这是我的数据示例:
Year geometry bank Location
7 1838 POINT (759859.6 -728345) Bank of Mobile Mobile
43 1838 POINT (779861.1 -445454.7) Bank of the State of Alabama Tuscaloosa
58 1838 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
59 1841 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
60 1842 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
67 1838 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
68 1841 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
69 1842 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
79 1838 POINT (759859.6 -728345) Bank of the State of Alabama, branch Mobile
91 1838 POINT (905601.7 -526517.9) Bank of the State of Alabama, branch Montgomery
我想知道的是,例如,分别在 1838 年、1841 年和 1842 年分别离“迪凯特”中的“阿拉巴马州银行分行”最近的银行是哪家。
对于某些银行,到另一家银行的最小距离应为零,因为同一家银行中有多家银行 town/city(例如 1838 年的“移动银行”)。
这是一个可重现的例子:
S1 <- structure(list(Year = c("1838", "1838", "1838", "1841", "1842",
"1838", "1841", "1842", "1838", "1838", "1841", "1842", "1838"
), geometry = structure(list(structure(c(759859.587499541, -728344.968108917
), class = c("XY", "POINT", "sfg")), structure(c(779861.05153977,
-445454.703808866), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(905601.700907393,
-526517.946820037), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(336877.092499058,
-301769.66275037), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 336877.092499058,
ymin = -728344.968108917, xmax = 905601.700907393, ymax = -267830.684434561
), class = "bbox"), crs = structure(list(epsg = NA_integer_, proj4string = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"), class = "crs"), n_empty = 0L),
bank = c("Bank of Mobile", "Bank of the State of Alabama",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch", "Planters & Merchants Bank", "Planters & Merchants Bank",
"Bank of the State of Arkansas"), Location = c("Mobile",
"Tuscaloosa", "Decatur", "Decatur", "Decatur", "Huntsville",
"Huntsville", "Huntsville", "Mobile", "Montgomery", "Mobile",
"Mobile", "Little Rock")), sf_column = "geometry", agr = structure(c(Year = NA_integer_,
bank = NA_integer_, Location = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")), row.names = c(7L, 43L, 58L, 59L, 60L,
67L, 68L, 69L, 79L, 91L, 116L, 117L, 130L), class = c("sf", "data.frame"
))
这是一个带有参数的函数,允许您按年份、银行名称和位置选择银行。它将 return 具有该位置的 sf
对象和距离 data.frame 最近的位置,该对象来自同一年或更晚的名称和位置。
您可能需要更改 index2 的逻辑,具体取决于您要查找的内容。
dist_by_year <- function(x = S1, year = 1841,
bank = 'Bank of the State of Alabama, branch',
loc = 'Decatur'){
x$Year <- as.numeric(x$Year)
index1 <- which(x$bank == bank & x$Year == year & x$Location == loc)
index2 <- which((x$bank != bank & x$Location != loc)& x$Year >= year )
index3 <- st_nearest_feature(x[index1,], x[index2,])
closest <- x[index2,][index3,]
closest$id <- 'closest'
target <- x[index1,]
target$id <- 'target'
rbind(closest, target)
}
Returns:
> dist_by_year()
Simple feature collection with 2 features and 4 fields
geometry type: POINT
dimension: XY
bbox: xmin: 759859.6 ymin: -728345 xmax: 819114.6 ymax: -285180.1
CRS: +proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
Year bank Location id geometry
116 1841 Planters & Merchants Bank Mobile closest POINT (759859.6 -728345)
59 1841 Bank of the State of Alabama, branch Decatur target POINT (819114.6 -285180.1)
可以通过管道传输到 st_distance
实际距离:
> dist_by_year() %>% st_distance()
Units: [m]
[,1] [,2]
[1,] 0.0 447108.8
[2,] 447108.8 0.0
以下是我解决问题的方法,以供日后参考。它给出了到 10 个最近点(银行)的距离:
for (i in 1830:1860) {
d <- S1c[which(S1c$Date==i),]
d1 <- st_nn(d, d, returnDist=T, k=11, maxdist=200000) #maximum distance 200km
for(k in 1:11) {
d[[paste0("neigbor_id_", k)]] = sapply(d1[[1]], "[", k)
d[[paste0("neighbor_", k)]] = sapply(d1[[2]], "[", k)
} #Merge output from st_nn with original data
assign(paste("d_", i, sep = ""), d) #Generates files for each years
}
#Row bind
d <- rbind(d_1830, d_1831, d_1832, d_1833, d_1834, d_1835, d_1836, d_1837, d_1838, d_1839, d_1840, d_1841, d_1842, d_1843, d_1844, d_1845, d_1846, d_1847, d_1848, d_1849, d_1850, d_1851, d_1852, d_1853, d_1854, d_1855, d_1856, d_1857, d_1858, d_1859, d_1860)
我正在尝试为每个“年”组中的每个银行找到最近点(在我的例子中是最近的银行)。我试过使用 st_distance,但没有成功。
这是我的数据示例:
Year geometry bank Location
7 1838 POINT (759859.6 -728345) Bank of Mobile Mobile
43 1838 POINT (779861.1 -445454.7) Bank of the State of Alabama Tuscaloosa
58 1838 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
59 1841 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
60 1842 POINT (819114.6 -285180.1) Bank of the State of Alabama, branch Decatur
67 1838 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
68 1841 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
69 1842 POINT (853709.4 -267830.7) Bank of the State of Alabama, branch Huntsville
79 1838 POINT (759859.6 -728345) Bank of the State of Alabama, branch Mobile
91 1838 POINT (905601.7 -526517.9) Bank of the State of Alabama, branch Montgomery
我想知道的是,例如,分别在 1838 年、1841 年和 1842 年分别离“迪凯特”中的“阿拉巴马州银行分行”最近的银行是哪家。
对于某些银行,到另一家银行的最小距离应为零,因为同一家银行中有多家银行 town/city(例如 1838 年的“移动银行”)。
这是一个可重现的例子:
S1 <- structure(list(Year = c("1838", "1838", "1838", "1841", "1842",
"1838", "1841", "1842", "1838", "1838", "1841", "1842", "1838"
), geometry = structure(list(structure(c(759859.587499541, -728344.968108917
), class = c("XY", "POINT", "sfg")), structure(c(779861.05153977,
-445454.703808866), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(819114.617375314,
-285180.101652618), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(853709.422752713,
-267830.684434561), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(905601.700907393,
-526517.946820037), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(759859.587499541,
-728344.968108917), class = c("XY", "POINT", "sfg")), structure(c(336877.092499058,
-301769.66275037), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT",
"sfc"), precision = 0, bbox = structure(c(xmin = 336877.092499058,
ymin = -728344.968108917, xmax = 905601.700907393, ymax = -267830.684434561
), class = "bbox"), crs = structure(list(epsg = NA_integer_, proj4string = "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs"), class = "crs"), n_empty = 0L),
bank = c("Bank of Mobile", "Bank of the State of Alabama",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch",
"Bank of the State of Alabama, branch", "Bank of the State of Alabama, branch", "Planters & Merchants Bank", "Planters & Merchants Bank",
"Bank of the State of Arkansas"), Location = c("Mobile",
"Tuscaloosa", "Decatur", "Decatur", "Decatur", "Huntsville",
"Huntsville", "Huntsville", "Mobile", "Montgomery", "Mobile",
"Mobile", "Little Rock")), sf_column = "geometry", agr = structure(c(Year = NA_integer_,
bank = NA_integer_, Location = NA_integer_), class = "factor", .Label = c("constant",
"aggregate", "identity")), row.names = c(7L, 43L, 58L, 59L, 60L,
67L, 68L, 69L, 79L, 91L, 116L, 117L, 130L), class = c("sf", "data.frame"
))
这是一个带有参数的函数,允许您按年份、银行名称和位置选择银行。它将 return 具有该位置的 sf
对象和距离 data.frame 最近的位置,该对象来自同一年或更晚的名称和位置。
您可能需要更改 index2 的逻辑,具体取决于您要查找的内容。
dist_by_year <- function(x = S1, year = 1841,
bank = 'Bank of the State of Alabama, branch',
loc = 'Decatur'){
x$Year <- as.numeric(x$Year)
index1 <- which(x$bank == bank & x$Year == year & x$Location == loc)
index2 <- which((x$bank != bank & x$Location != loc)& x$Year >= year )
index3 <- st_nearest_feature(x[index1,], x[index2,])
closest <- x[index2,][index3,]
closest$id <- 'closest'
target <- x[index1,]
target$id <- 'target'
rbind(closest, target)
}
Returns:
> dist_by_year()
Simple feature collection with 2 features and 4 fields
geometry type: POINT
dimension: XY
bbox: xmin: 759859.6 ymin: -728345 xmax: 819114.6 ymax: -285180.1
CRS: +proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
Year bank Location id geometry
116 1841 Planters & Merchants Bank Mobile closest POINT (759859.6 -728345)
59 1841 Bank of the State of Alabama, branch Decatur target POINT (819114.6 -285180.1)
可以通过管道传输到 st_distance
实际距离:
> dist_by_year() %>% st_distance()
Units: [m]
[,1] [,2]
[1,] 0.0 447108.8
[2,] 447108.8 0.0
以下是我解决问题的方法,以供日后参考。它给出了到 10 个最近点(银行)的距离:
for (i in 1830:1860) {
d <- S1c[which(S1c$Date==i),]
d1 <- st_nn(d, d, returnDist=T, k=11, maxdist=200000) #maximum distance 200km
for(k in 1:11) {
d[[paste0("neigbor_id_", k)]] = sapply(d1[[1]], "[", k)
d[[paste0("neighbor_", k)]] = sapply(d1[[2]], "[", k)
} #Merge output from st_nn with original data
assign(paste("d_", i, sep = ""), d) #Generates files for each years
}
#Row bind
d <- rbind(d_1830, d_1831, d_1832, d_1833, d_1834, d_1835, d_1836, d_1837, d_1838, d_1839, d_1840, d_1841, d_1842, d_1843, d_1844, d_1845, d_1846, d_1847, d_1848, d_1849, d_1850, d_1851, d_1852, d_1853, d_1854, d_1855, d_1856, d_1857, d_1858, d_1859, d_1860)