R : 在这种情况下如何使用 apply 来加速函数?

R : How to use apply in this case to speed up the function?

我正在尝试计算点与每个点的最近多边形之间的距离。我目前正在使用函数 st_distance(库 sf),这似乎是最快的方法。但这仍然需要很多时间。 这就是为什么我想将我正在使用的循环更改为应用程序或以更快的方式执行此操作的原因。有人可以帮我做吗? 谢谢!

## Importation of shapefiles
# library(rgdal)
# pathToShp = "J:/shp_files/"
# points = readOGR(dsn = pathToShp, layer="points_2154", stringsAsFactors=FALSE)   #Points in EPSG 2154 Lambert
# polygons = readOGR(dsn = pathToShp, layer="polygons_2154", stringsAsFactors=FALSE)   #Polygons

library(sf)
# points_sf = st_as_sf(points)
# polygons_sf = st_as_sf(polygons)

## Search the closest polygon for each point
point_polygon <- c()
point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)     # ID of the closest polygon for each point

## Distance between each point and the closest polygon
dist_sf <- c()
for (i in 1:nrow(points_sf)) {
  dist_sf[i] = st_distance(points_sf[i,], 
                           polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                           by_element = TRUE)    
}

您应该获得:

dist_sf
# [1] 514830.0 260656.0 260647.7 260653.5 262053.6

数据

points_sf <- structure(list(field_1 = c("1", "2", "3", "4", "5"), adresse = c("6 RUE DES VIGNES, 40140 SOUSTONS, France", 
"22 RUE DE PARIS, 03000 MOULINS, France", "5 RUE REGNAUDIN, 03000 MOULINS, France", 
"31 RUE DE PARIS, 03000 MOULINS, France", "15 RUE DES RAMIERS, 85360 LA TRANCHE SUR MER, France"
), latitude = c(43.75395, 46.56875, 46.56893, 46.56873, 46.35638
), longitude = c(-1.31277, 3.330296, 3.330394, 3.330224, -1.470842
), geometry = structure(list(structure(c(352768.516216819, 6304476.86420524
), class = c("XY", "POINT", "sfg")), structure(c(725298.307259582, 
6607688.02981763), class = c("XY", "POINT", "sfg")), structure(c(725305.729670888, 
6607708.05130113), class = c("XY", "POINT", "sfg")), structure(c(725292.801896427, 
6607685.78563239), class = c("XY", "POINT", "sfg")), structure(c(356412.817813797, 
6593779.89675049), class = c("XY", "POINT", "sfg"))), class = c("sfc_POINT", 
"sfc"), precision = 0, bbox = structure(c(xmin = 352768.516216819, 
ymin = 6304476.86420524, xmax = 725305.729670888, ymax = 6607708.05130113
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), sf_column = "geometry", agr = structure(c(field_1 = NA_integer_, 
adresse = NA_integer_, latitude = NA_integer_, longitude = NA_integer_
), .Label = c("constant", "aggregate", "identity"), class = "factor"), row.names = c(NA, 
5L), class = c("sf", "data.frame"))

polygons_sf <- structure(list(ID = c("M1204300", "E6490620", "E4240850"), geometry = structure(list(
    structure(list(structure(c(533957.599997006, 534047.299997008, 
    534171.89999701, 534191.69999701, 534226.099997011, 534270.599997012, 
    534325.099997013, 534369.799997014, 534449.399997015, 534549.199997017, 
    534674.099997019, 534924.099997023, 535084.299997026, 535174.199997028, 
    535239.099997029, 535293.89999703, 535323.599997031, 6786523.09989417, 
    6786492.39989417, 6786461.39989417, 6786436.19989417, 6786370.99989417, 
    6786305.69989417, 6786255.19989417, 6786219.89989417, 6786184.19989417, 
    6786163.39989417, 6786162.39989417, 6786185.29989417, 6786218.89989417, 
    6786218.19989417, 6786207.69989417, 6786182.19989417, 6786156.99989417
    ), .Dim = c(17L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg")), structure(list(structure(c(608743.099998312, 608792.899998313, 
    608827.799998314, 608847.799998314, 608867.999998314, 608918.699998315, 
    608974.499998316, 609015.299998317, 609071.299998318, 609086.499998318, 
    609106.399998319, 609156.19999832, 609181.59999832, 609197.09999832, 
    609202.299998321, 609217.599998321, 609257.999998322, 609273.299998322, 
    609324.099998323, 609354.699998323, 7003205.49989546, 7003185.09989545, 
    7003164.79989545, 7003169.69989546, 7003194.49989546, 7003278.99989546, 
    7003378.49989546, 7003478.09989546, 7003597.59989546, 7003623.39989546, 
    7003618.29989546, 7003592.89989546, 7003642.59989546, 7003702.49989546, 
    7003732.39989546, 7003767.29989546, 7003816.89989546, 7003856.79989546, 
    7003946.29989546, 7004020.99989546), .Dim = c(20L, 2L))), class = c("XY", 
    "MULTILINESTRING", "sfg")), structure(list(structure(c(669193.399999424, 
    669183.499999424, 669153.399999423, 669097.999999422, 669077.999999422, 
    669048.599999421, 7097101.79989609, 7097111.89989609, 7097102.09989609, 
    7097047.59989609, 7097052.79989609, 7097123.99989609), .Dim = c(6L, 
    2L)), structure(c(669048.599999421, 669022.899999421, 668953.19999942, 
    668888.899999418, 668854.499999418, 668809.899999417, 668790.299999417, 
    668740.899999416, 668721.199999415, 668656.799999414, 668637.199999414, 
    668618.099999413, 7097123.99989609, 7097149.19989609, 7097189.79989609, 
    7097265.29989609, 7097340.49989609, 7097385.89989609, 7097430.99989609, 
    7097496.39989609, 7097532.59989609, 7097598.09989609, 7097653.1998961, 
    7097758.39989609), .Dim = c(12L, 2L)), structure(c(668618.099999413, 
    668598.799999413, 668553.799999412, 668519.299999411, 668435.09999941, 
    668335.899999408, 668159.599999405, 7097758.39989609, 7097833.49989609, 
    7097949.7998961, 7098010.0998961, 7098095.7998961, 7098191.4998961, 
    7098459.3998961), .Dim = c(7L, 2L))), class = c("XY", "MULTILINESTRING", 
    "sfg"))), class = c("sfc_MULTILINESTRING", "sfc"), precision = 0, bbox = structure(c(xmin = 533957.599997006, 
ymin = 6786156.99989417, xmax = 669193.399999424, ymax = 7098459.3998961
), class = "bbox"), crs = structure(list(epsg = NA_integer_, 
    proj4string = "+proj=lcc +lat_1=49 +lat_2=44 +lat_0=46.5 +lon_0=3 +x_0=700000 +y_0=6600000 +ellps=GRS80 +units=m +no_defs"), class = "crs"), n_empty = 0L)), row.names = 0:2, class = c("sf", 
"data.frame"), sf_column = "geometry", agr = structure(c(ID = NA_integer_), class = "factor", .Label = c("constant", 
"aggregate", "identity")))

这个:

apply(st_distance(points_sf, polygons_sf), 1, min)

似乎是最快的选择。虽然原生 sf 版本并没有慢多少。 实际时间见下文

library(microbenchmark)

microbenchmark(
    loop = {
        point_polygon = st_join(points_sf, polygons_sf, join = st_nearest_feature)
        ## Distance between each point and the closest polygon
        dist_sf <- c()
        for (i in 1:nrow(points_sf)) {
            dist_sf[i] = st_distance(points_sf[i,], 
                                     polygons_sf[polygons_sf$ID == point_polygon$ID[i], ], 
                                     by_element = TRUE)    
        }
    },
    apply = { apply(st_distance(points_sf, polygons_sf), 1, min) },
    native = {
        polys = polygons_sf[st_nearest_feature(points_sf, polygons_sf), ]
        st_length(st_nearest_points(points_sf, polys, pairwise = TRUE))
    },
    dt = {
        dist = as.data.table(st_distance(points_sf, polygons_sf))
        dist[, pmin(V1, V2, V3)]
    },
    times = 10
)


Unit: milliseconds
   expr     min       lq      mean   median       uq     max neval  cld
   loop 29.2660 30.36030 32.092494 30.95950 32.97390 42.5732   100    d
  apply  2.7579  2.90365  3.124069  2.96670  3.20515  5.0635   100 a   
 native  3.9875  4.13340  4.566414  4.24310  4.55095 11.9232   100   c 
     dt  3.4089  3.57920  3.838198  3.66055  3.93795  8.6983   100  b