不,map() 或 apply() 不能系统地替换 for() 循环,在 R

No, map() or apply() cannot systematically replace for() loop, in R

我有以下矢量 a = c(1,0,0,1,0,0,0,0)。我想获得以下一个:c(1,2,3,1,2,3,4,5)。即如果a[i] == 1,则让其为1,否则a[i]应为a[i-1] + 1。 用 for 做到这一点很容易:

for (i in seq(1,length(a))) {
  a[i] = ifelse(a[i]==1,1,a[i-1]+1)
a
 [1] 1 2 3 1 2 3 4 5

循环有效是因为 for() 直接在全局环境中更改了向量。 map() (或 apply() 函数)通常被认为是比循环更好的选择。现在在那种情况下,它根本无法完成这项工作,因为它不会更改全局环境中的对象。

b = c(1,0,0,1,0,0,0,0)

map(
  .x = seq(1,(length(b))),
  .f = function(x) {
    b[x] <- ifelse(b[x]==1, b[x], b[x-1]+1)})
b
[1] 1 0 0 1 0 0 0 0

这里怎么用map()?

使用purrr::walk:

library(purrr)

b = c(1,0,0,1,0,0,0,0)

walk(
  .x = seq(1,(length(b))),
  .f = function(x) {
    b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
b

#> [1] 1 2 3 1 2 3 4 5

甚至使用 purrr:map:

library(purrr)

b = c(1,0,0,1,0,0,0,0)

map(
  .x = seq(1,(length(b))),
  .f = function(x) {
    b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})

#> [[1]]
#> [1] 1
#> 
#> [[2]]
#> [1] 2
#> 
#> [[3]]
#> [1] 3
#> 
#> [[4]]
#> [1] 1
#> 
#> [[5]]
#> [1] 2
#> 
#> [[6]]
#> [1] 3
#> 
#> [[7]]
#> [1] 4
#> 
#> [[8]]
#> [1] 5

b

#> [1] 1 2 3 1 2 3 4 5

没有循环或映射:

cs <- cumsum(a)
seq_along(cs) - match(cs, cs) + 1
## [1] 1 2 3 1 2 3 4 5

ave(a, cumsum(a), FUN = seq_along)
## [1] 1 2 3 1 2 3 4 5

Reduce 也有效:

Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5

Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5

基准

library(purrr)
library(microbenchmark)

a <- c(1, 0, 0, 1, 0, 0, 0, 0)

microbenchmark(
  A = { cs <- cumsum(a); seq_along(cs) - match(cs, cs) + 1},
  B = ave(a, cumsum(a), FUN = seq_along),
  C = Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE),
  D = Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE),
  E = { b <- a; walk(
  .x = seq(1,(length(b))),
  .f = function(x) {
    b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
  },
  F = { b <- a;
    map(
     .x = seq(1,(length(b))),
     .f = function(x) {
      b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
  }
)

给予:

Unit: microseconds
 expr   min     lq     mean  median      uq      max neval cld
    A   6.3  10.65   15.810   13.80   19.80     57.9   100  a 
    B 305.3 326.40  397.376  396.35  406.05    977.0   100  a 
    C  41.4  48.25  126.469   57.45   71.70   6597.0   100  a 
    D  49.5  58.45  137.924   67.40   80.85   6452.5   100  a 
    E 133.2 183.15 4796.402 4397.95 8720.40  35341.7   100   b
    F 131.8 174.10 6070.244 4385.25 8756.20 139827.0   100   b

备注

a <- c(1, 0, 0, 1, 0, 0, 0, 0)