不,map() 或 apply() 不能系统地替换 for() 循环,在 R
No, map() or apply() cannot systematically replace for() loop, in R
我有以下矢量 a = c(1,0,0,1,0,0,0,0)
。我想获得以下一个:c(1,2,3,1,2,3,4,5)
。即如果a[i] == 1
,则让其为1,否则a[i]
应为a[i-1] + 1
。
用 for 做到这一点很容易:
for (i in seq(1,length(a))) {
a[i] = ifelse(a[i]==1,1,a[i-1]+1)
a
[1] 1 2 3 1 2 3 4 5
循环有效是因为 for() 直接在全局环境中更改了向量。
map() (或 apply() 函数)通常被认为是比循环更好的选择。现在在那种情况下,它根本无法完成这项工作,因为它不会更改全局环境中的对象。
b = c(1,0,0,1,0,0,0,0)
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <- ifelse(b[x]==1, b[x], b[x-1]+1)})
b
[1] 1 0 0 1 0 0 0 0
这里怎么用map()?
使用purrr::walk
:
library(purrr)
b = c(1,0,0,1,0,0,0,0)
walk(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
b
#> [1] 1 2 3 1 2 3 4 5
甚至使用 purrr:map
:
library(purrr)
b = c(1,0,0,1,0,0,0,0)
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] 3
#>
#> [[4]]
#> [1] 1
#>
#> [[5]]
#> [1] 2
#>
#> [[6]]
#> [1] 3
#>
#> [[7]]
#> [1] 4
#>
#> [[8]]
#> [1] 5
b
#> [1] 1 2 3 1 2 3 4 5
没有循环或映射:
cs <- cumsum(a)
seq_along(cs) - match(cs, cs) + 1
## [1] 1 2 3 1 2 3 4 5
或
ave(a, cumsum(a), FUN = seq_along)
## [1] 1 2 3 1 2 3 4 5
Reduce
也有效:
Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5
或
Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5
基准
library(purrr)
library(microbenchmark)
a <- c(1, 0, 0, 1, 0, 0, 0, 0)
microbenchmark(
A = { cs <- cumsum(a); seq_along(cs) - match(cs, cs) + 1},
B = ave(a, cumsum(a), FUN = seq_along),
C = Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE),
D = Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE),
E = { b <- a; walk(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
},
F = { b <- a;
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
}
)
给予:
Unit: microseconds
expr min lq mean median uq max neval cld
A 6.3 10.65 15.810 13.80 19.80 57.9 100 a
B 305.3 326.40 397.376 396.35 406.05 977.0 100 a
C 41.4 48.25 126.469 57.45 71.70 6597.0 100 a
D 49.5 58.45 137.924 67.40 80.85 6452.5 100 a
E 133.2 183.15 4796.402 4397.95 8720.40 35341.7 100 b
F 131.8 174.10 6070.244 4385.25 8756.20 139827.0 100 b
备注
a <- c(1, 0, 0, 1, 0, 0, 0, 0)
我有以下矢量 a = c(1,0,0,1,0,0,0,0)
。我想获得以下一个:c(1,2,3,1,2,3,4,5)
。即如果a[i] == 1
,则让其为1,否则a[i]
应为a[i-1] + 1
。
用 for 做到这一点很容易:
for (i in seq(1,length(a))) {
a[i] = ifelse(a[i]==1,1,a[i-1]+1)
a
[1] 1 2 3 1 2 3 4 5
循环有效是因为 for() 直接在全局环境中更改了向量。 map() (或 apply() 函数)通常被认为是比循环更好的选择。现在在那种情况下,它根本无法完成这项工作,因为它不会更改全局环境中的对象。
b = c(1,0,0,1,0,0,0,0)
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <- ifelse(b[x]==1, b[x], b[x-1]+1)})
b
[1] 1 0 0 1 0 0 0 0
这里怎么用map()?
使用purrr::walk
:
library(purrr)
b = c(1,0,0,1,0,0,0,0)
walk(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
b
#> [1] 1 2 3 1 2 3 4 5
甚至使用 purrr:map
:
library(purrr)
b = c(1,0,0,1,0,0,0,0)
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
#> [[1]]
#> [1] 1
#>
#> [[2]]
#> [1] 2
#>
#> [[3]]
#> [1] 3
#>
#> [[4]]
#> [1] 1
#>
#> [[5]]
#> [1] 2
#>
#> [[6]]
#> [1] 3
#>
#> [[7]]
#> [1] 4
#>
#> [[8]]
#> [1] 5
b
#> [1] 1 2 3 1 2 3 4 5
没有循环或映射:
cs <- cumsum(a)
seq_along(cs) - match(cs, cs) + 1
## [1] 1 2 3 1 2 3 4 5
或
ave(a, cumsum(a), FUN = seq_along)
## [1] 1 2 3 1 2 3 4 5
Reduce
也有效:
Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5
或
Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE)
## [1] 1 2 3 1 2 3 4 5
基准
library(purrr)
library(microbenchmark)
a <- c(1, 0, 0, 1, 0, 0, 0, 0)
microbenchmark(
A = { cs <- cumsum(a); seq_along(cs) - match(cs, cs) + 1},
B = ave(a, cumsum(a), FUN = seq_along),
C = Reduce(\(x, y) if (y) 1 else x+1, a, acc = TRUE),
D = Reduce(\(x, y) y + (x + 1) * !y, a, acc = TRUE),
E = { b <- a; walk(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
},
F = { b <- a;
map(
.x = seq(1,(length(b))),
.f = function(x) {
b[x] <<- ifelse(b[x]==1, b[x], b[x-1]+1)})
}
)
给予:
Unit: microseconds
expr min lq mean median uq max neval cld
A 6.3 10.65 15.810 13.80 19.80 57.9 100 a
B 305.3 326.40 397.376 396.35 406.05 977.0 100 a
C 41.4 48.25 126.469 57.45 71.70 6597.0 100 a
D 49.5 58.45 137.924 67.40 80.85 6452.5 100 a
E 133.2 183.15 4796.402 4397.95 8720.40 35341.7 100 b
F 131.8 174.10 6070.244 4385.25 8756.20 139827.0 100 b
备注
a <- c(1, 0, 0, 1, 0, 0, 0, 0)