为递归提取创建表达式

Creating expressions for recursive extraction

实际问题

我想写一个小函数,让我可以基于语法糖 [[ 和/或 $,灵活地为列表或环境创建递归提取表达式:

# Example nested list: element "a" is itself a list holding b = 1.
x <- list(a = list(b = 1))

> x[["a"]][["b"]]
[1] 1

我如何通过使用 substitute 而不是 parse 和 eval 的组合来做到这一点(因为后者真的很慢)?

我试过的

我知道如何通过组合 parse 和 eval 来做到这一点,但这真的很慢:

# Build the source text of a chained extraction such as "x[['a']][['b']]".
#
# Args:
#   idx: Indices to chain together, in order.
#   obj: Optional text placed in front of the accessor chain (typically the
#        name of the object); when empty, only the accessor chain is returned.
#   sep: Three strings: the opening delimiter, the delimiter placed between
#        consecutive indices, and the closing delimiter.
#
# Returns:
#   A character value holding the extraction as text, suitable for parse().
foo <- function(idx, obj = character(), sep = c("[['", "']][['", "']]")) {
  joined <- paste(idx, collapse = sep[2])
  accessor <- paste0(sep[1], joined, sep[3])
  if (length(obj) == 0) {
    return(accessor)
  }
  paste0(obj, accessor)
}
> foo(c("a", "b"), "x")
[1] "x[['a']][['b']]"

# Turn the text produced by foo() into an expression object via parse().
expr <- parse(text = foo(c("a", "b"), "x"))

> expr
expression(x[['a']][['b']])

> eval(expr)
[1] 1

所以我转向substitute。虽然这适用于我想编写 R 表达式的大部分任务,但我不知道如何将它用于此 "nested extraction" 任务:

# Build the call assign("x_2", letters) by filling the X and VALUE
# placeholders; as.name() makes VALUE a symbol rather than a string.
expr <- substitute(assign(X, VALUE),
  list(X = "x_2", VALUE = as.name("letters")))

> expr
assign("x_2", letters)

eval(expr)
> x_2
 [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u"
[22] "v" "w" "x" "y" "z"

# Build the call x[["a"]]: FUN is replaced by the symbol `[[`, X by the
# symbol x, and INDEX by the string "a".
expr <- substitute(FUN(X, INDEX),
  list(FUN = as.name('[['), X = as.name("x"), INDEX = "a"))

> expr
x[["a"]]

> eval(expr)
$b
[1] 1

基准测试

这是计算成本的比较:

# Benchmark: cost of building and evaluating an expression with substitute()
# versus parse(text = ...).
# install.packages("microbenchmark")
# NOTE(review): require() returns FALSE instead of erroring when the package
# is missing; library() would fail fast.
require(microbenchmark)
res <- microbenchmark(
  # Case 1: substitute() building an assign() call.
  "1" = {
    expr <- substitute(assign(X, VALUE),
      list(X = "x_2", VALUE = as.name("letters")))
    eval(expr)
  },
  # Case 2: substitute() building a single-level `[[` extraction.
  "2" = {
    expr <- substitute(FUN(X, INDEX),
      list(FUN = as.name('[['), X = as.name("x"), INDEX = "a"))
    eval(expr)
  },
  # Case 3: two-level extraction built as text by foo() and then parsed.
  "3" = {
    expr <- parse(text = foo(c("a", "b"), "x"))
    eval(expr)
  }
)

> res
Unit: microseconds
 expr     min       lq      mean  median       uq     max neval
    1   3.519   4.1060   6.12306   4.839   5.7185  59.528   100
    2   2.639   3.5190   4.41350   4.105   4.6920  20.820   100
    3 196.765 204.6825 220.33854 212.600 222.8630 505.547   100

更新

我想出了一个函数,它可以利用 substitute,同时还能处理递归方面的任务:

# Build an unevaluated call that extracts a nested element with chained `[[`.
#
# Args:
#   idx: Vector (or list) of indices; one `[[` level is added per element,
#        in order.
#   obj: The object to extract from. It is captured unevaluated, so the
#        expression written by the caller (e.g. `x`) becomes the innermost
#        operand of the result.
#
# Returns:
#   A language object such as x[["a"]][["b"]] that can be passed to eval().
#   With an empty `idx` the captured `obj` expression is returned unchanged.
bar <- function(idx, obj) {
  # Capture the caller's expression instead of hard-coding the symbol `obj`,
  # which only resolves inside this function. Keep that symbol as a fallback
  # when `obj` is not supplied.
  expr <- if (missing(obj)) as.name("obj") else substitute(obj)
  # seq_along() skips the loop for an empty `idx`; 1:length(idx) would
  # iterate over 1 and 0 instead.
  for (ii in seq_along(idx)) {
    # `[[` on idx yields the bare element, so names or list wrappers are not
    # embedded in the generated call.
    expr <- call("[[", expr, idx[[ii]])
  }
  expr
}

然后 Gabor 给出了他的答案,这让我意识到:出于某种无法解释的原因,我以前从未对 [[ 使用过多个索引值,只对 [ 这样用过。有点尴尬 ;-)。不过,下面是综合比较:

# Extended benchmark: the three earlier cases plus bar() and a single `[[`
# call with a vector index.
# NOTE(review): require() returns FALSE instead of erroring when the package
# is missing; library() would fail fast.
require(microbenchmark)
res <- microbenchmark(
  # Case 1: substitute() building an assign() call.
  "1" = {
    expr <- substitute(assign(X, VALUE),
      list(X = "x_2", VALUE = as.name("letters")))
    eval(expr)
  },
  # Case 2: substitute() building a single-level `[[` extraction.
  "2" = {
    expr <- substitute(FUN(X, INDEX),
      list(FUN = as.name('[['), X = as.name("x"), INDEX = "a"))
    eval(expr)
  },
  # Case 3: two-level extraction built as text by foo() and then parsed.
  "3" = {
    expr <- parse(text = foo(c("a", "b"), "x"))
    eval(expr)
  },
  # Case 4: nested `[[` calls built one index at a time by bar().
  "4" = {
    expr <- bar(c("a", "b"), x)
    eval(expr)
  },
  # Case 5: a single `[[` call whose index is the vector c("a", "b").
  "5" = {
    expr <- substitute(FUN(X, INDEX),
      list(FUN = as.name('[['), X = as.name("x"), INDEX = c("a", "b")))
    eval(expr)
  }
)

>   res
Unit: microseconds
 expr     min       lq      mean   median      uq      max neval
    1   3.519   4.3990   5.59530   4.9855   5.865   21.700   100
    2   2.639   3.5190   4.53380   3.8130   4.692   30.790   100
    3 174.772 182.2490 212.85493 196.3250 207.615 1558.576   100
    4  13.489  14.9555  19.50659  17.1550  21.700   44.573   100
    5   3.226   4.1050   4.85050   4.3990   5.279   13.490   100

尝试使用问题中的 x

 # A single `[[` call with the character vector index c("a", "b"); for the
 # x defined in the question it returns the nested value 1.
 x[[c("a", "b")]]
 ## [1] 1