如何使用非标准求值(NSE)对 data.table 的参数进行求值?
How to use non-standard evaluation NSE to evaluate arguments on data.table?
假设我有以下内容
library(data.table)
# Two independent data.table copies of `cars`, bundled into a list that
# carries the S3 class "dd".
cars1 <- setDT(copy(cars))
cars2 <- setDT(copy(cars))
car_list <- structure(list(cars1, cars2), class = "dd")
# Attempted `[` method for class "dd"; kept verbatim because it reproduces
# the error quoted in the question.
# NOTE(review): `quo` is called unqualified, so rlang must be attached.
`[.dd` <- function(x,...) {
# Capture the arguments passed through `...` as quosures.
code = rlang::enquos(...)
# Only the first element of the list is used in this attempt.
cars1 = x[[1]]
# Splice the quosures into a `[` call on cars1 and evaluate it.
rlang::eval_tidy(quo(cars1[!!!code]))
}
car_list[,.N, by = speed]
所以我希望通过定义 [.dd
函数对 cars1
和 cars2
执行任意操作,这样无论我放入 ...
中的什么都可以由 cars1 和 cars2
使用 [
data.table 语法来执行,例如
car_list[,.N, by = speed]
应执行以下操作
cars1[,.N, by = speed]
cars2[,.N, by = speed]
我还希望
car_list[,speed*2]
等同于执行
cars1[,speed*2]
cars2[,speed*2]
基本上,[.dd
中的...
必须接受任意代码。
不知何故我需要捕获 ...
所以我尝试做 code = rlang::enquos(...)
然后 rlang::eval_tidy(quo(cars1[!!!code]))
不起作用并给出错误
Error in `[.data.table`(cars1, ~, ~.N, by = ~speed) :
  argument "i" is missing, with no default
第一个基础 R 选项是 substitute(...())
,然后是 do.call
:
library(data.table)
# Two data.table copies of `cars`; cars2 gets its speed column re-sorted in
# decreasing order so that the two list elements differ.
cars1 <- setDT(copy(cars))
cars2 <- setDT(copy(cars))
cars2[, speed := sort(speed, decreasing = TRUE)]
car_list <- structure(list(cars1, cars2), class = "dd")
#' Apply one data.table `[` call to every element of a "dd" list.
#'
#' @param x A list of class "dd"; each element is passed to `[.data.table`.
#' @param ... Arguments (`i`, `j`, `by`, ...) captured unevaluated and
#'   forwarded verbatim to `[.data.table` for each element.
#' @return A list with exactly one entry per element of `x`.
`[.dd` <- function(x, ...) {
  # substitute(...()) captures the contents of `...` unevaluated (an alist).
  a <- substitute(...())
  # Argument list for do.call(): the placeholder x[[i]] followed by the
  # captured arguments.
  expr <- c(quote(x[[i]]), a)
  res <- vector("list", length(x))
  for (i in seq_along(x)) {
    # do.call() evaluates the arguments in this frame, where `x` and `i` live.
    # `res[i] <- list(...)` keeps a NULL result in its slot, whereas
    # `res[[i]] <- NULL` would delete the slot and shorten the result.
    res[i] <- list(do.call(data.table:::`[.data.table`, expr))
  }
  res
}
all.equal(
car_list[,.N, by = speed],
list(cars1[,.N, by = speed], cars2[,.N, by = speed])
)
#[1] TRUE
all.equal(
car_list[, speed*2],
list(cars1[, speed*2], cars2[, speed*2])
)
#[1] TRUE
第二个基础 R 选项是 match.call
,修改调用然后评估(您在 lm
中找到这种方法):
#' Apply one `[` call to every element of a "dd" list.
#'
#' The matched call is rewritten so that it targets `x[[i]]` through the
#' generic `[`, and is then evaluated once per element.
#'
#' @param x A list of class "dd".
#' @param ... Subsetting arguments; they stay unevaluated inside the call.
#' @return A list with exactly one entry per element of `x`.
`[.dd` <- function(x, ...) {
  thecall <- match.call()
  # Swap the method name for the generic and the object for a placeholder.
  thecall[[1]] <- quote(`[`)
  thecall[[2]] <- quote(x[[i]])
  res <- vector("list", length(x))
  for (i in seq_along(x)) {
    # eval() runs in this frame, so the placeholder sees the current `i`.
    # `res[i] <- list(...)` keeps a NULL result in its slot, whereas
    # `res[[i]] <- NULL` would delete the slot and shorten the result.
    res[i] <- list(eval(thecall))
  }
  res
}
all.equal(
car_list[,.N, by = speed],
list(cars1[,.N, by = speed], cars2[,.N, by = speed])
)
#[1] TRUE
all.equal(
car_list[, speed*2],
list(cars1[, speed*2], cars2[, speed*2])
)
#[1] TRUE
我还没有测试在使用 := 时,这些方法是否会进行深拷贝。
我评论中的建议不完整。
您确实可以使用 rlang
来支持整洁的评估,
但由于 data.table
本身并不直接支持它,
你最好使用表达式而不是 quosures,
并且您需要在调用 eval_tidy
之前构建完整的最终表达式:
# `[` method for class "dd": runs the same `[` call on every element of `x`
# and returns the results as a list.
`[.dd` <- function(x, ...) {
# Capture the arguments in `...` as plain expressions rather than quosures.
code <- rlang::enexprs(...)
lapply(x, function(dt) {
# Build the complete call dt[<captured args>] first, then evaluate it.
ex <- rlang::expr(dt[!!!code])
rlang::eval_tidy(ex)
})
}
car_list[, .N, by = speed]
[[1]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
6: 11 2
7: 12 4
8: 13 4
9: 14 4
10: 15 3
11: 16 2
12: 17 3
13: 18 4
14: 19 3
15: 20 5
16: 22 1
17: 23 1
18: 24 4
19: 25 1
[[2]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
6: 11 2
7: 12 4
8: 13 4
9: 14 4
10: 15 3
11: 16 2
12: 17 3
13: 18 4
14: 19 3
15: 20 5
16: 22 1
17: 23 1
18: 24 4
19: 25 1
虽然这并不是 rlang
风格的惯用做法,但这种方法似乎工作得很好:lapply(dt_list, '[', ...)
代码对我来说更具可读性,因为它明确说明了所使用的方法。如果我看到 car_list[, .N, by = speed]
我会期待默认的 data.table
方法。
将其作为一个函数可以让您两全其美:
class(car_list) <- "dd"
#' Subset every element of a "dd" list with the same `[` arguments.
#'
#' @param x A list of class "dd".
#' @param ... Arguments forwarded untouched to `[` for each element of `x`.
#' @return A list holding the result of `[` for each element of `x`.
`[.dd` <- function(x, ...) {
  lapply(X = x, FUN = "[", ...)
}
car_list[, .N, speed]
car_list[, speed * 2]
car_list[, .(.N, max(dist)), speed]
car_list[, `:=` (more_speed = speed+5)]
以下是该方法的一些示例:
car_list[, .N, speed]
# lapply(car_list, '[', j = .N, by = speed)
# or
# lapply(car_list, '[', , .N, speed)
[[1]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
...
[[2]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
...
car_list[, speed * 2]
# lapply(car_list, '[', j = speed*2)
# or
# lapply(car_list, '[', , speed*2)
[[1]]
[1] 8 8 14 14 16 18 20 20 20 22 22 24 24 24 24 26 26
[18] 26 26 28 28 28 28 30 30 30 32 32 34 34 34 36 36 36
[35] 36 38 38 38 40 40 40 40 40 44 46 48 48 48 48 50
[[2]]
[1] 8 8 14 14 16 18 20 20 20 22 22 24 24 24 24 26 26
[18] 26 26 28 28 28 28 30 30 30 32 32 34 34 34 36 36 36
[35] 36 38 38 38 40 40 40 40 40 44 46 48 48 48 48 50
car_list[, .(.N, max(dist)), speed]
# lapply(car_list, '[', j = list(.N, max(dist)), by = speed)
# or
# lapply(car_list, '[', ,.(.N, max(dist)), speed)
[[1]]
speed N V2
1: 4 2 10
2: 7 2 22
3: 8 1 16
4: 9 1 10
5: 10 3 34
...
[[2]]
speed N V2
1: 4 2 10
2: 7 2 22
3: 8 1 16
4: 9 1 10
5: 10 3 34
...
这适用于 :=
运算符:
car_list[, `:=` (more_speed = speed+5)]
# or
# lapply(car_list, '[', , `:=` (more_speed = speed+5))
car_list
[[1]]
speed dist more_speed
1: 4 2 9
2: 4 10 9
3: 7 4 12
4: 7 22 12
5: 8 16 13
...
[[2]]
speed dist more_speed
1: 4 2 9
2: 4 10 9
3: 7 4 12
4: 7 22 12
5: 8 16 13
假设我有以下内容
library(data.table)
# Two independent data.table copies of `cars`, bundled into a list that
# carries the S3 class "dd".
cars1 <- setDT(copy(cars))
cars2 <- setDT(copy(cars))
car_list <- structure(list(cars1, cars2), class = "dd")
# Attempted `[` method for class "dd"; kept verbatim because it reproduces
# the error quoted in the question.
# NOTE(review): `quo` is called unqualified, so rlang must be attached.
`[.dd` <- function(x,...) {
# Capture the arguments passed through `...` as quosures.
code = rlang::enquos(...)
# Only the first element of the list is used in this attempt.
cars1 = x[[1]]
# Splice the quosures into a `[` call on cars1 and evaluate it.
rlang::eval_tidy(quo(cars1[!!!code]))
}
car_list[,.N, by = speed]
所以我希望通过定义 [.dd
函数对 cars1
和 cars2
执行任意操作,这样无论我放入 ...
中的什么都可以由 cars1 和 cars2
使用 [
data.table 语法来执行,例如
car_list[,.N, by = speed]
应执行以下操作
cars1[,.N, by = speed]
cars2[,.N, by = speed]
我还希望
car_list[,speed*2]
等同于执行
cars1[,speed*2]
cars2[,speed*2]
基本上,[.dd
中的...
必须接受任意代码。
不知何故我需要捕获 ...
所以我尝试做 code = rlang::enquos(...)
然后 rlang::eval_tidy(quo(cars1[!!!code]))
不起作用并给出错误
Error in `[.data.table`(cars1, ~, ~.N, by = ~speed) :
  argument "i" is missing, with no default
第一个基础 R 选项是 substitute(...())
,然后是 do.call
:
library(data.table)
# Two data.table copies of `cars`; cars2 gets its speed column re-sorted in
# decreasing order so that the two list elements differ.
cars1 <- setDT(copy(cars))
cars2 <- setDT(copy(cars))
cars2[, speed := sort(speed, decreasing = TRUE)]
car_list <- structure(list(cars1, cars2), class = "dd")
#' Apply one data.table `[` call to every element of a "dd" list.
#'
#' @param x A list of class "dd"; each element is passed to `[.data.table`.
#' @param ... Arguments (`i`, `j`, `by`, ...) captured unevaluated and
#'   forwarded verbatim to `[.data.table` for each element.
#' @return A list with exactly one entry per element of `x`.
`[.dd` <- function(x, ...) {
  # substitute(...()) captures the contents of `...` unevaluated (an alist).
  a <- substitute(...())
  # Argument list for do.call(): the placeholder x[[i]] followed by the
  # captured arguments.
  expr <- c(quote(x[[i]]), a)
  res <- vector("list", length(x))
  for (i in seq_along(x)) {
    # do.call() evaluates the arguments in this frame, where `x` and `i` live.
    # `res[i] <- list(...)` keeps a NULL result in its slot, whereas
    # `res[[i]] <- NULL` would delete the slot and shorten the result.
    res[i] <- list(do.call(data.table:::`[.data.table`, expr))
  }
  res
}
all.equal(
car_list[,.N, by = speed],
list(cars1[,.N, by = speed], cars2[,.N, by = speed])
)
#[1] TRUE
all.equal(
car_list[, speed*2],
list(cars1[, speed*2], cars2[, speed*2])
)
#[1] TRUE
第二个基础 R 选项是 match.call
,修改调用然后评估(您在 lm
中找到这种方法):
#' Apply one `[` call to every element of a "dd" list.
#'
#' The matched call is rewritten so that it targets `x[[i]]` through the
#' generic `[`, and is then evaluated once per element.
#'
#' @param x A list of class "dd".
#' @param ... Subsetting arguments; they stay unevaluated inside the call.
#' @return A list with exactly one entry per element of `x`.
`[.dd` <- function(x, ...) {
  thecall <- match.call()
  # Swap the method name for the generic and the object for a placeholder.
  thecall[[1]] <- quote(`[`)
  thecall[[2]] <- quote(x[[i]])
  res <- vector("list", length(x))
  for (i in seq_along(x)) {
    # eval() runs in this frame, so the placeholder sees the current `i`.
    # `res[i] <- list(...)` keeps a NULL result in its slot, whereas
    # `res[[i]] <- NULL` would delete the slot and shorten the result.
    res[i] <- list(eval(thecall))
  }
  res
}
all.equal(
car_list[,.N, by = speed],
list(cars1[,.N, by = speed], cars2[,.N, by = speed])
)
#[1] TRUE
all.equal(
car_list[, speed*2],
list(cars1[, speed*2], cars2[, speed*2])
)
#[1] TRUE
我还没有测试在使用 := 时,这些方法是否会进行深拷贝。
我评论中的建议不完整。
您确实可以使用 rlang
来支持整洁的评估,
但由于 data.table
本身并不直接支持它,
你最好使用表达式而不是 quosures,
并且您需要在调用 eval_tidy
之前构建完整的最终表达式:
# `[` method for class "dd": runs the same `[` call on every element of `x`
# and returns the results as a list.
`[.dd` <- function(x, ...) {
# Capture the arguments in `...` as plain expressions rather than quosures.
code <- rlang::enexprs(...)
lapply(x, function(dt) {
# Build the complete call dt[<captured args>] first, then evaluate it.
ex <- rlang::expr(dt[!!!code])
rlang::eval_tidy(ex)
})
}
car_list[, .N, by = speed]
[[1]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
6: 11 2
7: 12 4
8: 13 4
9: 14 4
10: 15 3
11: 16 2
12: 17 3
13: 18 4
14: 19 3
15: 20 5
16: 22 1
17: 23 1
18: 24 4
19: 25 1
[[2]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
6: 11 2
7: 12 4
8: 13 4
9: 14 4
10: 15 3
11: 16 2
12: 17 3
13: 18 4
14: 19 3
15: 20 5
16: 22 1
17: 23 1
18: 24 4
19: 25 1
虽然这并不是 rlang
风格的惯用做法,但这种方法似乎工作得很好:lapply(dt_list, '[', ...)
代码对我来说更具可读性,因为它明确说明了所使用的方法。如果我看到 car_list[, .N, by = speed]
我会期待默认的 data.table
方法。
将其作为一个函数可以让您两全其美:
class(car_list) <- "dd"
#' Subset every element of a "dd" list with the same `[` arguments.
#'
#' @param x A list of class "dd".
#' @param ... Arguments forwarded untouched to `[` for each element of `x`.
#' @return A list holding the result of `[` for each element of `x`.
`[.dd` <- function(x, ...) {
  lapply(X = x, FUN = "[", ...)
}
car_list[, .N, speed]
car_list[, speed * 2]
car_list[, .(.N, max(dist)), speed]
car_list[, `:=` (more_speed = speed+5)]
以下是该方法的一些示例:
car_list[, .N, speed]
# lapply(car_list, '[', j = .N, by = speed)
# or
# lapply(car_list, '[', , .N, speed)
[[1]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
...
[[2]]
speed N
1: 4 2
2: 7 2
3: 8 1
4: 9 1
5: 10 3
...
car_list[, speed * 2]
# lapply(car_list, '[', j = speed*2)
# or
# lapply(car_list, '[', , speed*2)
[[1]]
[1] 8 8 14 14 16 18 20 20 20 22 22 24 24 24 24 26 26
[18] 26 26 28 28 28 28 30 30 30 32 32 34 34 34 36 36 36
[35] 36 38 38 38 40 40 40 40 40 44 46 48 48 48 48 50
[[2]]
[1] 8 8 14 14 16 18 20 20 20 22 22 24 24 24 24 26 26
[18] 26 26 28 28 28 28 30 30 30 32 32 34 34 34 36 36 36
[35] 36 38 38 38 40 40 40 40 40 44 46 48 48 48 48 50
car_list[, .(.N, max(dist)), speed]
# lapply(car_list, '[', j = list(.N, max(dist)), by = speed)
# or
# lapply(car_list, '[', ,.(.N, max(dist)), speed)
[[1]]
speed N V2
1: 4 2 10
2: 7 2 22
3: 8 1 16
4: 9 1 10
5: 10 3 34
...
[[2]]
speed N V2
1: 4 2 10
2: 7 2 22
3: 8 1 16
4: 9 1 10
5: 10 3 34
...
这适用于 :=
运算符:
car_list[, `:=` (more_speed = speed+5)]
# or
# lapply(car_list, '[', , `:=` (more_speed = speed+5))
car_list
[[1]]
speed dist more_speed
1: 4 2 9
2: 4 10 9
3: 7 4 12
4: 7 22 12
5: 8 16 13
...
[[2]]
speed dist more_speed
1: 4 2 9
2: 4 10 9
3: 7 4 12
4: 7 22 12
5: 8 16 13