如何在嵌套函数中将所有可能的东西传递给 i、j 和 by?
How to pass everything possible to i, j and by in nested functions?
我正在开发一个内部使用 data.table
的包。在这个包中,我有一个函数 count_by
,它按组计算 data.table
中特定变量的不同 ID 的数量。在他人的帮助下,我让它按预期工作:
library(data.table)
#> Warning: package 'data.table' was built under R version 3.6.2
# Create example data: three ids (A, B, C), each observed in 2018-2020.
sample_dt <- data.table(
  # each = 3L yields A A A B B B C C C directly, no sort() needed
  id = rep(LETTERS[1:3], each = 3L),
  # times = 3L spells out the repetition instead of relying on data.table()
  # recycling a length-3 vector to 9 rows
  year = rep(2018L:2020L, times = 3L),
  x = runif(9L)
)
# Set x to NA for id "B" before 2020 so the per-year distinct-id counts differ.
sample_dt[id == "B" & year < 2020L, x := NA_real_]
# define inner function
# Count the distinct ids per group among rows whose value column is not NA.
#
#   DT      - data.table to query
#   id_var  - id column, given as a bare name or as a string
#   val_var - value column (bare name or string); rows where it is NA are dropped
#   by      - grouping specification spliced unevaluated into data.table's `by`
#
# Returns the result of the data.table query, with a `distinct_ids` column.
#
# NOTE: the arguments are captured with substitute(), i.e. exactly as written
# at the call site. When another function forwards its own parameters, this
# function sees those parameter names instead of the user's input.
count_by <- function(DT, id_var, val_var, by = NULL) {
# as.character() maps both a bare symbol and a string to a column-name string
id_var <- as.character(substitute(id_var))
val_var <- as.character(substitute(val_var))
# substitute() splices the two strings above plus the unevaluated DT and by
# expressions into the query; eval() then runs the finished call
eval(substitute(
DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by]
))
}
# test inner function -> works as expected
# The first call sets the reference result. The following calls pass id_var,
# val_var and by as bare names, strings or c("...") in different combinations;
# each must reproduce `reference` exactly.
(reference <- count_by(sample_dt, id_var = id, val_var = x, by = year))
#> year distinct_ids
#> 1: 2018 2
#> 2: 2019 2
#> 3: 2020 3
identical(count_by(sample_dt, "id", x, year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", x, "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", x, c("year")) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", c("year")), reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", c("year")) , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, x, "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, x, c("year")) , reference)
#> [1] TRUE
由 reprex package (v0.3.0)
于 2020 年 2 月 20 日创建
现在我想在另一个函数中使用函数 count_by()
(下面的最小示例):
# define wrapper function
# Coerce `data` to a data.table and delegate to count_by().
# NOTE: the arguments are forwarded as the symbols data, id_var, val_var and
# by. A count_by() that captures its arguments with substitute() therefore
# sees these parameter names rather than what the user typed.
wrapper <- function(data, id_var, val_var, by = NULL) {
data <- as.data.table(data)
count_by(data, id_var, val_var, by)
}
# test wrapper function -> fails: the error message shows get("id_var"), i.e.
# the wrapper's parameter name was captured instead of the user's column `id`
wrapper(sample_dt, id_var = id, val_var = x, by = year)
#> Error in .(distinct_ids = uniqueN(get("id_var"))): could not find function "."
由 reprex package (v0.3.0)
于 2020 年 2 月 20 日创建
调试 count_by() 后发现:如果从 wrapper() 调用 count_by(),substitute(DT[...]) 也会把 DT 替换为 data:
Browse[2]> substitute(
+ DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by]
+ )
data[!is.na(get("val_var")), .(distinct_ids = uniqueN(get("id_var"))),
by = by]
由于 data
在 count_by()
的函数环境中不可用,因此它被计算为 utils::data
,然后导致错误。这使问题很清楚,但我想不出解决办法。
我需要对整个 DT[...] 表达式使用 substitute(),才能让 by 正常工作(参见“将变量和名称传递给 data.table 函数”)。但为了不让 DT 被替换,我又不能对整个表达式使用 substitute()。
解决这个难题的方法是什么?
将 NSE 排除在外对于这个特定示例有效,并且大大简化了事情。但是你应该将参数作为字符串传递:
# Count the distinct ids per group among rows whose value column is not NA.
# This version uses no non-standard evaluation: every column argument is a
# plain string.
#
#   DT      - data.table to query
#   id_var  - name of the id column
#   val_var - name of the value column; rows where it is NA are dropped
#   by      - grouping column name(s) handed to data.table's `by`
#
# Returns a data.table with the grouping column(s) and `distinct_ids`.
count_by <- function(DT, id_var, val_var, by = NULL) {
  DT[
    !is.na(get(val_var)),
    # list() is what data.table's `.()` shorthand stands for inside j
    list(distinct_ids = uniqueN(get(id_var))),
    by = by
  ]
}
# Pass-through to count_by(); every argument is handed on unchanged.
wrapper <- function(data, id_var, val_var, by = NULL) {
  count_by(DT = data, id_var = id_var, val_var = val_var, by = by)
}
# All column arguments are passed as plain strings here.
wrapper(sample_dt, id_var = "id", val_var = "x", by = "year")
# year distinct_ids
# 1: 2018 2
# 2: 2019 2
# 3: 2020 3
@chinsoon12,非常感谢!你的回答几乎解决了问题!我仍然需要先把 id_var 和 val_var 转换为字符串,然后在 data.table 调用中用 get() 按名称取出这些列——否则,把字符串传给 id_var 和 val_var 就不起作用。
但关键思想是:必须在更高一层(即调用方)进行求值。以下是供日后参考的完整答案:
# define inner function
# Build, but do not evaluate, the data.table query that counts distinct ids
# per group among rows whose value column is not NA.
#
#   DT      - expression for the table; spliced into the query as written
#   id_var  - id column, bare name or string
#   val_var - value column, bare name or string
#   by      - grouping expression; spliced into the query as written
#
# Returns an unevaluated call; the caller is responsible for eval()-ing it.
count_by <- function(DT, id_var, val_var, by = NULL) {
  # Everything that gets spliced into the template below, spelled out:
  # the column arguments become strings, DT and by stay unevaluated.
  replacements <- list(
    DT = substitute(DT),
    id_var = as.character(substitute(id_var)),
    val_var = as.character(substitute(val_var)),
    by = substitute(by)
  )
  substitute(
    DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by],
    replacements
  )
}
# define wrapper function
# Coerce `data` to a data.table, obtain a query expression from count_by(),
# and evaluate that expression in this function.
wrapper <- function(data, id_var, val_var, by = NULL) {
data <- as.data.table(data)
# substitute() replaces id_var, val_var and by with the expressions the user
# wrote (and data with the converted table) before count_by() is called, so
# count_by() captures the user's input rather than this function's parameters
expr <- eval(substitute(count_by(data, id_var, val_var, by)))
# count_by() is expected to return an unevaluated call here; run it now
eval(expr)
}
# test wrapper function
# The first call sets the reference result. The following calls pass id_var,
# val_var and by as bare names, strings or c("...") in different combinations;
# each must reproduce `reference` exactly.
(reference <- (wrapper(sample_dt, id_var = id, val_var = x, by = year)))
#> year distinct_ids
#> 1: 2018 2
#> 2: 2019 2
#> 3: 2020 3
identical(wrapper(sample_dt, "id", x, year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", x, "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", x, c("year")) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", c("year")), reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", c("year")) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, x, "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, x, c("year")) , reference)
#> [1] TRUE
# test expression in by: a computed, named grouping column also works
wrapper(sample_dt, "id", x, by = .(year_2019 = year > 2019L))
#> year_2019 distinct_ids
#> 1: FALSE 2
#> 2: TRUE 3
由 reprex package (v0.3.0)
于 2020 年 2 月 20 日创建
我正在开发一个内部使用 data.table
的包。在这个包中,我有一个函数 count_by
,它按组计算 data.table
中特定变量的不同 ID 的数量。在他人的帮助下,我让它按预期工作:
library(data.table)
#> Warning: package 'data.table' was built under R version 3.6.2
# Create example data: three ids (A, B, C), each observed in 2018-2020.
sample_dt <- data.table(
  # each = 3L yields A A A B B B C C C directly, no sort() needed
  id = rep(LETTERS[1:3], each = 3L),
  # times = 3L spells out the repetition instead of relying on data.table()
  # recycling a length-3 vector to 9 rows
  year = rep(2018L:2020L, times = 3L),
  x = runif(9L)
)
# Set x to NA for id "B" before 2020 so the per-year distinct-id counts differ.
sample_dt[id == "B" & year < 2020L, x := NA_real_]
# define inner function
# Count the distinct ids per group among rows whose value column is not NA.
#
#   DT      - data.table to query
#   id_var  - id column, given as a bare name or as a string
#   val_var - value column (bare name or string); rows where it is NA are dropped
#   by      - grouping specification spliced unevaluated into data.table's `by`
#
# Returns the result of the data.table query, with a `distinct_ids` column.
#
# NOTE: the arguments are captured with substitute(), i.e. exactly as written
# at the call site. When another function forwards its own parameters, this
# function sees those parameter names instead of the user's input.
count_by <- function(DT, id_var, val_var, by = NULL) {
# as.character() maps both a bare symbol and a string to a column-name string
id_var <- as.character(substitute(id_var))
val_var <- as.character(substitute(val_var))
# substitute() splices the two strings above plus the unevaluated DT and by
# expressions into the query; eval() then runs the finished call
eval(substitute(
DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by]
))
}
# test inner function -> works as expected
# The first call sets the reference result. The following calls pass id_var,
# val_var and by as bare names, strings or c("...") in different combinations;
# each must reproduce `reference` exactly.
(reference <- count_by(sample_dt, id_var = id, val_var = x, by = year))
#> year distinct_ids
#> 1: 2018 2
#> 2: 2019 2
#> 3: 2020 3
identical(count_by(sample_dt, "id", x, year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", x, "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", x, c("year")) , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, "id", "x", c("year")), reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", year) , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, "x", c("year")) , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, x, "year") , reference)
#> [1] TRUE
identical(count_by(sample_dt, id, x, c("year")) , reference)
#> [1] TRUE
由 reprex package (v0.3.0) 于 2020 年 2 月 20 日创建。
现在我想在另一个函数中使用函数 count_by()(最小示例如下):
# define wrapper function
# Coerce `data` to a data.table and delegate to count_by().
# NOTE: the arguments are forwarded as the symbols data, id_var, val_var and
# by. A count_by() that captures its arguments with substitute() therefore
# sees these parameter names rather than what the user typed.
wrapper <- function(data, id_var, val_var, by = NULL) {
data <- as.data.table(data)
count_by(data, id_var, val_var, by)
}
# test wrapper function -> fails: the error message shows get("id_var"), i.e.
# the wrapper's parameter name was captured instead of the user's column `id`
wrapper(sample_dt, id_var = id, val_var = x, by = year)
#> Error in .(distinct_ids = uniqueN(get("id_var"))): could not find function "."
由 reprex package (v0.3.0) 于 2020 年 2 月 20 日创建。
调试 count_by() 后发现:如果从 wrapper() 调用 count_by(),substitute(DT[...]) 也会把 DT 替换为 data:
Browse[2]> substitute(
+ DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by]
+ )
data[!is.na(get("val_var")), .(distinct_ids = uniqueN(get("id_var"))),
by = by]
由于 data
在 count_by()
的函数环境中不可用,因此它被计算为 utils::data
,然后导致错误。这使问题很清楚,但我想不出解决办法。
我需要对整个 DT[...] 表达式使用 substitute(),才能让 by 正常工作(参见“将变量和名称传递给 data.table 函数”)。但为了不让 DT 被替换,我又不能对整个表达式使用 substitute()。
解决这个难题的方法是什么?
将 NSE 排除在外对于这个特定示例有效,并且大大简化了事情。但是你应该将参数作为字符串传递:
# Count the distinct ids per group among rows whose value column is not NA.
# This version uses no non-standard evaluation: every column argument is a
# plain string.
#
#   DT      - data.table to query
#   id_var  - name of the id column
#   val_var - name of the value column; rows where it is NA are dropped
#   by      - grouping column name(s) handed to data.table's `by`
#
# Returns a data.table with the grouping column(s) and `distinct_ids`.
count_by <- function(DT, id_var, val_var, by = NULL) {
  DT[
    !is.na(get(val_var)),
    # list() is what data.table's `.()` shorthand stands for inside j
    list(distinct_ids = uniqueN(get(id_var))),
    by = by
  ]
}
# Pass-through to count_by(); every argument is handed on unchanged.
wrapper <- function(data, id_var, val_var, by = NULL) {
  count_by(DT = data, id_var = id_var, val_var = val_var, by = by)
}
# All column arguments are passed as plain strings here.
wrapper(sample_dt, id_var = "id", val_var = "x", by = "year")
# year distinct_ids
# 1: 2018 2
# 2: 2019 2
# 3: 2020 3
@chinsoon12,非常感谢!你的回答几乎解决了问题!我仍然需要先把 id_var 和 val_var 转换为字符串,然后在 data.table 调用中用 get() 按名称取出这些列——否则,把字符串传给 id_var 和 val_var 就不起作用。
但关键思想是:必须在更高一层(即调用方)进行求值。以下是供日后参考的完整答案:
# define inner function
# Build, but do not evaluate, the data.table query that counts distinct ids
# per group among rows whose value column is not NA.
#
#   DT      - expression for the table; spliced into the query as written
#   id_var  - id column, bare name or string
#   val_var - value column, bare name or string
#   by      - grouping expression; spliced into the query as written
#
# Returns an unevaluated call; the caller is responsible for eval()-ing it.
count_by <- function(DT, id_var, val_var, by = NULL) {
  # Everything that gets spliced into the template below, spelled out:
  # the column arguments become strings, DT and by stay unevaluated.
  replacements <- list(
    DT = substitute(DT),
    id_var = as.character(substitute(id_var)),
    val_var = as.character(substitute(val_var)),
    by = substitute(by)
  )
  substitute(
    DT[!is.na(get(val_var)), .(distinct_ids = uniqueN(get(id_var))), by = by],
    replacements
  )
}
# define wrapper function
# Coerce `data` to a data.table, obtain a query expression from count_by(),
# and evaluate that expression in this function.
wrapper <- function(data, id_var, val_var, by = NULL) {
data <- as.data.table(data)
# substitute() replaces id_var, val_var and by with the expressions the user
# wrote (and data with the converted table) before count_by() is called, so
# count_by() captures the user's input rather than this function's parameters
expr <- eval(substitute(count_by(data, id_var, val_var, by)))
# count_by() is expected to return an unevaluated call here; run it now
eval(expr)
}
# test wrapper function
# The first call sets the reference result. The following calls pass id_var,
# val_var and by as bare names, strings or c("...") in different combinations;
# each must reproduce `reference` exactly.
(reference <- (wrapper(sample_dt, id_var = id, val_var = x, by = year)))
#> year distinct_ids
#> 1: 2018 2
#> 2: 2019 2
#> 3: 2020 3
identical(wrapper(sample_dt, "id", x, year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", x, "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", x, c("year")) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, "id", "x", c("year")), reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", year) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, "x", c("year")) , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, x, "year") , reference)
#> [1] TRUE
identical(wrapper(sample_dt, id, x, c("year")) , reference)
#> [1] TRUE
# test expression in by: a computed, named grouping column also works
wrapper(sample_dt, "id", x, by = .(year_2019 = year > 2019L))
#> year_2019 distinct_ids
#> 1: FALSE 2
#> 2: TRUE 3
由 reprex package (v0.3.0)
于 2020 年 2 月 20 日创建