使用 NSE 构造公式
Use NSE to construct a formula
我正在尝试使用 NSE(非标准求值)构建一个公式,以便可以方便地通过管道传入数据框并直接写列名。以下是我想要的用例:
df %>% make_formula(col1, col2, col3)
[1] "col1 ~ col2 + col3"
我先写了这个函数:
varstring <- function(...) {
  # Recover the call that invoked this function, e.g. varstring(a, b)
  this_call <- match.call()
  # Element 1 is the function name; the remaining elements are the
  # unevaluated argument expressions
  arg_exprs <- this_call[-1]
  # Turn each argument expression into its text form
  as.character(arg_exprs)
}
这适用于单个对象或多个对象:
varstring(col)
[1] "col"
varstring(col1, col2, col3)
[1] "col1" "col2" "col3"
接下来我创建函数来创建公式:
formula <- function(df, col, ...) {
  # Text that varstring() reports for the `col` argument (left-hand side)
  lhs_text <- varstring(col)
  # Text that varstring() reports for the remaining arguments
  rhs_parts <- varstring(...)
  # Join the right-hand side pieces with " + "
  rhs_text <- paste(rhs_parts, collapse = " + ")
  # Assemble "<lhs> ~ <rhs>" as a single string; `df` is not used here
  paste(lhs_text, "~", rhs_text, sep = " ")
}
然而,函数调用 formula(df, col, col1, col2, col3)
产生 [1] "group ~ ..1 + ..2 + ..3"
。
我知道该函数实际上是在对字面上的 varstring(group)
和 varstring(...)
求值,而不是像我希望的那样,先代入用户提供的对象再求值。但我不知道怎样才能让它按预期工作。
我建议使用 rlang::enquo
(s) 和 rlang::as_name
来实现:
library(rlang)
# Build a formula string such as "col ~ col1 + col2 + col3" from bare
# column names.
#
# Arguments:
#   df   Not used in the body; it is accepted so the function can be the
#        target of a pipe (df %>% formula(...)).
#   col  Bare name used as the left-hand side.
#   ...  Bare names used as right-hand side terms, joined with " + ".
#
# Returns a length-one character vector. When no right-hand side terms are
# supplied the result is "col ~ 1" rather than the unparseable "col ~ ".
formula <- function(df, col, ...) {
  group <- rlang::enquo(col)
  vars <- rlang::enquos(...)

  group_str <- rlang::as_name(group)
  # vapply() guarantees a character vector (one name per argument)
  vars_str <- vapply(vars, rlang::as_name, character(1), USE.NAMES = FALSE)

  if (length(vars_str) == 0) {
    # No predictors: fall back to an intercept-only right-hand side
    rhs_str <- "1"
  } else {
    rhs_str <- paste(vars_str, collapse = " + ")
  }

  paste(group_str, "~", rhs_str, sep = " ")
}
formula(mtcars, col, col1, col2, col3)
#> [1] "col ~ col1 + col2 + col3"
我们可以使用 reformulate
# Build a formula string such as "col ~ col1 + col2 + col3" via
# stats::reformulate().
#
# Arguments:
#   dat  Not used in the body; accepted so the function can be piped into.
#   col  Bare name (or string) used as the response.
#   ...  Bare names (or strings) used as term labels.
#
# Returns a length-one character vector.
formula_fn <- function(dat, col, ...) {
  term_labels <- purrr::map_chr(rlang::ensyms(...), rlang::as_string)
  response_label <- rlang::as_string(rlang::ensym(col))

  fml <- reformulate(term_labels, response = response_label)

  # deparse() breaks its output into several strings once a line exceeds
  # `width.cutoff` (60 by default), so a formula with many terms would come
  # back as a multi-element vector. Use the maximum cutoff and collapse so
  # the result is always a single string.
  paste(deparse(fml, width.cutoff = 500L), collapse = " ")
}
formula_fn(mtcars, col, col1, col2, col3)
#[1] "col ~ col1 + col2 + col3"
您可以使用 reduce()
将任意数量的参数与二元函数连接起来
make_formula <- function(lhs, ..., op = "+") {
  # Capture the inputs as symbols instead of evaluating them
  response <- ensym(lhs)
  rhs_syms <- ensyms(...)

  # Combine two expressions into the call `out <op> new`
  join_terms <- function(out, new) call(op, out, new)

  # No terms gives an intercept-only right-hand side; otherwise fold the
  # symbols left to right (a single symbol is returned unchanged by reduce())
  rhs <- if (length(rhs_syms) == 0) 1 else purrr::reduce(rhs_syms, join_terms)

  # Don't forget to forward the caller environment
  new_formula(response, rhs, env = caller_env())
}
make_formula(disp)
#> disp ~ 1
make_formula(disp, cyl)
#> disp ~ cyl
make_formula(disp, cyl, am, drat)
#> disp ~ cyl + am + drat
make_formula(disp, cyl, am, drat, op = "*")
#> disp ~ cyl * am * drat
使用表达式的一大优势是,它能够抵御"小鲍比表"(Little Bobby Tables)式的代码注入 (https://xkcd.com/327/):
# User inputs are always interpreted as symbols (variable name)
make_formula(disp, `I(file.remove('~'))`)
#> disp ~ `I(file.remove('~'))`
# With `paste()` + `parse()` user inputs are interpreted as arbitrary code
reformulate(c("foo", "I(file.remove('~'))"))
#> ~foo + I(file.remove("~"))
我采纳了上面@LionelHenry 的建议并创建了以下函数,其中包含一些我最初的问题中没有要求的附加功能。
#' Create a formula
#'
#' Creates a new formula object from bare column names, to be used anywhere
#' formulas are used (e.g. `glm()`).
#'
#' @param ... Any number of bare column names (or strings) to compose the
#'   formula from.
#' @param lhs A single logical indicating whether the formula has a left-hand
#'   side. If `TRUE`, the first argument in `...` becomes the left-hand side.
#' @param op A string naming the binary operator used to join the terms on
#'   the right-hand side of the formula (default `"+"`).
#' @param group An optional bare column name (or string) to use as a grouping
#'   variable to facet by. `NULL` (the default) adds no grouping term.
#'
#' @return A formula whose environment is the caller's environment.
#'
#' @details If `lhs` is `TRUE`, the first argument provided is used as the
#' left-hand side of the formula and at least one argument is required. When
#' no right-hand side terms remain, the right-hand side is `1`.
#'
#' The `group` parameter will add `| group` to the end of the formula. This
#' is useful for packages that support faceting by grouping variables for the
#' purposes of tables or graphs.
#'
#' @export
#'
#' @examples
#' make_formula(var1, var2, var3)
#' make_formula(var1, var2, var3, lhs = FALSE)
#' make_formula(var1, var2, var3, lhs = FALSE, group = var4)
#'
make_formula <- function(..., lhs = TRUE, op = "+", group = NULL) {
  args <- rlang::ensyms(...)
  group <- rlang::enexpr(group)

  if (lhs) {
    # Fail early with a readable message instead of "subscript out of bounds"
    if (length(args) == 0) {
      stop(
        "`...` must contain at least one variable when `lhs = TRUE`.",
        call. = FALSE
      )
    }
    left <- args[[1]]
    rhs_terms <- args[-1]
  } else {
    left <- NULL
    rhs_terms <- args
  }

  if (length(rhs_terms) == 0) {
    # Nothing left for the right-hand side: intercept-only formula
    right <- 1
  } else {
    # reduce() returns a single term unchanged and folds several terms left
    # to right into nested `op` calls
    right <- purrr::reduce(rhs_terms, function(out, new) call(op, out, new))
  }

  if (!is.null(group)) {
    # `group` may arrive wrapped in a quosure when injected with `!!`
    if (rlang::is_quosure(group)) {
      group <- rlang::quo_get_expr(group)
    }
    if (is.character(group) && length(group) == 1) {
      group <- as.symbol(group)
    }
    if (!is.symbol(group)) {
      stop("`group` must be a single column name.", call. = FALSE)
    }
    right <- call("|", right, group)
  }

  # Forward to the caller environment
  rlang::new_formula(left, right, env = rlang::caller_env())
}
我正在尝试使用 NSE(非标准求值)构建一个公式,以便可以方便地通过管道传入数据框并直接写列名。以下是我想要的用例:
df %>% make_formula(col1, col2, col3)
[1] "col1 ~ col2 + col3"
我先写了这个函数:
varstring <- function(...) {
  # Recover the call that invoked this function, e.g. varstring(a, b)
  this_call <- match.call()
  # Element 1 is the function name; the remaining elements are the
  # unevaluated argument expressions
  arg_exprs <- this_call[-1]
  # Turn each argument expression into its text form
  as.character(arg_exprs)
}
这适用于单个对象或多个对象:
varstring(col)
[1] "col"
varstring(col1, col2, col3)
[1] "col1" "col2" "col3"
接下来我创建函数来创建公式:
formula <- function(df, col, ...) {
  # Text that varstring() reports for the `col` argument (left-hand side)
  lhs_text <- varstring(col)
  # Text that varstring() reports for the remaining arguments
  rhs_parts <- varstring(...)
  # Join the right-hand side pieces with " + "
  rhs_text <- paste(rhs_parts, collapse = " + ")
  # Assemble "<lhs> ~ <rhs>" as a single string; `df` is not used here
  paste(lhs_text, "~", rhs_text, sep = " ")
}
然而,函数调用 formula(df, col, col1, col2, col3)
产生 [1] "group ~ ..1 + ..2 + ..3"
。
我知道该函数实际上是在对字面上的 varstring(group)
和 varstring(...)
求值,而不是像我希望的那样,先代入用户提供的对象再求值。但我不知道怎样才能让它按预期工作。
我建议使用 rlang::enquo
(s) 和 rlang::as_name
来实现:
library(rlang)
# Build a formula string such as "col ~ col1 + col2 + col3" from bare
# column names.
#
# Arguments:
#   df   Not used in the body; it is accepted so the function can be the
#        target of a pipe (df %>% formula(...)).
#   col  Bare name used as the left-hand side.
#   ...  Bare names used as right-hand side terms, joined with " + ".
#
# Returns a length-one character vector. When no right-hand side terms are
# supplied the result is "col ~ 1" rather than the unparseable "col ~ ".
formula <- function(df, col, ...) {
  group <- rlang::enquo(col)
  vars <- rlang::enquos(...)

  group_str <- rlang::as_name(group)
  # vapply() guarantees a character vector (one name per argument)
  vars_str <- vapply(vars, rlang::as_name, character(1), USE.NAMES = FALSE)

  if (length(vars_str) == 0) {
    # No predictors: fall back to an intercept-only right-hand side
    rhs_str <- "1"
  } else {
    rhs_str <- paste(vars_str, collapse = " + ")
  }

  paste(group_str, "~", rhs_str, sep = " ")
}
formula(mtcars, col, col1, col2, col3)
#> [1] "col ~ col1 + col2 + col3"
我们可以使用 reformulate
# Build a formula string such as "col ~ col1 + col2 + col3" via
# stats::reformulate().
#
# Arguments:
#   dat  Not used in the body; accepted so the function can be piped into.
#   col  Bare name (or string) used as the response.
#   ...  Bare names (or strings) used as term labels.
#
# Returns a length-one character vector.
formula_fn <- function(dat, col, ...) {
  term_labels <- purrr::map_chr(rlang::ensyms(...), rlang::as_string)
  response_label <- rlang::as_string(rlang::ensym(col))

  fml <- reformulate(term_labels, response = response_label)

  # deparse() breaks its output into several strings once a line exceeds
  # `width.cutoff` (60 by default), so a formula with many terms would come
  # back as a multi-element vector. Use the maximum cutoff and collapse so
  # the result is always a single string.
  paste(deparse(fml, width.cutoff = 500L), collapse = " ")
}
formula_fn(mtcars, col, col1, col2, col3)
#[1] "col ~ col1 + col2 + col3"
您可以使用 reduce()
make_formula <- function(lhs, ..., op = "+") {
  # Capture the inputs as symbols instead of evaluating them
  response <- ensym(lhs)
  rhs_syms <- ensyms(...)

  # Combine two expressions into the call `out <op> new`
  join_terms <- function(out, new) call(op, out, new)

  # No terms gives an intercept-only right-hand side; otherwise fold the
  # symbols left to right (a single symbol is returned unchanged by reduce())
  rhs <- if (length(rhs_syms) == 0) 1 else purrr::reduce(rhs_syms, join_terms)

  # Don't forget to forward the caller environment
  new_formula(response, rhs, env = caller_env())
}
make_formula(disp)
#> disp ~ 1
make_formula(disp, cyl)
#> disp ~ cyl
make_formula(disp, cyl, am, drat)
#> disp ~ cyl + am + drat
make_formula(disp, cyl, am, drat, op = "*")
#> disp ~ cyl * am * drat
使用表达式的一大优势是,它能够抵御"小鲍比表"(Little Bobby Tables)式的代码注入 (https://xkcd.com/327/):
# User inputs are always interpreted as symbols (variable name)
make_formula(disp, `I(file.remove('~'))`)
#> disp ~ `I(file.remove('~'))`
# With `paste()` + `parse()` user inputs are interpreted as arbitrary code
reformulate(c("foo", "I(file.remove('~'))"))
#> ~foo + I(file.remove("~"))
我采纳了上面@LionelHenry 的建议并创建了以下函数,其中包含一些我最初的问题中没有要求的附加功能。
#' Create a formula
#'
#' Creates a new formula object from bare column names, to be used anywhere
#' formulas are used (e.g. `glm()`).
#'
#' @param ... Any number of bare column names (or strings) to compose the
#'   formula from.
#' @param lhs A single logical indicating whether the formula has a left-hand
#'   side. If `TRUE`, the first argument in `...` becomes the left-hand side.
#' @param op A string naming the binary operator used to join the terms on
#'   the right-hand side of the formula (default `"+"`).
#' @param group An optional bare column name (or string) to use as a grouping
#'   variable to facet by. `NULL` (the default) adds no grouping term.
#'
#' @return A formula whose environment is the caller's environment.
#'
#' @details If `lhs` is `TRUE`, the first argument provided is used as the
#' left-hand side of the formula and at least one argument is required. When
#' no right-hand side terms remain, the right-hand side is `1`.
#'
#' The `group` parameter will add `| group` to the end of the formula. This
#' is useful for packages that support faceting by grouping variables for the
#' purposes of tables or graphs.
#'
#' @export
#'
#' @examples
#' make_formula(var1, var2, var3)
#' make_formula(var1, var2, var3, lhs = FALSE)
#' make_formula(var1, var2, var3, lhs = FALSE, group = var4)
#'
make_formula <- function(..., lhs = TRUE, op = "+", group = NULL) {
  args <- rlang::ensyms(...)
  group <- rlang::enexpr(group)

  if (lhs) {
    # Fail early with a readable message instead of "subscript out of bounds"
    if (length(args) == 0) {
      stop(
        "`...` must contain at least one variable when `lhs = TRUE`.",
        call. = FALSE
      )
    }
    left <- args[[1]]
    rhs_terms <- args[-1]
  } else {
    left <- NULL
    rhs_terms <- args
  }

  if (length(rhs_terms) == 0) {
    # Nothing left for the right-hand side: intercept-only formula
    right <- 1
  } else {
    # reduce() returns a single term unchanged and folds several terms left
    # to right into nested `op` calls
    right <- purrr::reduce(rhs_terms, function(out, new) call(op, out, new))
  }

  if (!is.null(group)) {
    # `group` may arrive wrapped in a quosure when injected with `!!`
    if (rlang::is_quosure(group)) {
      group <- rlang::quo_get_expr(group)
    }
    if (is.character(group) && length(group) == 1) {
      group <- as.symbol(group)
    }
    if (!is.symbol(group)) {
      stop("`group` must be a single column name.", call. = FALSE)
    }
    right <- call("|", right, group)
  }

  # Forward to the caller environment
  rlang::new_formula(left, right, env = rlang::caller_env())
}