在 R 中的自定义 DSL 中重新定义组
Redefine groups in a custom DSL in R
我有一个小型 DSL,允许按名称对变量进行分组:
#' Append one logical membership column per named group.
#'
#' Every argument in `...` has the form `name = a + b + c`. The right-hand
#' side is never evaluated: it is captured as an expression, turned into text
#' and split on `+` to obtain the member names.
#'
#' @param .data A data frame with a `vars` column holding the element names.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return `.data` with one extra logical column `group_<name>` per
#'   definition, `TRUE` where `.data$vars` is a member of that group. A column
#'   is always appended, so reusing a group name yields a second column with
#'   the same name instead of overwriting the first one.
group <- function(.data, ...) {
  # Capture the definitions unevaluated with base R, so the function does not
  # depend on any package being attached.
  defs <- as.list(substitute(list(...)))[-1L]
  # seq_along() makes a call without any definition return `.data` unchanged
  # (1:length(defs) would iterate over c(1, 0) and fail).
  for (i in seq_along(defs)) {
    # A long expression may deparse to several strings; glue them back together.
    rhs <- paste(deparse(defs[[i]], width.cutoff = 500L), collapse = " ")
    members <- trimws(strsplit(rhs, "+", fixed = TRUE)[[1L]])
    .data <- cbind(.data, .data$vars %in% members)
    names(.data)[ncol(.data)] <- paste0("group_", names(defs)[i])
  }
  .data
}
library(magrittr)
# Example data: a single character column `vars` listing the element names
# that the group definitions refer to.
df <- data.frame(
  vars = c("one", "two", "three", "four"),
  stringsAsFactors = FALSE
)
# Define a group called abc containing elements two, three and four:
df %>% group(abc = two + three + four)
vars group_abc
1 one FALSE
2 two TRUE
3 three TRUE
4 four TRUE
# Define multiple groups
df %>% group(odd = one + three, even = two + four, prime = one + two + three)
vars group_odd group_even group_prime
1 one TRUE FALSE TRUE
2 two FALSE TRUE TRUE
3 three TRUE FALSE TRUE
4 four FALSE TRUE FALSE
然而,这不允许重新定义组:
df %>% group(abc = two + three + four) %>% group(abc = two)
vars group_abc group_abc
1 one FALSE FALSE
2 two TRUE TRUE
3 three TRUE FALSE
4 four TRUE FALSE
组abc被定义了两次而不是被覆盖。
我试过了:
#' Define or redefine logical membership columns for named groups.
#'
#' Every argument in `...` has the form `name = a + b + c`. The right-hand
#' side is never evaluated: it is captured as an expression, turned into text
#' and split on `+` to obtain the member names.
#'
#' @param .data A data frame with a `vars` column holding the element names.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return `.data` where, for each definition, the column `group_<name>` is
#'   `TRUE` for rows whose `vars` value is a member. An existing column with
#'   exactly that name is overwritten; otherwise a new column is appended.
group2 <- function(.data, ...) {
  # Capture the definitions unevaluated with base R, so the function does not
  # depend on any package being attached.
  defs <- as.list(substitute(list(...)))[-1L]
  # seq_along() makes a call without any definition return `.data` unchanged.
  for (i in seq_along(defs)) {
    # A long expression may deparse to several strings; glue them back together.
    rhs <- paste(deparse(defs[[i]], width.cutoff = 500L), collapse = " ")
    is_member <- .data$vars %in% trimws(strsplit(rhs, "+", fixed = TRUE)[[1L]])
    column <- paste0("group_", names(defs)[i])
    # Compare full column names. A pattern match on the bare group name would
    # also hit unrelated columns (a group called `a` matches `vars`).
    existing <- names(.data) == column
    if (any(existing)) {
      .data[, existing] <- is_member
    } else {
      .data <- cbind(.data, is_member)
      names(.data)[ncol(.data)] <- column
    }
  }
  .data
}
df %>% group2(abc = two + three + four) %>% group2(abc = two)
vars group_abc
1 one FALSE
2 two TRUE
3 three FALSE
4 four FALSE
这勉强可行,但看起来非常丑陋。
所以我的问题是:在我的 `group` DSL 中,重新定义组的好方法是什么?
感谢任何提示。
更多上下文:
我的另一个关于我的 DSL 的一般主题的问题
这是一个非常有趣的问题。您可以使用 `dplyr::mutate` 来“覆盖”(overwrite)现有变量。我们还可以使用 `purrr::map` 来简化您的循环。主要思想是将提供的表达式拆分为标记(token),并构建形如 `vars %in% c("token1", "token2", ...)` 的新表达式,然后将生成的表达式传递给 `mutate`:
library( tidyverse )
#' Define (or redefine) logical group columns via `dplyr::mutate()`.
#'
#' Each argument in `...` has the form `name = a + b + c`. The expression is
#' captured unevaluated, converted to text, split on `+` and trimmed, and then
#' turned into an expression of the form `vars %in% <members>` named
#' `group_<name>`. All expressions are spliced into a single `mutate()` call.
#'
#' @param .data A data frame with a `vars` column.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return The result of `mutate()` on `.data` with one `group_<name>` column
#'   per definition.
group <- function(.data, ...) {
  captured <- enexprs(...)
  as_text <- map(captured, rlang::expr_text)
  members <- map(str_split(as_text, "[+]"), str_trim)
  # Build one `vars %in% <members>` expression per group.
  conditions <- map(members, function(tokens) expr(vars %in% !!tokens))
  conditions <- set_names(conditions, str_c("group_", names(as_text)))
  mutate(.data, !!!conditions)
}
df %>% group(odd = one + three, even = two + four, prime = one + two + three)
# vars group_odd group_even group_prime
# 1 one TRUE FALSE TRUE
# 2 two FALSE TRUE TRUE
# 3 three TRUE FALSE TRUE
# 4 four FALSE TRUE FALSE
df %>% group( abc = two + three + four ) %>% group( abc = two )
# vars group_abc
# 1 one FALSE
# 2 two TRUE
# 3 three FALSE
# 4 four FALSE
我有一个小型 DSL,允许按名称对变量进行分组:
#' Append one logical membership column per named group.
#'
#' Every argument in `...` has the form `name = a + b + c`. The right-hand
#' side is never evaluated: it is captured as an expression, turned into text
#' and split on `+` to obtain the member names.
#'
#' @param .data A data frame with a `vars` column holding the element names.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return `.data` with one extra logical column `group_<name>` per
#'   definition, `TRUE` where `.data$vars` is a member of that group. A column
#'   is always appended, so reusing a group name yields a second column with
#'   the same name instead of overwriting the first one.
group <- function(.data, ...) {
  # Capture the definitions unevaluated with base R, so the function does not
  # depend on any package being attached.
  defs <- as.list(substitute(list(...)))[-1L]
  # seq_along() makes a call without any definition return `.data` unchanged
  # (1:length(defs) would iterate over c(1, 0) and fail).
  for (i in seq_along(defs)) {
    # A long expression may deparse to several strings; glue them back together.
    rhs <- paste(deparse(defs[[i]], width.cutoff = 500L), collapse = " ")
    members <- trimws(strsplit(rhs, "+", fixed = TRUE)[[1L]])
    .data <- cbind(.data, .data$vars %in% members)
    names(.data)[ncol(.data)] <- paste0("group_", names(defs)[i])
  }
  .data
}
library(magrittr)
# Example data: a single character column `vars` listing the element names
# that the group definitions refer to.
df <- data.frame(
  vars = c("one", "two", "three", "four"),
  stringsAsFactors = FALSE
)
# Define a group called abc containing elements two, three and four:
df %>% group(abc = two + three + four)
vars group_abc
1 one FALSE
2 two TRUE
3 three TRUE
4 four TRUE
# Define multiple groups
df %>% group(odd = one + three, even = two + four, prime = one + two + three)
vars group_odd group_even group_prime
1 one TRUE FALSE TRUE
2 two FALSE TRUE TRUE
3 three TRUE FALSE TRUE
4 four FALSE TRUE FALSE
然而,这不允许重新定义组:
df %>% group(abc = two + three + four) %>% group(abc = two)
vars group_abc group_abc
1 one FALSE FALSE
2 two TRUE TRUE
3 three TRUE FALSE
4 four TRUE FALSE
组abc被定义了两次而不是被覆盖。
我试过了:
#' Define or redefine logical membership columns for named groups.
#'
#' Every argument in `...` has the form `name = a + b + c`. The right-hand
#' side is never evaluated: it is captured as an expression, turned into text
#' and split on `+` to obtain the member names.
#'
#' @param .data A data frame with a `vars` column holding the element names.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return `.data` where, for each definition, the column `group_<name>` is
#'   `TRUE` for rows whose `vars` value is a member. An existing column with
#'   exactly that name is overwritten; otherwise a new column is appended.
group2 <- function(.data, ...) {
  # Capture the definitions unevaluated with base R, so the function does not
  # depend on any package being attached.
  defs <- as.list(substitute(list(...)))[-1L]
  # seq_along() makes a call without any definition return `.data` unchanged.
  for (i in seq_along(defs)) {
    # A long expression may deparse to several strings; glue them back together.
    rhs <- paste(deparse(defs[[i]], width.cutoff = 500L), collapse = " ")
    is_member <- .data$vars %in% trimws(strsplit(rhs, "+", fixed = TRUE)[[1L]])
    column <- paste0("group_", names(defs)[i])
    # Compare full column names. A pattern match on the bare group name would
    # also hit unrelated columns (a group called `a` matches `vars`).
    existing <- names(.data) == column
    if (any(existing)) {
      .data[, existing] <- is_member
    } else {
      .data <- cbind(.data, is_member)
      names(.data)[ncol(.data)] <- column
    }
  }
  .data
}
df %>% group2(abc = two + three + four) %>% group2(abc = two)
vars group_abc
1 one FALSE
2 two TRUE
3 three FALSE
4 four FALSE
这勉强可行,但看起来非常丑陋。
所以我的问题是:在我的 `group` DSL 中,重新定义组的好方法是什么?
感谢任何提示。
更多上下文:
这是一个非常有趣的问题。您可以使用 `dplyr::mutate` 来“覆盖”(overwrite)现有变量。我们还可以使用 `purrr::map` 来简化您的循环。主要思想是将提供的表达式拆分为标记(token),并构建形如 `vars %in% c("token1", "token2", ...)` 的新表达式,然后将生成的表达式传递给 `mutate`:
library( tidyverse )
#' Define (or redefine) logical group columns via `dplyr::mutate()`.
#'
#' Each argument in `...` has the form `name = a + b + c`. The expression is
#' captured unevaluated, converted to text, split on `+` and trimmed, and then
#' turned into an expression of the form `vars %in% <members>` named
#' `group_<name>`. All expressions are spliced into a single `mutate()` call.
#'
#' @param .data A data frame with a `vars` column.
#' @param ... Named group definitions, e.g. `abc = two + three + four`.
#' @return The result of `mutate()` on `.data` with one `group_<name>` column
#'   per definition.
group <- function(.data, ...) {
  captured <- enexprs(...)
  as_text <- map(captured, rlang::expr_text)
  members <- map(str_split(as_text, "[+]"), str_trim)
  # Build one `vars %in% <members>` expression per group.
  conditions <- map(members, function(tokens) expr(vars %in% !!tokens))
  conditions <- set_names(conditions, str_c("group_", names(as_text)))
  mutate(.data, !!!conditions)
}
df %>% group(odd = one + three, even = two + four, prime = one + two + three)
# vars group_odd group_even group_prime
# 1 one TRUE FALSE TRUE
# 2 two FALSE TRUE TRUE
# 3 three TRUE FALSE TRUE
# 4 four FALSE TRUE FALSE
df %>% group( abc = two + three + four ) %>% group( abc = two )
# vars group_abc
# 1 one FALSE
# 2 two TRUE
# 3 three FALSE
# 4 four FALSE