使用 tidyeval 进行程序化回归建模
Programmatic regression modelling with tidyeval
我正在尝试使用 tidyeval 进行编程。
我想编写一个函数,对所选的结果变量运行逻辑回归模型:
library(tidyverse)
# Reproducible toy data: 1000 rows, each with a randomly drawn three-level
# `group` label and a randomly drawn 0/1 `died` outcome.
set.seed(1234)
df <- tibble(
  id = seq_len(1000),
  group = sample(
    c("Group 1", "Group 2", "Group 3"),
    size = 1000,
    replace = TRUE
  ),
  died = sample(c(0, 1), size = 1000, replace = TRUE)
)
# First attempt: fit a logistic regression of `outcome` on `group` and return
# the tidied coefficients (exponentiated, with confidence intervals).
# NOTE: this version does not work; calling it raises the
# "invalid argument type" error quoted below.
myfunc <- function(data, outcome){
# Capture the unevaluated `outcome` argument as a quosure.
enquo_var <- enquo(outcome)
# `!!` is written directly inside the formula handed to glm(). Presumably
# glm() performs no tidyeval unquoting, so `!!` is evaluated as ordinary
# logical negation of the quosure, which would explain the error.
fit <- tidy(glm(!!enquo_var ~ group, data=data,
family = binomial(link = "logit")),
exponentiate = TRUE, conf.int=TRUE)
fit
}
myfunc(df, died)
但是得到:
Error in !enquo_var : invalid argument type
(注意:真实场景涉及更复杂的函数)。
这可能吗?
我们需要为 glm 构造一个它能够识别的公式。一种选择是使用 paste:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # The formula is assembled from text, e.g. "died~group", and converted
  # explicitly instead of relying on glm() to coerce a bare string.
  outcome_name <- quo_name(enquo(outcome))
  model_formula <- stats::as.formula(paste(outcome_name, "group", sep = "~"))
  model <- glm(model_formula, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
如果我们还需要使用tidyverse函数
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # Turn the captured argument into a symbol via its name.
  outcome_sym <- rlang::sym(quo_name(enquo(outcome)))
  # rlang::expr() processes `!!`, so the symbol is spliced into
  # `<outcome> ~ group` before glm() ever sees the expression.
  model_call <- rlang::expr(!!outcome_sym ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
或者,正如 @lionel 在评论中提到的,可以使用 get_expr:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # Pull the bare expression out of the quosure captured from `outcome`.
  outcome_expr <- rlang::get_expr(enquo(outcome))
  # rlang::expr() processes `!!`, so the expression is spliced into
  # `<outcome> ~ group` before glm() ever sees it.
  model_call <- rlang::expr(!!outcome_expr ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
或者采用 @lionel 建议的一种更紧凑的方法:它避免了 enquo/quo_name/sym 转换,而是直接用 enexpr 捕获参数:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # enexpr() captures the argument as a bare expression; rlang::expr() then
  # splices it into `<outcome> ~ group` through `!!`.
  model_call <- rlang::expr(!!rlang::enexpr(outcome) ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
Base NSE 似乎也有效:
library(broom)
#' Logistic regression of a chosen outcome on `group`, using base R NSE.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted outcome: a column name, or an expression of columns.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # substitute() already yields the unevaluated outcome expression. Passing it
  # to reformulate() as the response keeps it intact; as.name() is only meant
  # for symbols and does not preserve a call such as log(y).
  model_formula <- stats::reformulate("group", response = substitute(outcome))
  model <- glm(model_formula, data = data, family = binomial(link = "logit"))
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
term estimate std.error statistic p.value conf.low conf.high
1 (Intercept) 0.8238636 0.1121528 -1.727556 0.08406792 0.6606245 1.025838
2 groupGroup 2 1.2587484 0.1571734 1.464102 0.14316606 0.9253116 1.713937
3 groupGroup 3 1.2490778 0.1550546 1.434369 0.15146698 0.9220209 1.693699
我正在尝试使用 tidyeval 进行编程。
我想编写一个函数,对所选的结果变量运行逻辑回归模型:
library(tidyverse)
# Reproducible toy data: 1000 rows, each with a randomly drawn three-level
# `group` label and a randomly drawn 0/1 `died` outcome.
set.seed(1234)
df <- tibble(
  id = seq_len(1000),
  group = sample(
    c("Group 1", "Group 2", "Group 3"),
    size = 1000,
    replace = TRUE
  ),
  died = sample(c(0, 1), size = 1000, replace = TRUE)
)
# First attempt: fit a logistic regression of `outcome` on `group` and return
# the tidied coefficients (exponentiated, with confidence intervals).
# NOTE: this version does not work; calling it raises the
# "invalid argument type" error quoted below.
myfunc <- function(data, outcome){
# Capture the unevaluated `outcome` argument as a quosure.
enquo_var <- enquo(outcome)
# `!!` is written directly inside the formula handed to glm(). Presumably
# glm() performs no tidyeval unquoting, so `!!` is evaluated as ordinary
# logical negation of the quosure, which would explain the error.
fit <- tidy(glm(!!enquo_var ~ group, data=data,
family = binomial(link = "logit")),
exponentiate = TRUE, conf.int=TRUE)
fit
}
myfunc(df, died)
但是得到:
Error in !enquo_var : invalid argument type
(注意:真实场景涉及更复杂的函数)。
这可能吗?
我们需要为 glm 构造一个它能够识别的公式。一种选择是使用 paste:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # The formula is assembled from text, e.g. "died~group", and converted
  # explicitly instead of relying on glm() to coerce a bare string.
  outcome_name <- quo_name(enquo(outcome))
  model_formula <- stats::as.formula(paste(outcome_name, "group", sep = "~"))
  model <- glm(model_formula, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
如果我们还需要使用tidyverse函数
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # Turn the captured argument into a symbol via its name.
  outcome_sym <- rlang::sym(quo_name(enquo(outcome)))
  # rlang::expr() processes `!!`, so the symbol is spliced into
  # `<outcome> ~ group` before glm() ever sees the expression.
  model_call <- rlang::expr(!!outcome_sym ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
或者,正如 @lionel 在评论中提到的,可以使用 get_expr:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # Pull the bare expression out of the quosure captured from `outcome`.
  outcome_expr <- rlang::get_expr(enquo(outcome))
  # rlang::expr() processes `!!`, so the expression is spliced into
  # `<outcome> ~ group` before glm() ever sees it.
  model_call <- rlang::expr(!!outcome_expr ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
或者采用 @lionel 建议的一种更紧凑的方法:它避免了 enquo/quo_name/sym 转换,而是直接用 enexpr 捕获参数:
#' Logistic regression of a chosen outcome on `group`, returned tidied.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted name of the outcome column.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # enexpr() captures the argument as a bare expression; rlang::expr() then
  # splices it into `<outcome> ~ group` through `!!`.
  model_call <- rlang::expr(!!rlang::enexpr(outcome) ~ group)
  model <- glm(model_call, data = data, family = binomial(link = "logit"))
  # tidy() is namespaced because library(tidyverse) does not attach broom.
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
# term estimate std.error statistic p.value conf.low conf.high
#1 (Intercept) 0.8715084 0.1095300 -1.2556359 0.20924801 0.7026185 1.079852
#2 groupGroup 2 0.9253515 0.1550473 -0.5003736 0.61681204 0.6826512 1.253959
#3 groupGroup 3 1.3692735 0.1557241 2.0181864 0.04357185 1.0095739 1.859403
Base NSE 似乎也有效:
library(broom)
#' Logistic regression of a chosen outcome on `group`, using base R NSE.
#'
#' @param data A data frame containing the outcome column and a `group` column.
#' @param outcome Unquoted outcome: a column name, or an expression of columns.
#' @return The `broom::tidy()` summary of the fitted model, with exponentiated
#'   estimates and confidence intervals.
myfunc <- function(data, outcome) {
  # substitute() already yields the unevaluated outcome expression. Passing it
  # to reformulate() as the response keeps it intact; as.name() is only meant
  # for symbols and does not preserve a call such as log(y).
  model_formula <- stats::reformulate("group", response = substitute(outcome))
  model <- glm(model_formula, data = data, family = binomial(link = "logit"))
  broom::tidy(model, exponentiate = TRUE, conf.int = TRUE)
}
myfunc(df, died)
term estimate std.error statistic p.value conf.low conf.high
1 (Intercept) 0.8238636 0.1121528 -1.727556 0.08406792 0.6606245 1.025838
2 groupGroup 2 1.2587484 0.1571734 1.464102 0.14316606 0.9253116 1.713937
3 groupGroup 3 1.2490778 0.1550546 1.434369 0.15146698 0.9220209 1.693699