如何省略 do.call 产生的巨大代码?
How to omit huge code produces by do.call?
我想构建一个函数 additive_glm
允许用户在需要时为 glm 函数指定附加参数。
让我们考虑数据:
set.seed(42)
bin_var <- sample(0:1, 125, T)
indep_1 <- rnorm(125)
indep_2 <- rexp(125)
df <- data.frame("Norm" = indep_1, "Exp" = indep_2)
我的函数additive_glm
:
additive_glm <- function(y, x, glm_args = NULL){
do.call("glm", c(list(
formula = y ~ ., data = base::quote(as.data.frame(x)),
family = binomial(link = 'logit')
), glm_args))
}
但现在如果我想 运行 我的函数:
additive(bin_var, df)
我得到:
Call: glm(formula = y ~ ., family = structure(list(family = "binomial",
link = "logit", linkfun = function (mu)
.Call(C_logit_link, mu), linkinv = function (eta)
.Call(C_logit_linkinv, eta), variance = function (mu)
mu * (1 - mu), dev.resids = function (y, mu, wt)
.Call(C_binomial_dev_resids, y, mu, wt), aic = function (y,
n, mu, wt, dev)
{
m <- if (any(n > 1))
n
else wt
-2 * sum(ifelse(m > 0, (wt/m), 0) * dbinom(round(m *
y), round(m), mu, log = TRUE))
}, mu.eta = function (eta)
.Call(C_logit_mu_eta, eta), initialize = expression({
if (NCOL(y) == 1) {
if (is.factor(y))
y <- y != levels(y)[1L]
n <- rep.int(1, nobs)
y[weights == 0] <- 0
if (any(y < 0 | y > 1))
stop("y values must be 0 <= y <= 1")
mustart <- (weights * y + 0.5)/(weights + 1)
m <- weights * y
if (any(abs(m - round(m)) > 0.001))
warning("non-integer #successes in a binomial glm!")
}
else if (NCOL(y) == 2) {
if (any(abs(y - round(y)) > 0.001))
warning("non-integer counts in a binomial glm!")
n <- y[, 1] + y[, 2]
y <- ifelse(n == 0, 0, y[, 1]/n)
weights <- weights * n
mustart <- (n * y + 0.5)/(n + 1)
}
else stop("for the 'binomial' family, y must be a vector of 0 and 1's\nor a 2 column matrix where col 1 is no. successes and col 2 is no. failures")
}), validmu = function (mu)
all(is.finite(mu)) && all(mu > 0 & mu < 1), valideta = function (eta)
TRUE, simulate = function (object, nsim)
{
ftd <- fitted(object)
n <- length(ftd)
ntot <- n * nsim
wts <- object$prior.weights
if (any(wts%%1 != 0))
stop("cannot simulate from non-integer prior.weights")
if (!is.null(m <- object$model)) {
y <- model.response(m)
if (is.factor(y)) {
yy <- factor(1 + rbinom(ntot, size = 1, prob = ftd),
labels = levels(y))
split(yy, rep(seq_len(nsim), each = n))
}
else if (is.matrix(y) && ncol(y) == 2) {
yy <- vector("list", nsim)
for (i in seq_len(nsim)) {
Y <- rbinom(n, size = wts, prob = ftd)
YY <- cbind(Y, wts - Y)
colnames(YY) <- colnames(y)
yy[[i]] <- YY
}
yy
}
else rbinom(ntot, size = wts, prob = ftd)/wts
}
else rbinom(ntot, size = wts, prob = ftd)/wts
}), class = "family"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.2235 -0.2501 -0.2612
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 173.2
Residual Deviance: 169.7 AIC: 175.7
所以我真的得到了我想要的,但是 - 它前面有巨大的 Call
代码。我一直在寻找一些摆脱它的技术,但是我并没有那么成功。你知道如何省略这一大部分不必要的代码吗?
我不明白你为什么要使用 do.call
。我会这样做:
additive_glm <- function(y, x, family = binomial(link = 'logit'), ...){
mc <- match.call()
yname <- mc[["y"]]
xname <- mc[["x"]]
x[[as.character(yname)]] <- y
assign(as.character(xname), x)
eval(substitute(glm(yname ~ ., data = xname, family = family, ...), env = environment()))
}
additive_glm(bin_var, df)
#Call: glm(formula = bin_var ~ ., family = binomial(link = "logit"),
# data = df)
#
#Coefficients:
#(Intercept) Norm Exp
# 0.32821 -0.06504 -0.05252
#
#Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
#Null Deviance: 171
#Residual Deviance: 170.7 AIC: 176.7
注意打印精美的电话。
1) 将家庭参数放在 quote(...)
中。仅更改标记为## 的行。
additive_glm <- function(y, x, glm_args = NULL){
do.call("glm", c(list(
formula = y ~ ., data = base::quote(as.data.frame(x)),
family = quote(binomial(link = 'logit')) ##
), glm_args))
}
additive_glm(bin_var, df)
给予:
Call: glm(formula = y ~ ., family = binomial(link = "logit"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.32821 -0.06504 -0.05252
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 171
Residual Deviance: 170.7 AIC: 176.7
2)另一种可能是:
additive_glm2 <- function(y, x, ...){
glm(y ~ ., data = as.data.frame(x), family = binomial(link = "logit"), ...)
}
additive_glm2(bin_var, df)
给予:
Call: glm(formula = y ~ ., family = binomial(link = "logit"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.32821 -0.06504 -0.05252
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 171
Residual Deviance: 170.7 AIC: 176.7
我想构建一个函数 additive_glm
允许用户在需要时为 glm 函数指定附加参数。
让我们考虑数据:
set.seed(42)
bin_var <- sample(0:1, 125, T)
indep_1 <- rnorm(125)
indep_2 <- rexp(125)
df <- data.frame("Norm" = indep_1, "Exp" = indep_2)
我的函数additive_glm
:
additive_glm <- function(y, x, glm_args = NULL){
do.call("glm", c(list(
formula = y ~ ., data = base::quote(as.data.frame(x)),
family = binomial(link = 'logit')
), glm_args))
}
但现在如果我想 运行 我的函数:
additive(bin_var, df)
我得到:
Call: glm(formula = y ~ ., family = structure(list(family = "binomial",
link = "logit", linkfun = function (mu)
.Call(C_logit_link, mu), linkinv = function (eta)
.Call(C_logit_linkinv, eta), variance = function (mu)
mu * (1 - mu), dev.resids = function (y, mu, wt)
.Call(C_binomial_dev_resids, y, mu, wt), aic = function (y,
n, mu, wt, dev)
{
m <- if (any(n > 1))
n
else wt
-2 * sum(ifelse(m > 0, (wt/m), 0) * dbinom(round(m *
y), round(m), mu, log = TRUE))
}, mu.eta = function (eta)
.Call(C_logit_mu_eta, eta), initialize = expression({
if (NCOL(y) == 1) {
if (is.factor(y))
y <- y != levels(y)[1L]
n <- rep.int(1, nobs)
y[weights == 0] <- 0
if (any(y < 0 | y > 1))
stop("y values must be 0 <= y <= 1")
mustart <- (weights * y + 0.5)/(weights + 1)
m <- weights * y
if (any(abs(m - round(m)) > 0.001))
warning("non-integer #successes in a binomial glm!")
}
else if (NCOL(y) == 2) {
if (any(abs(y - round(y)) > 0.001))
warning("non-integer counts in a binomial glm!")
n <- y[, 1] + y[, 2]
y <- ifelse(n == 0, 0, y[, 1]/n)
weights <- weights * n
mustart <- (n * y + 0.5)/(n + 1)
}
else stop("for the 'binomial' family, y must be a vector of 0 and 1's\nor a 2 column matrix where col 1 is no. successes and col 2 is no. failures")
}), validmu = function (mu)
all(is.finite(mu)) && all(mu > 0 & mu < 1), valideta = function (eta)
TRUE, simulate = function (object, nsim)
{
ftd <- fitted(object)
n <- length(ftd)
ntot <- n * nsim
wts <- object$prior.weights
if (any(wts%%1 != 0))
stop("cannot simulate from non-integer prior.weights")
if (!is.null(m <- object$model)) {
y <- model.response(m)
if (is.factor(y)) {
yy <- factor(1 + rbinom(ntot, size = 1, prob = ftd),
labels = levels(y))
split(yy, rep(seq_len(nsim), each = n))
}
else if (is.matrix(y) && ncol(y) == 2) {
yy <- vector("list", nsim)
for (i in seq_len(nsim)) {
Y <- rbinom(n, size = wts, prob = ftd)
YY <- cbind(Y, wts - Y)
colnames(YY) <- colnames(y)
yy[[i]] <- YY
}
yy
}
else rbinom(ntot, size = wts, prob = ftd)/wts
}
else rbinom(ntot, size = wts, prob = ftd)/wts
}), class = "family"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.2235 -0.2501 -0.2612
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 173.2
Residual Deviance: 169.7 AIC: 175.7
所以我真的得到了我想要的,但是 - 它前面有巨大的 Call
代码。我一直在寻找一些摆脱它的技术,但是我并没有那么成功。你知道如何省略这一大部分不必要的代码吗?
我不明白你为什么要使用 do.call
。我会这样做:
additive_glm <- function(y, x, family = binomial(link = 'logit'), ...){
mc <- match.call()
yname <- mc[["y"]]
xname <- mc[["x"]]
x[[as.character(yname)]] <- y
assign(as.character(xname), x)
eval(substitute(glm(yname ~ ., data = xname, family = family, ...), env = environment()))
}
additive_glm(bin_var, df)
#Call: glm(formula = bin_var ~ ., family = binomial(link = "logit"),
# data = df)
#
#Coefficients:
#(Intercept) Norm Exp
# 0.32821 -0.06504 -0.05252
#
#Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
#Null Deviance: 171
#Residual Deviance: 170.7 AIC: 176.7
注意打印精美的电话。
1) 将家庭参数放在 quote(...)
中。仅更改标记为## 的行。
additive_glm <- function(y, x, glm_args = NULL){
do.call("glm", c(list(
formula = y ~ ., data = base::quote(as.data.frame(x)),
family = quote(binomial(link = 'logit')) ##
), glm_args))
}
additive_glm(bin_var, df)
给予:
Call: glm(formula = y ~ ., family = binomial(link = "logit"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.32821 -0.06504 -0.05252
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 171
Residual Deviance: 170.7 AIC: 176.7
2)另一种可能是:
additive_glm2 <- function(y, x, ...){
glm(y ~ ., data = as.data.frame(x), family = binomial(link = "logit"), ...)
}
additive_glm2(bin_var, df)
给予:
Call: glm(formula = y ~ ., family = binomial(link = "logit"), data = as.data.frame(x))
Coefficients:
(Intercept) Norm Exp
0.32821 -0.06504 -0.05252
Degrees of Freedom: 124 Total (i.e. Null); 122 Residual
Null Deviance: 171
Residual Deviance: 170.7 AIC: 176.7