对插补数据做描述性统计的自定义函数出现问题
Issue with user-defined function for descriptive statistics from imputed data
我正在尝试编写一个函数,用于计算多重插补数据集(mids 对象)中变量的均值和标准差(SD)。代码在函数外运行良好(如下面两个示例所示),但放进函数后却得到不可靠的结果:尽管调用时指定的是 chl,该函数似乎仍然返回 bmi 的结果。
感谢您对此问题的任何见解。最终我希望这个函数能够一次计算多个变量(即 bmi 和 chl)的均值和标准差,但这可能是一个单独的问题。
library(mice, warn.conflicts = FALSE)
# Load the example data and run mice with m = 3 and a fixed seed, so the
# numbers printed below can be reproduced.
data(nhanes)
imp <- mice(nhanes, m = 3, print = FALSE, seed = 123)

# workflow that i want to automate
# from here: https://bookdown.org/mwheymans/bookmi/data-analysis-after-multiple-imputation.html

# example 1 - bmi
# Stack the completed data in long format, compute (mean, SD) within each
# value of .imp, then average those pairs over the imputations.
impdat <- mice::complete(imp, action = "long", include = FALSE)
pool_mean <- by(impdat, impdat$.imp, function(d) {
  c(mean(d$bmi), sd(d$bmi))
})
result <- Reduce(`+`, pool_mean) / length(pool_mean)
print(result)
#> [1] 27.117333 3.980506
rm(impdat, pool_mean, result)

# example 2 - chl
# Same steps as example 1, applied to the chl column.
impdat <- mice::complete(imp, action = "long", include = FALSE)
pool_mean <- by(impdat, impdat$.imp, function(d) {
  c(mean(d$chl), sd(d$chl))
})
result <- Reduce(`+`, pool_mean) / length(pool_mean)
print(result)
#> [1] 195.10667 39.95247
rm(impdat, pool_mean, result)
# automating the workflow
# Broken attempt that reproduces the problem described in the question.
# a: object handed to mice::complete().
# b: meant to name the variable to summarise, but its value is never used:
#    `x$b` below asks for a column literally called "b", not for the column
#    named by the argument `b`.
automate <- function(a, b) {
# stack the completed data in long format (action = "long")
impdat <- mice::complete(a, action = "long", include = FALSE)
# (mean, SD) within each value of .imp
# NOTE(review): `$` matches names partially, so `x$b` presumably resolves to
# the `bmi` column on every call - which would explain the identical output
# below; confirm the data has no other column starting with "b".
pool_mean <- with(impdat, by(impdat, .imp, function(x) c(mean(x$b), sd(x$b))))
# average the per-imputation (mean, SD) pairs
result <- (Reduce("+", pool_mean)/length(pool_mean))
print(result)
}
# Both calls print the same numbers, i.e. the result does not depend on `b`.
automate(a=imp, b=bmi) # looks correct ... ?
#> [1] 27.117333 3.980506
automate(a=imp, b=chl) # no, it isn't
#> [1] 27.117333 3.980506
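这里有“两个半”问题:
1. b = bmi 看起来是在引用一个名为 bmi 的对象,而它在我们的全局环境中并不存在。为此可以使用 deparse(substitute(x)):它让函数先捕获尚未求值的参数表达式,再把它转换成字符串,而不是立即对参数求值。
2. 取值运算符 $,参见 ?Extract:[[ 和 $ 都只选取列表中的单个元素;主要区别在于 $ 不允许使用计算得到的索引,而 [[ 允许。因此 x$b 查找的是字面上名为 b 的列;又因为 $ 会对名称做部分匹配,b 被匹配到了 bmi,这正是原函数总是返回 bmi 结果的原因。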
#' Pool the mean and SD of one variable across imputed data sets
#'
#' Computes the mean and standard deviation of `b` within each value of
#' `.imp` in the long-format completed data and averages those pairs.
#'
#' @param a Object passed to `mice::complete()` (e.g. a `mids` object).
#' @param b Variable to summarise, given as a bare name (`bmi`) or as a
#'   single string (`"bmi"`).
#' @return Numeric vector of length two: the averaged mean and averaged SD.
automate <- function(a, b) {
  # Capture the argument unevaluated so a bare column name is not looked up
  # as an object; as.character() maps both the symbol bmi and the string
  # "bmi" to "bmi" (deparse() would add quotes to the string form).
  b <- substitute(b)
  if (!is.name(b) && !(is.character(b) && length(b) == 1L)) {
    stop("`b` must be a single variable name", call. = FALSE)
  }
  b <- as.character(b)

  impdat <- mice::complete(a, action = "long", include = FALSE)
  if (!b %in% names(impdat)) {
    stop("Variable `", b, "` not found in the imputed data", call. = FALSE)
  }

  # `[[` accepts a computed index and matches names exactly, unlike `$`.
  # The closure is deliberately not created inside with(impdat, ...), where
  # a data column named "b" would mask the local `b`.
  pool_mean <- by(
    impdat,
    impdat$.imp,
    function(x) c(mean(x[[b]]), sd(x[[b]]))
  )
  Reduce("+", pool_mean) / length(pool_mean)
}
automate(a=imp, b=bmi)
#> [1] 27.117333 3.980506
automate(a=imp, b=chl)
#> [1] 195.10667 39.95247
要在变量列表上执行此操作,我们可以将其稍微重写为
#' Pool the mean and SD of several variables across imputed data sets
#'
#' For every requested variable, computes the mean and standard deviation
#' within each value of `.imp` in the long-format completed data and averages
#' those pairs.
#'
#' @param a Object passed to `mice::complete()` (e.g. a `mids` object).
#' @param ... Variable names as strings, e.g. `"bmi", "chl"`; character
#'   vectors such as `c("bmi", "chl")` are accepted as well.
#' @return Named list with one element per variable, each a numeric vector
#'   of length two (averaged mean, averaged SD).
automate_list <- function(a, ...) {
  # Flatten the dots so separate strings and character vectors both work.
  vars <- as.character(unlist(list(...), use.names = FALSE))
  impdat <- mice::complete(a, action = "long", include = FALSE)

  missing_vars <- setdiff(vars, names(impdat))
  if (length(missing_vars) > 0) {
    stop(
      "Variable(s) not found in the imputed data: ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  lapply(vars, function(v) {
    # Index with the string itself; no conversion to a symbol is needed.
    # The closure is not created inside with(impdat, ...), so a data column
    # sharing the name of a local variable cannot mask it.
    pool_mean <- by(
      impdat,
      impdat$.imp,
      function(y) c(mean(y[[v]]), sd(y[[v]]))
    )
    Reduce("+", pool_mean) / length(pool_mean)
  }) |>
    setNames(vars)
}
automate_list(imp, "bmi", "chl")
#> $bmi
#> [1] 27.117333 3.980506
#> $chl
#> [1] 195.10667 39.95247
我正在尝试编写一个函数,用于计算多重插补数据集(mids 对象)中变量的均值和标准差(SD)。代码在函数外运行良好(如下面两个示例所示),但放进函数后却得到不可靠的结果:尽管调用时指定的是 chl,该函数似乎仍然返回 bmi 的结果。
感谢您对此问题的任何见解。最终我希望这个函数能够一次计算多个变量(即 bmi 和 chl)的均值和标准差,但这可能是一个单独的问题。
library(mice, warn.conflicts = FALSE)
# Load the example data and run mice with m = 3 and a fixed seed, so the
# numbers printed below can be reproduced.
data(nhanes)
imp <- mice(nhanes, m = 3, print = FALSE, seed = 123)

# workflow that i want to automate
# from here: https://bookdown.org/mwheymans/bookmi/data-analysis-after-multiple-imputation.html

# example 1 - bmi
# Stack the completed data in long format, compute (mean, SD) within each
# value of .imp, then average those pairs over the imputations.
impdat <- mice::complete(imp, action = "long", include = FALSE)
pool_mean <- by(impdat, impdat$.imp, function(d) {
  c(mean(d$bmi), sd(d$bmi))
})
result <- Reduce(`+`, pool_mean) / length(pool_mean)
print(result)
#> [1] 27.117333 3.980506
rm(impdat, pool_mean, result)

# example 2 - chl
# Same steps as example 1, applied to the chl column.
impdat <- mice::complete(imp, action = "long", include = FALSE)
pool_mean <- by(impdat, impdat$.imp, function(d) {
  c(mean(d$chl), sd(d$chl))
})
result <- Reduce(`+`, pool_mean) / length(pool_mean)
print(result)
#> [1] 195.10667 39.95247
rm(impdat, pool_mean, result)
# automating the workflow
# Broken attempt that reproduces the problem described in the question.
# a: object handed to mice::complete().
# b: meant to name the variable to summarise, but its value is never used:
#    `x$b` below asks for a column literally called "b", not for the column
#    named by the argument `b`.
automate <- function(a, b) {
# stack the completed data in long format (action = "long")
impdat <- mice::complete(a, action = "long", include = FALSE)
# (mean, SD) within each value of .imp
# NOTE(review): `$` matches names partially, so `x$b` presumably resolves to
# the `bmi` column on every call - which would explain the identical output
# below; confirm the data has no other column starting with "b".
pool_mean <- with(impdat, by(impdat, .imp, function(x) c(mean(x$b), sd(x$b))))
# average the per-imputation (mean, SD) pairs
result <- (Reduce("+", pool_mean)/length(pool_mean))
print(result)
}
# Both calls print the same numbers, i.e. the result does not depend on `b`.
automate(a=imp, b=bmi) # looks correct ... ?
#> [1] 27.117333 3.980506
automate(a=imp, b=chl) # no, it isn't
#> [1] 27.117333 3.980506
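这里有“两个半”问题:
1. b = bmi 看起来是在引用一个名为 bmi 的对象,而它在我们的全局环境中并不存在。为此可以使用 deparse(substitute(x)):它让函数先捕获尚未求值的参数表达式,再把它转换成字符串,而不是立即对参数求值。
2. 取值运算符 $,参见 ?Extract:[[ 和 $ 都只选取列表中的单个元素;主要区别在于 $ 不允许使用计算得到的索引,而 [[ 允许。因此 x$b 查找的是字面上名为 b 的列;又因为 $ 会对名称做部分匹配,b 被匹配到了 bmi,这正是原函数总是返回 bmi 结果的原因。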
#' Pool the mean and SD of one variable across imputed data sets
#'
#' Computes the mean and standard deviation of `b` within each value of
#' `.imp` in the long-format completed data and averages those pairs.
#'
#' @param a Object passed to `mice::complete()` (e.g. a `mids` object).
#' @param b Variable to summarise, given as a bare name (`bmi`) or as a
#'   single string (`"bmi"`).
#' @return Numeric vector of length two: the averaged mean and averaged SD.
automate <- function(a, b) {
  # Capture the argument unevaluated so a bare column name is not looked up
  # as an object; as.character() maps both the symbol bmi and the string
  # "bmi" to "bmi" (deparse() would add quotes to the string form).
  b <- substitute(b)
  if (!is.name(b) && !(is.character(b) && length(b) == 1L)) {
    stop("`b` must be a single variable name", call. = FALSE)
  }
  b <- as.character(b)

  impdat <- mice::complete(a, action = "long", include = FALSE)
  if (!b %in% names(impdat)) {
    stop("Variable `", b, "` not found in the imputed data", call. = FALSE)
  }

  # `[[` accepts a computed index and matches names exactly, unlike `$`.
  # The closure is deliberately not created inside with(impdat, ...), where
  # a data column named "b" would mask the local `b`.
  pool_mean <- by(
    impdat,
    impdat$.imp,
    function(x) c(mean(x[[b]]), sd(x[[b]]))
  )
  Reduce("+", pool_mean) / length(pool_mean)
}
automate(a=imp, b=bmi)
#> [1] 27.117333 3.980506
automate(a=imp, b=chl)
#> [1] 195.10667 39.95247
要在变量列表上执行此操作,我们可以将其稍微重写为
#' Pool the mean and SD of several variables across imputed data sets
#'
#' For every requested variable, computes the mean and standard deviation
#' within each value of `.imp` in the long-format completed data and averages
#' those pairs.
#'
#' @param a Object passed to `mice::complete()` (e.g. a `mids` object).
#' @param ... Variable names as strings, e.g. `"bmi", "chl"`; character
#'   vectors such as `c("bmi", "chl")` are accepted as well.
#' @return Named list with one element per variable, each a numeric vector
#'   of length two (averaged mean, averaged SD).
automate_list <- function(a, ...) {
  # Flatten the dots so separate strings and character vectors both work.
  vars <- as.character(unlist(list(...), use.names = FALSE))
  impdat <- mice::complete(a, action = "long", include = FALSE)

  missing_vars <- setdiff(vars, names(impdat))
  if (length(missing_vars) > 0) {
    stop(
      "Variable(s) not found in the imputed data: ",
      paste(missing_vars, collapse = ", "),
      call. = FALSE
    )
  }

  lapply(vars, function(v) {
    # Index with the string itself; no conversion to a symbol is needed.
    # The closure is not created inside with(impdat, ...), so a data column
    # sharing the name of a local variable cannot mask it.
    pool_mean <- by(
      impdat,
      impdat$.imp,
      function(y) c(mean(y[[v]]), sd(y[[v]]))
    )
    Reduce("+", pool_mean) / length(pool_mean)
  }) |>
    setNames(vars)
}
automate_list(imp, "bmi", "chl")
#> $bmi
#> [1] 27.117333 3.980506
#> $chl
#> [1] 195.10667 39.95247