具有估算数据的多项式回归
multinominal regression with imputed data
我需要估算缺失数据,然后使用生成的数据集进行多项式回归。我尝试使用鼠标进行插补,然后使用 nnet 的多项式函数进行多项式回归。但这给了我不可读的输出。这是一个使用 mice 包提供的 nhanes2 数据集的示例:
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('sample','pmm','logreg','norm'))
#age is categorical, bmi is continuous
m <- with(test, multinom(age ~ bmi, model = T))
summary(pool(m))
m1 <- with(test, lm(bmi ~ age, model = T))
summary(pool(m1))
使用上面的代码,'m' 的输出将系数列为 1、2、3 和 4,而不是它们的变量名。使用 lm 命令时 m1 的输出正确。这似乎是 运行 mira 对象上的多项式命令的问题。有人对如何解决这个问题有任何建议,或者对其他效果更好的插补和多项回归方法有建议吗?
编辑
现在已在 mice
的开发分支上更新,并按预期执行:请参阅 https://github.com/stefvanbuuren/mice/issues/85
使用 mulitnom
时有几个问题。 pool
方法混淆了系数和标准误差的顺序 - 所以(asaik)使用 pool
编辑的对象 return 是 not correct (实际上混淆了一点strong - multinom
模型没有特定的 pool
方法,因此它使用在这种情况下不太有效的默认值)。
此外,正如您提到的,然后名称被删除 - 这是由于 pool
调用 names(coef(modelobject))
,但是 multinom
模型 return 系数矩阵,并且所以没有 names
(他们有 rownames
& colnames
)。
因此您可以更改 pool
函数以适应 multinom
模型 - 请参阅下面的 pooly
(实际上您可以编写一个更小的函数来处理此模型 class,但我选择编写一种快速、更通用的方法,希望它不会破坏其他模型 classes,但需要注意的是,我还没有完全测试它没有。)
用你的例子测试
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('polyreg','pmm','logreg','norm'), print=0)
m <- with(test, multinom(age ~ bmi, model = T))
summary(pooly(m))
# est se t df Pr(>|t|) lo 95 hi 95 nmis fmi lambda
# 40-59:(Intercept) 5.8960594 4.5352921 1.300040 12.82882 0.21646037 -3.9151498 15.70726867 NA 0.2951384 0.1931975
# 40-59:bmi -0.2356516 0.1669807 -1.411250 12.81702 0.18198189 -0.5969156 0.12561248 NA 0.2955593 0.1935923
# 60-99:(Intercept) 8.2723321 4.7656701 1.735817 15.55876 0.10235284 -1.8537729 18.39843700 NA 0.1989371 0.1021831
# 60-99:bmi -0.3364014 0.1832718 -1.835533 15.03938 0.08627846 -0.7269469 0.05414413 NA 0.2174394 0.1198595
#
与(我认为)混淆了系数和 se 的原件相比
summary(pool(m))
定义接受 multinom
模型的函数。添加的代码旁边有注释。
pooly <- function (object, method = "smallsample") {
call <- match.call()
if (!is.mira(object))
stop("The object must have class 'mira'")
m <- length(object$analyses)
fa <- getfit(object, 1)
if (m == 1) {
warning("Number of multiple imputations m=1. No pooling done.")
return(fa)
}
analyses <- getfit(object)
if (class(fa)[1] == "lme" && !requireNamespace("nlme", quietly = TRUE))
stop("Package 'nlme' needed fo this function to work. Please install it.",
call. = FALSE)
if ((class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) && !requireNamespace("lme4",
quietly = TRUE))
stop("Package 'lme4' needed fo this function to work. Please install it.",
call. = FALSE)
mess <- try(coef(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no coef() method.")
mess <- try(vcov(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no vcov() method.")
if (class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) {
k <- length(lme4::fixef(fa))
names <- names(lme4::fixef(fa))
}
else if (class(fa)[1] == "polr") {
k <- length(coef(fa)) + length(fa$zeta)
names <- c(names(coef(fa)), names(fa$zeta))
}
# added this ---------------------------------
else if (class(fa)[1] == "multinom") {
k <- length(coef(fa))
names <- rownames(vcov(fa))
}
# --------------------------------------------
else {
k <- length(coef(fa))
names <- names(coef(fa))
}
qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(seq_len(m),
names))
u <- array(NA, dim = c(m, k, k), dimnames = list(seq_len(m),
names, names))
for (i in seq_len(m)) {
fit <- analyses[[i]]
if (class(fit)[1] == "mer") {
qhat[i, ] <- lme4::fixef(fit)
ui <- as.matrix(vcov(fit))
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class mer, fixef(fit): ",
ncol(qhat), ", as.matrix(vcov(fit)): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lmerMod" || inherits(fa, "merMod")) {
qhat[i, ] <- lme4::fixef(fit)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lmerMod, fixed(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lme") {
qhat[i, ] <- fit$coefficients$fixed
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lme, fit$coefficients$fixef: ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "polr") {
qhat[i, ] <- c(coef(fit), fit$zeta)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class polr, c(coef(fit, fit$zeta): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "survreg") {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
parnames <- dimnames(ui)[[1]]
select <- !(parnames %in% "Log(scale)")
ui <- ui[select, select]
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class survreg, coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# added this block -------------------------------------
else if (class(fit)[1] == "multinom") {
qhat[i, ] <- c(t(coef(fit))) # transpose to get same order as standard errors
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class multinom, c(coef(fit)): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# ----------------------------------------------------
else {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
ui <- expandvcov(qhat[i, ], ui)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
}
qbar <- apply(qhat, 2, mean)
ubar <- apply(u, c(2, 3), mean)
e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
b <- (t(e) %*% e)/(m - 1)
t <- ubar + (1 + 1/m) * b
r <- (1 + 1/m) * diag(b/ubar)
lambda <- (1 + 1/m) * diag(b/t)
dfcom <- df.residual(object)
df <- mice.df(m, lambda, dfcom, method)
fmi <- (r + 2/(df + 3))/(r + 1)
names(r) <- names(df) <- names(fmi) <- names(lambda) <- names
fit <- list(call = call, call1 = object$call, call2 = object$call1,
nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar,
ubar = ubar, b = b, t = t, r = r, dfcom = dfcom, df = df,
fmi = fmi, lambda = lambda)
oldClass(fit) <- c("mipo", oldClass(object))
return(fit)
}
environment(pooly) <- environment(mice)
我需要估算缺失数据,然后使用生成的数据集进行多项式回归。我尝试使用鼠标进行插补,然后使用 nnet 的多项式函数进行多项式回归。但这给了我不可读的输出。这是一个使用 mice 包提供的 nhanes2 数据集的示例:
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('sample','pmm','logreg','norm'))
#age is categorical, bmi is continuous
m <- with(test, multinom(age ~ bmi, model = T))
summary(pool(m))
m1 <- with(test, lm(bmi ~ age, model = T))
summary(pool(m1))
使用上面的代码,'m' 的输出将系数列为 1、2、3 和 4,而不是它们的变量名。使用 lm 命令时 m1 的输出正确。这似乎是 运行 mira 对象上的多项式命令的问题。有人对如何解决这个问题有任何建议,或者对其他效果更好的插补和多项回归方法有建议吗?
编辑
现在已在 mice
的开发分支上更新,并按预期执行:请参阅 https://github.com/stefvanbuuren/mice/issues/85
使用 mulitnom
时有几个问题。 pool
方法混淆了系数和标准误差的顺序 - 所以(asaik)使用 pool
编辑的对象 return 是 not correct (实际上混淆了一点strong - multinom
模型没有特定的 pool
方法,因此它使用在这种情况下不太有效的默认值)。
此外,正如您提到的,然后名称被删除 - 这是由于 pool
调用 names(coef(modelobject))
,但是 multinom
模型 return 系数矩阵,并且所以没有 names
(他们有 rownames
& colnames
)。
因此您可以更改 pool
函数以适应 multinom
模型 - 请参阅下面的 pooly
(实际上您可以编写一个更小的函数来处理此模型 class,但我选择编写一种快速、更通用的方法,希望它不会破坏其他模型 classes,但需要注意的是,我还没有完全测试它没有。)
用你的例子测试
library(mice)
library(nnet)
test <- mice(nhanes2, meth=c('polyreg','pmm','logreg','norm'), print=0)
m <- with(test, multinom(age ~ bmi, model = T))
summary(pooly(m))
# est se t df Pr(>|t|) lo 95 hi 95 nmis fmi lambda
# 40-59:(Intercept) 5.8960594 4.5352921 1.300040 12.82882 0.21646037 -3.9151498 15.70726867 NA 0.2951384 0.1931975
# 40-59:bmi -0.2356516 0.1669807 -1.411250 12.81702 0.18198189 -0.5969156 0.12561248 NA 0.2955593 0.1935923
# 60-99:(Intercept) 8.2723321 4.7656701 1.735817 15.55876 0.10235284 -1.8537729 18.39843700 NA 0.1989371 0.1021831
# 60-99:bmi -0.3364014 0.1832718 -1.835533 15.03938 0.08627846 -0.7269469 0.05414413 NA 0.2174394 0.1198595
#
与(我认为)混淆了系数和 se 的原件相比
summary(pool(m))
定义接受 multinom
模型的函数。添加的代码旁边有注释。
pooly <- function (object, method = "smallsample") {
call <- match.call()
if (!is.mira(object))
stop("The object must have class 'mira'")
m <- length(object$analyses)
fa <- getfit(object, 1)
if (m == 1) {
warning("Number of multiple imputations m=1. No pooling done.")
return(fa)
}
analyses <- getfit(object)
if (class(fa)[1] == "lme" && !requireNamespace("nlme", quietly = TRUE))
stop("Package 'nlme' needed fo this function to work. Please install it.",
call. = FALSE)
if ((class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) && !requireNamespace("lme4",
quietly = TRUE))
stop("Package 'lme4' needed fo this function to work. Please install it.",
call. = FALSE)
mess <- try(coef(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no coef() method.")
mess <- try(vcov(fa), silent = TRUE)
if (inherits(mess, "try-error"))
stop("Object has no vcov() method.")
if (class(fa)[1] == "mer" || class(fa)[1] == "lmerMod" ||
inherits(fa, "merMod")) {
k <- length(lme4::fixef(fa))
names <- names(lme4::fixef(fa))
}
else if (class(fa)[1] == "polr") {
k <- length(coef(fa)) + length(fa$zeta)
names <- c(names(coef(fa)), names(fa$zeta))
}
# added this ---------------------------------
else if (class(fa)[1] == "multinom") {
k <- length(coef(fa))
names <- rownames(vcov(fa))
}
# --------------------------------------------
else {
k <- length(coef(fa))
names <- names(coef(fa))
}
qhat <- matrix(NA, nrow = m, ncol = k, dimnames = list(seq_len(m),
names))
u <- array(NA, dim = c(m, k, k), dimnames = list(seq_len(m),
names, names))
for (i in seq_len(m)) {
fit <- analyses[[i]]
if (class(fit)[1] == "mer") {
qhat[i, ] <- lme4::fixef(fit)
ui <- as.matrix(vcov(fit))
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class mer, fixef(fit): ",
ncol(qhat), ", as.matrix(vcov(fit)): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lmerMod" || inherits(fa, "merMod")) {
qhat[i, ] <- lme4::fixef(fit)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lmerMod, fixed(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "lme") {
qhat[i, ] <- fit$coefficients$fixed
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class lme, fit$coefficients$fixef: ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "polr") {
qhat[i, ] <- c(coef(fit), fit$zeta)
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class polr, c(coef(fit, fit$zeta): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
else if (class(fit)[1] == "survreg") {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
parnames <- dimnames(ui)[[1]]
select <- !(parnames %in% "Log(scale)")
ui <- ui[select, select]
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class survreg, coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# added this block -------------------------------------
else if (class(fit)[1] == "multinom") {
qhat[i, ] <- c(t(coef(fit))) # transpose to get same order as standard errors
ui <- vcov(fit)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: class multinom, c(coef(fit)): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
# ----------------------------------------------------
else {
qhat[i, ] <- coef(fit)
ui <- vcov(fit)
ui <- expandvcov(qhat[i, ], ui)
if (ncol(ui) != ncol(qhat))
stop("Different number of parameters: coef(fit): ",
ncol(qhat), ", vcov(fit): ", ncol(ui))
u[i, , ] <- array(ui, dim = c(1, dim(ui)))
}
}
qbar <- apply(qhat, 2, mean)
ubar <- apply(u, c(2, 3), mean)
e <- qhat - matrix(qbar, nrow = m, ncol = k, byrow = TRUE)
b <- (t(e) %*% e)/(m - 1)
t <- ubar + (1 + 1/m) * b
r <- (1 + 1/m) * diag(b/ubar)
lambda <- (1 + 1/m) * diag(b/t)
dfcom <- df.residual(object)
df <- mice.df(m, lambda, dfcom, method)
fmi <- (r + 2/(df + 3))/(r + 1)
names(r) <- names(df) <- names(fmi) <- names(lambda) <- names
fit <- list(call = call, call1 = object$call, call2 = object$call1,
nmis = object$nmis, m = m, qhat = qhat, u = u, qbar = qbar,
ubar = ubar, b = b, t = t, r = r, dfcom = dfcom, df = df,
fmi = fmi, lambda = lambda)
oldClass(fit) <- c("mipo", oldClass(object))
return(fit)
}
environment(pooly) <- environment(mice)