提高拟合许多模型的效率
Improve efficiency of fitting many models
感谢那些帮助我解决之前关于从许多模型中提取系数、p 值和 AIC 值的问题的人。现在,我可以从许多模型中获取所有系数、p 值和 AIC 值。
# Build one formula string per non-empty subset of the extra predictors in
# xlist (each appended to the base model md), then collect the cyl coefficient,
# its p-value and the AIC of every model fitted on mtcars.
md <- "mpg ~ cyl"
xlist <- c("disp", "hp", "am")
n <- length(xlist)
# seq_len(n) rather than 1:n: an empty xlist then yields no combinations
# instead of iterating over c(1, 0). FALSE is spelled out because F is an
# ordinary variable that can be reassigned. coef() replaces the partially
# matched `$coe`, and cyl is looked up by name instead of by position.
comb_lst <- unlist(lapply(seq_len(n), function(x) combn(xlist, x, simplify = FALSE)), recursive = FALSE)
md_lst <- lapply(comb_lst, function(x) paste(md, "+", paste(x, collapse = "+")))
coefs <- unlist(lapply(md_lst, function(x) coef(lm(as.formula(x), data = mtcars))["cyl"]))
pvalues <- unlist(lapply(md_lst, function(x) coef(summary(lm(as.formula(x), data = mtcars)))["cyl", 4]))
aic <- unlist(lapply(md_lst, function(x) AIC(lm(as.formula(x), data = mtcars))))
从上面的代码可以看出,最后 3 行中的每一行都将独立地拟合多个模型一次。这意味着代码将拟合同一组模型 3 次。对于大数据和许多变量,这可能很耗时。我的问题是如何一次拟合所有模型,然后再提取系数、p 值和 AIC 值。
你可以这样做:
# Fit every formula in md_lst exactly once and keep the fitted lm objects, so
# the three extraction steps below reuse them instead of refitting.
models <- lapply(md_lst, function(x) lm(as.formula(x), data = mtcars))
# vapply() checks that each model contributes exactly one numeric value.
# coef() replaces the partially matched `$coef`, and the cyl term is looked up
# by name rather than by position.
coefs <- vapply(models, function(x) coef(x)[["cyl"]], numeric(1))
# Column 4 of the summary coefficient table is the p-value column.
pvalues <- vapply(models, function(x) coef(summary(x))["cyl", 4], numeric(1))
aic <- vapply(models, AIC, numeric(1))
为方便起见,我使用了我自己编写的包(manymodelr)中的一个函数。
也可以像另一个答案所示那样改用 summary,从而避免依赖该包。
我们可以做到:
# Fit each formula string in md_lst once. do.call() hands lm() the formula
# text and the mtcars data frame as already-evaluated arguments.
models <- lapply(
  md_lst,
  function(frm) {
    lm_args <- list(formula = frm, data = mtcars)
    do.call(lm, lm_args)
  }
)
# For every fitted model, bind the coefficient vector to the "cyl" entry of
# the package's p_value output and to the model AIC.
sapply(
  models,
  function(fit) {
    estimates <- coef(fit)
    cyl_p <- manymodelr::extract_model_info(fit, "p_value")["cyl"]
    model_aic <- AIC(fit)
    # deparse.level = 0 keeps the columns unnamed even though the arguments
    # are now plain symbols.
    cbind(estimates, cyl_p, model_aic, deparse.level = 0)
  }
)
如果不想使用该包(manymodelr),可以这样写:
# For every fitted model, bind the coefficient vector to the cyl p-value
# (column 4 of the summary coefficient table) and to the model AIC.
sapply(
  models,
  function(fit) {
    estimates <- coef(fit)
    p_column <- coef(summary(fit))[, 4]
    cyl_p <- p_column["cyl"]
    model_aic <- AIC(fit)
    # deparse.level = 0 keeps the columns unnamed even though the arguments
    # are now plain symbols.
    cbind(estimates, cyl_p, model_aic, deparse.level = 0)
  }
)
这给了我们一个矩阵列表,其中每个矩阵的三列分别是系数、cyl 的 p 值和模型的 AIC;由于 cbind() 的循环补齐,后两列在每一行中重复同一个值。
结果(截断)
[[1]]
[,1] [,2] [,3]
(Intercept) 34.66099474 0.03366495 167.1456
cyl -1.58727681 0.03366495 167.1456
disp -0.02058363 0.03366495 167.1456
[[2]]
[,1] [,2] [,3]
(Intercept) 36.9083305 0.0004803752 169.5618
cyl -2.2646936 0.0004803752 169.5618
hp -0.0191217 0.0004803752 169.5618
[[3]]
[,1] [,2] [,3]
(Intercept) 34.522443 1.28456e-07 167.2191
cyl -2.500958 1.28456e-07 167.2191
am 2.567035 1.28456e-07 167.2191
[[4]]
[,1] [,2] [,3]
(Intercept) 34.18491917 0.1349044 168.0184
cyl -1.22741994 0.1349044 168.0184
disp -0.01883809 0.1349044 168.0184
hp -0.01467933 0.1349044 168.0184
这利用了可以在 combn() 调用中传入 FUN 参数的能力。与其他答案一样,每个模型只进行一次线性回归拟合。
# Fit mpg ~ cyl plus every non-empty subset of the predictors in xlist, fitting
# each model exactly once, then extract the cyl coefficient, its p-value and
# the AIC from the stored fits.
md <- "mpg ~ cyl"
xlist <- c("disp", "hp", "am")
all_models <- unlist(
  lapply(
    seq_along(xlist),
    function(k) {
      # combn() applies FUN to each size-k subset, so every model is fitted
      # while the combinations are generated.
      combn(
        xlist,
        k,
        FUN = function(x) {
          form <- formula(paste(md, "+", paste(x, collapse = "+")))
          # bquote() splices the formula object into the call before eval(),
          # so lm() receives the formula itself rather than the symbol `form`.
          eval(bquote(lm(.(form), data = mtcars)))
        },
        simplify = FALSE
      )
    }
  ),
  # Flatten only one level: from a list of per-k lists to a list of fits.
  recursive = FALSE
)
# vapply() checks that each model contributes exactly one numeric value.
# coef() replaces the partially matched `$coe`, and the cyl term is looked up
# by name rather than by position.
coefs <- vapply(all_models, function(x) coef(x)[["cyl"]], numeric(1))
# Column 4 of the summary coefficient table is the p-value column.
pvalues <- vapply(all_models, function(x) coef(summary(x))["cyl", 4], numeric(1))
aic <- vapply(all_models, AIC, numeric(1))
感谢那些帮助我解决之前关于从许多模型中提取系数、p 值和 AIC 值的问题的人。
# Build one formula string per non-empty subset of the extra predictors in
# xlist (each appended to the base model md), then collect the cyl coefficient,
# its p-value and the AIC of every model fitted on mtcars.
md <- "mpg ~ cyl"
xlist <- c("disp", "hp", "am")
n <- length(xlist)
# seq_len(n) rather than 1:n: an empty xlist then yields no combinations
# instead of iterating over c(1, 0). FALSE is spelled out because F is an
# ordinary variable that can be reassigned. coef() replaces the partially
# matched `$coe`, and cyl is looked up by name instead of by position.
comb_lst <- unlist(lapply(seq_len(n), function(x) combn(xlist, x, simplify = FALSE)), recursive = FALSE)
md_lst <- lapply(comb_lst, function(x) paste(md, "+", paste(x, collapse = "+")))
coefs <- unlist(lapply(md_lst, function(x) coef(lm(as.formula(x), data = mtcars))["cyl"]))
pvalues <- unlist(lapply(md_lst, function(x) coef(summary(lm(as.formula(x), data = mtcars)))["cyl", 4]))
aic <- unlist(lapply(md_lst, function(x) AIC(lm(as.formula(x), data = mtcars))))
从上面的代码可以看出,最后 3 行中的每一行都将独立地拟合多个模型一次。这意味着代码将拟合同一组模型 3 次。对于大数据和许多变量,这可能很耗时。我的问题是如何一次拟合所有模型,然后再提取系数、p 值和 AIC 值。
你可以这样做:
# Fit every formula in md_lst exactly once and keep the fitted lm objects, so
# the three extraction steps below reuse them instead of refitting.
models <- lapply(md_lst, function(x) lm(as.formula(x), data = mtcars))
# vapply() checks that each model contributes exactly one numeric value.
# coef() replaces the partially matched `$coef`, and the cyl term is looked up
# by name rather than by position.
coefs <- vapply(models, function(x) coef(x)[["cyl"]], numeric(1))
# Column 4 of the summary coefficient table is the p-value column.
pvalues <- vapply(models, function(x) coef(summary(x))["cyl", 4], numeric(1))
aic <- vapply(models, AIC, numeric(1))
为方便起见,我使用了我自己编写的包(manymodelr)中的一个函数。
也可以像另一个答案所示那样改用 summary,从而避免依赖该包。
我们可以做到:
# Fit each formula string in md_lst once. do.call() hands lm() the formula
# text and the mtcars data frame as already-evaluated arguments.
models <- lapply(
  md_lst,
  function(frm) {
    lm_args <- list(formula = frm, data = mtcars)
    do.call(lm, lm_args)
  }
)
# For every fitted model, bind the coefficient vector to the "cyl" entry of
# the package's p_value output and to the model AIC.
sapply(
  models,
  function(fit) {
    estimates <- coef(fit)
    cyl_p <- manymodelr::extract_model_info(fit, "p_value")["cyl"]
    model_aic <- AIC(fit)
    # deparse.level = 0 keeps the columns unnamed even though the arguments
    # are now plain symbols.
    cbind(estimates, cyl_p, model_aic, deparse.level = 0)
  }
)
如果不想使用该包(manymodelr),可以这样写:
# For every fitted model, bind the coefficient vector to the cyl p-value
# (column 4 of the summary coefficient table) and to the model AIC.
sapply(
  models,
  function(fit) {
    estimates <- coef(fit)
    p_column <- coef(summary(fit))[, 4]
    cyl_p <- p_column["cyl"]
    model_aic <- AIC(fit)
    # deparse.level = 0 keeps the columns unnamed even though the arguments
    # are now plain symbols.
    cbind(estimates, cyl_p, model_aic, deparse.level = 0)
  }
)
这给了我们一个矩阵列表,其中每个矩阵的三列分别是系数、cyl 的 p 值和模型的 AIC;由于 cbind() 的循环补齐,后两列在每一行中重复同一个值。
结果(截断)
[[1]]
[,1] [,2] [,3]
(Intercept) 34.66099474 0.03366495 167.1456
cyl -1.58727681 0.03366495 167.1456
disp -0.02058363 0.03366495 167.1456
[[2]]
[,1] [,2] [,3]
(Intercept) 36.9083305 0.0004803752 169.5618
cyl -2.2646936 0.0004803752 169.5618
hp -0.0191217 0.0004803752 169.5618
[[3]]
[,1] [,2] [,3]
(Intercept) 34.522443 1.28456e-07 167.2191
cyl -2.500958 1.28456e-07 167.2191
am 2.567035 1.28456e-07 167.2191
[[4]]
[,1] [,2] [,3]
(Intercept) 34.18491917 0.1349044 168.0184
cyl -1.22741994 0.1349044 168.0184
disp -0.01883809 0.1349044 168.0184
hp -0.01467933 0.1349044 168.0184
这利用了可以在 combn() 调用中传入 FUN 参数的能力。与其他答案一样,每个模型只进行一次线性回归拟合。
# Fit mpg ~ cyl plus every non-empty subset of the predictors in xlist, fitting
# each model exactly once, then extract the cyl coefficient, its p-value and
# the AIC from the stored fits.
md <- "mpg ~ cyl"
xlist <- c("disp", "hp", "am")
all_models <- unlist(
  lapply(
    seq_along(xlist),
    function(k) {
      # combn() applies FUN to each size-k subset, so every model is fitted
      # while the combinations are generated.
      combn(
        xlist,
        k,
        FUN = function(x) {
          form <- formula(paste(md, "+", paste(x, collapse = "+")))
          # bquote() splices the formula object into the call before eval(),
          # so lm() receives the formula itself rather than the symbol `form`.
          eval(bquote(lm(.(form), data = mtcars)))
        },
        simplify = FALSE
      )
    }
  ),
  # Flatten only one level: from a list of per-k lists to a list of fits.
  recursive = FALSE
)
# vapply() checks that each model contributes exactly one numeric value.
# coef() replaces the partially matched `$coe`, and the cyl term is looked up
# by name rather than by position.
coefs <- vapply(all_models, function(x) coef(x)[["cyl"]], numeric(1))
# Column 4 of the summary coefficient table is the p-value column.
pvalues <- vapply(all_models, function(x) coef(summary(x))["cyl", 4], numeric(1))
aic <- vapply(all_models, AIC, numeric(1))