如何 运行 AIC 对来自不同模型的 anova 输出
How to run AIC on anova output from different models
这是我使用方差分析的一小部分数据。
dput(c)
structure(list(Mouse.ID = c("DO.1307", "DO.1309", "DO.1311",
"DO.1312", "DO.1316", "DO.1318", "DO.0661", "DO.0669", "DO.0670",
"DO.0673", "DO.0674", "DO.0676"), Sex = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
fAge = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 3L,
2L, 2L), .Label = c("6", "12", "18"), class = "factor"),
Index = structure(c(14L, 5L, 2L, 1L, 6L, 8L, 21L, 24L, 11L,
20L, 12L, 19L), .Label = c("AR001", "AR002", "AR003", "AR004",
"AR005", "AR006", "AR007", "AR008", "AR009", "AR010", "AR011",
"AR012", "AR013", "AR014", "AR015", "AR016", "AR018", "AR019",
"AR020", "AR021", "AR022", "AR023", "AR025", "AR027"), class = "factor"),
Lane = structure(c(7L, 8L, 2L, 1L, 1L, 4L, 6L, 2L, 4L, 5L,
5L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"
), class = "factor"), Gen = structure(c(5L, 5L, 5L, 5L, 5L,
5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("8", "9", "10", "11",
"12"), class = "factor"), PC1 = c(42.4767703038827, 23.4689101841499,
53.38140726592, 94.8512438334955, 65.7816795225369, 44.245669398443,
-23.147618298858, -23.004329868562, -17.0024755772689, -23.9178589007844,
-56.7766982399411, -34.3969872418573), PC2 = c(45.5944770078485,
-1.65198338778925, -17.8737777017898, 15.3618046450436, -15.2567007817601,
-61.1178242127913, 40.5243564641241, 2.99206119995141, -61.4176842149059,
7.10965422446634, 7.28461966315024, -64.1955797075099), PC3 = c(-1.49628266803638,
-1.91335263173293, 5.93465541152824, -16.0302848659995, -10.0886088958855,
-0.944109507320403, -17.0598627155672, -22.1038475592448,
-6.25238299099893, 23.500307567532, 53.4553992426852, -20.1077749520339
), PC4 = c(1.84549247981521, -7.13567033720484, -17.9966832514214,
71.4430035621693, -4.19421663100475, 2.68614263191578, -5.37605681469604,
28.8757760174757, 1.96723351126677, 10.1757811517044, 7.63553142427313,
-0.61083387825962), PC5 = c(-1.5167549402912, -6.684624033326,
-7.16355679965928, -15.4471168600897, -10.8771574792765,
-10.091229971761, 2.49156058897602, -2.2801673669604, -5.45494631567109,
-5.44682692111089, -7.21616736676726, -11.0786655194642),
PC6 = c(1.77085439958321, -7.71184868806282, 6.45110081614657,
-8.40674843913856, 7.94013760009736, 5.73178334378453, -11.625850369587,
1.54093546690149, -4.87370378395642, -22.0735137415442, -2.44337914021456,
0.619440592140127), PC7 = c(-3.54879153004361, 10.5229133300871,
5.47882712621914, 1.0641123870231, -11.584471111461, 1.02312563446584,
7.20873385839409, -17.719801994905, -0.811301497692041, 7.55418040146638,
-4.68437054723712, 1.1158744957288), PC8 = c(9.39979923528603,
-9.73821309966427, 8.49270466097171, 14.6373631871744, 11.116335443361,
-4.27823458955497, -7.19678837565302, 6.24827779166403, 0.224651092284126,
6.10960416152842, -14.6615234719377, -0.410198021192528)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
所有可能组合的 运行 方差分析
我运行
iv <- c("Sex", "fAge", "Index", "Lane", "Gen")
dv <- paste0('PC', 1:8)
rhs <- unlist(sapply(1:length(iv), function(m) apply(combn(iv, m = m), 2, paste, collapse = ' + ')))
frms <- with(expand.grid(dv, rhs), paste(Var1, Var2, sep = ' ~ '))
models <- lapply(frms, function(x) anova(lm(x, data = mrna.pcs)))
names(models) <- frms
> class(models)
[1] "list"
> class(models$`PC1 ~ Sex`)
[1] "anova" "data.frame"
我尝试 运行 每个型号的 AIC
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")
但我确定我做错了什么非常感谢任何帮助或建议
我期待的输出
lhs <- c('mpg', 'cyl', 'disp')
rhs <- c('hp', 'drat')
models = list()
for (i in lhs){
for (j in rhs){
models[[paste(i, "vs", j)]] <- lm(as.formula(paste(i, "~", j)), data = mtcars)
}
}
coefs_mat = expand.grid(lhs, rhs)
mods = apply(coefs_mat, 1, function(row) {
lm(as.formula(paste(row[1], "~", row[2])), data = mtcars)
})
names(mods) = with(coefs_mat, paste(Var1, "vs", Var2))
coefs = lapply(mods, tidy, simplify = F)
# combine
dplyr::bind_rows(coefs, .id = "mod")
summ = lapply(mods, glance, simplify = F)
dplyr::bind_rows(summ, .id = "mod")
这是我期待的输出,输出显示 table 中的 AIC 加上其他数据
mod r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 mpg vs hp 0.602 0.589 3.86 45.5 0.000000179 1 -87.6 181. 186. 448. 30 32
2 cyl vs hp 0.693 0.683 1.01 67.7 0.00000000348 1 -44.6 95.1 99.5 30.4 30 32
3 disp vs hp 0.626 0.613 77.1 50.1 0.0000000714 1 -183. 373. 377. 178284. 30 32
4 mpg vs drat 0.464 0.446 4.49 26.0 0.0000178 1 -92.4 191. 195. 604. 30 32
5 cyl vs drat 0.490 0.473 1.30 28.8 0.00000824 1 -52.7 111. 116. 50.4 30 32
6 disp vs drat 0.504 0.488 88.7 30.5 0.00000528 1 -188. 382. 386. 235995. 30 32
UPDATE
问题解决如下代码
models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)
在上面的代码中使用了 lm
并检查了 model_coef 和性能
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")
当我使用 lm
我在最终 model_performance 输出中得到 AIC 值时效果很好。
但我最初问过 anova
,这就是为什么我要 运行 这个问题,问题是是否可以在 anova
模型上找到 AIC
现在当我运行这个
models3 <- sapply(frms, function(x) anova(lm(x, data = mrna.pcs)), simplify = FALSE, USE.NAMES = TRUE)
model_coefs = map_df(models3, tidy, .id="Model")
model_performance = map_df(models3, glance, .id="Model")
最终输出中没有 AIC 列输出是这样的
model_performance
# A tibble: 24 × 5
Model nrow ncol complete.obs na.fraction
<chr> <int> <int> <int> <dbl>
1 PC1 ~ Sex 2 5 1 0.2
2 PC2 ~ Sex 2 5 1 0.2
3 PC3 ~ Sex 2 5 1 0.2
4 PC4 ~ Sex 2 5 1 0.2
5 PC5 ~ Sex 2 5 1 0.2
6 PC6 ~ Sex 2 5 1 0.2
7 PC7 ~ Sex 2 5 1 0.2
8 PC8 ~ Sex 2 5 1 0.2
9 PC1 ~ fAge 2 5 1 0.2
10 PC2 ~ fAge 2 5 1 0.2
当我运行这个
data.frame(model=names(models3), AIC=sapply(models3, AIC))
收到错误
Error in UseMethod("logLik") :
no applicable method for 'logLik' applied to an object of class "c('anova', 'data.frame')"
这里猜测一下,因为不清楚您期望的输出结果。
您的 models
对象是方差分析列表,而不是模型列表。如果您想要 AIC(是吗?),您需要创建模型列表。
models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)
data.frame(model=names(models), AIC=sapply(models, AIC))
请注意,包含 Index
作为预测变量的模型会产生垃圾,这不足为奇。
编辑:容纳OP的附加信息:
您可以使用上面模型列表中的 broom
函数生成您寻找的 table:
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")
这是我使用方差分析的一小部分数据。
dput(c)
structure(list(Mouse.ID = c("DO.1307", "DO.1309", "DO.1311",
"DO.1312", "DO.1316", "DO.1318", "DO.0661", "DO.0669", "DO.0670",
"DO.0673", "DO.0674", "DO.0676"), Sex = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"),
fAge = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 3L,
2L, 2L), .Label = c("6", "12", "18"), class = "factor"),
Index = structure(c(14L, 5L, 2L, 1L, 6L, 8L, 21L, 24L, 11L,
20L, 12L, 19L), .Label = c("AR001", "AR002", "AR003", "AR004",
"AR005", "AR006", "AR007", "AR008", "AR009", "AR010", "AR011",
"AR012", "AR013", "AR014", "AR015", "AR016", "AR018", "AR019",
"AR020", "AR021", "AR022", "AR023", "AR025", "AR027"), class = "factor"),
Lane = structure(c(7L, 8L, 2L, 1L, 1L, 4L, 6L, 2L, 4L, 5L,
5L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"
), class = "factor"), Gen = structure(c(5L, 5L, 5L, 5L, 5L,
5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("8", "9", "10", "11",
"12"), class = "factor"), PC1 = c(42.4767703038827, 23.4689101841499,
53.38140726592, 94.8512438334955, 65.7816795225369, 44.245669398443,
-23.147618298858, -23.004329868562, -17.0024755772689, -23.9178589007844,
-56.7766982399411, -34.3969872418573), PC2 = c(45.5944770078485,
-1.65198338778925, -17.8737777017898, 15.3618046450436, -15.2567007817601,
-61.1178242127913, 40.5243564641241, 2.99206119995141, -61.4176842149059,
7.10965422446634, 7.28461966315024, -64.1955797075099), PC3 = c(-1.49628266803638,
-1.91335263173293, 5.93465541152824, -16.0302848659995, -10.0886088958855,
-0.944109507320403, -17.0598627155672, -22.1038475592448,
-6.25238299099893, 23.500307567532, 53.4553992426852, -20.1077749520339
), PC4 = c(1.84549247981521, -7.13567033720484, -17.9966832514214,
71.4430035621693, -4.19421663100475, 2.68614263191578, -5.37605681469604,
28.8757760174757, 1.96723351126677, 10.1757811517044, 7.63553142427313,
-0.61083387825962), PC5 = c(-1.5167549402912, -6.684624033326,
-7.16355679965928, -15.4471168600897, -10.8771574792765,
-10.091229971761, 2.49156058897602, -2.2801673669604, -5.45494631567109,
-5.44682692111089, -7.21616736676726, -11.0786655194642),
PC6 = c(1.77085439958321, -7.71184868806282, 6.45110081614657,
-8.40674843913856, 7.94013760009736, 5.73178334378453, -11.625850369587,
1.54093546690149, -4.87370378395642, -22.0735137415442, -2.44337914021456,
0.619440592140127), PC7 = c(-3.54879153004361, 10.5229133300871,
5.47882712621914, 1.0641123870231, -11.584471111461, 1.02312563446584,
7.20873385839409, -17.719801994905, -0.811301497692041, 7.55418040146638,
-4.68437054723712, 1.1158744957288), PC8 = c(9.39979923528603,
-9.73821309966427, 8.49270466097171, 14.6373631871744, 11.116335443361,
-4.27823458955497, -7.19678837565302, 6.24827779166403, 0.224651092284126,
6.10960416152842, -14.6615234719377, -0.410198021192528)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
所有可能组合的 运行 方差分析
我运行
iv <- c("Sex", "fAge", "Index", "Lane", "Gen")
dv <- paste0('PC', 1:8)
rhs <- unlist(sapply(1:length(iv), function(m) apply(combn(iv, m = m), 2, paste, collapse = ' + ')))
frms <- with(expand.grid(dv, rhs), paste(Var1, Var2, sep = ' ~ '))
models <- lapply(frms, function(x) anova(lm(x, data = mrna.pcs)))
names(models) <- frms
> class(models)
[1] "list"
> class(models$`PC1 ~ Sex`)
[1] "anova" "data.frame"
我尝试 运行 每个型号的 AIC
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")
但我确定我做错了什么非常感谢任何帮助或建议
我期待的输出
lhs <- c('mpg', 'cyl', 'disp')
rhs <- c('hp', 'drat')
models = list()
for (i in lhs){
for (j in rhs){
models[[paste(i, "vs", j)]] <- lm(as.formula(paste(i, "~", j)), data = mtcars)
}
}
coefs_mat = expand.grid(lhs, rhs)
mods = apply(coefs_mat, 1, function(row) {
lm(as.formula(paste(row[1], "~", row[2])), data = mtcars)
})
names(mods) = with(coefs_mat, paste(Var1, "vs", Var2))
coefs = lapply(mods, tidy, simplify = F)
# combine
dplyr::bind_rows(coefs, .id = "mod")
summ = lapply(mods, glance, simplify = F)
dplyr::bind_rows(summ, .id = "mod")
这是我期待的输出,输出显示 table 中的 AIC 加上其他数据
mod r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
1 mpg vs hp 0.602 0.589 3.86 45.5 0.000000179 1 -87.6 181. 186. 448. 30 32
2 cyl vs hp 0.693 0.683 1.01 67.7 0.00000000348 1 -44.6 95.1 99.5 30.4 30 32
3 disp vs hp 0.626 0.613 77.1 50.1 0.0000000714 1 -183. 373. 377. 178284. 30 32
4 mpg vs drat 0.464 0.446 4.49 26.0 0.0000178 1 -92.4 191. 195. 604. 30 32
5 cyl vs drat 0.490 0.473 1.30 28.8 0.00000824 1 -52.7 111. 116. 50.4 30 32
6 disp vs drat 0.504 0.488 88.7 30.5 0.00000528 1 -188. 382. 386. 235995. 30 32
UPDATE
问题解决如下代码
models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)
在上面的代码中使用了 lm
并检查了 model_coef 和性能
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")
当我使用 lm
我在最终 model_performance 输出中得到 AIC 值时效果很好。
但我最初问过 anova
,这就是为什么我要 运行 这个问题,问题是是否可以在 anova
模型上找到 AIC
现在当我运行这个
models3 <- sapply(frms, function(x) anova(lm(x, data = mrna.pcs)), simplify = FALSE, USE.NAMES = TRUE)
model_coefs = map_df(models3, tidy, .id="Model")
model_performance = map_df(models3, glance, .id="Model")
最终输出中没有 AIC 列输出是这样的
model_performance
# A tibble: 24 × 5
Model nrow ncol complete.obs na.fraction
<chr> <int> <int> <int> <dbl>
1 PC1 ~ Sex 2 5 1 0.2
2 PC2 ~ Sex 2 5 1 0.2
3 PC3 ~ Sex 2 5 1 0.2
4 PC4 ~ Sex 2 5 1 0.2
5 PC5 ~ Sex 2 5 1 0.2
6 PC6 ~ Sex 2 5 1 0.2
7 PC7 ~ Sex 2 5 1 0.2
8 PC8 ~ Sex 2 5 1 0.2
9 PC1 ~ fAge 2 5 1 0.2
10 PC2 ~ fAge 2 5 1 0.2
当我运行这个
data.frame(model=names(models3), AIC=sapply(models3, AIC))
收到错误
Error in UseMethod("logLik") :
no applicable method for 'logLik' applied to an object of class "c('anova', 'data.frame')"
这里猜测一下,因为不清楚您期望的输出结果。
您的 models
对象是方差分析列表,而不是模型列表。如果您想要 AIC(是吗?),您需要创建模型列表。
models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)
data.frame(model=names(models), AIC=sapply(models, AIC))
请注意,包含 Index
作为预测变量的模型会产生垃圾,这不足为奇。
编辑:容纳OP的附加信息:
您可以使用上面模型列表中的 broom
函数生成您寻找的 table:
model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")