如何运行 AIC 对来自不同模型的 anova 输出

Question

这是我使用方差分析的一小部分数据。

dput(c)
structure(list(Mouse.ID = c("DO.1307", "DO.1309", "DO.1311", 
"DO.1312", "DO.1316", "DO.1318", "DO.0661", "DO.0669", "DO.0670", 
"DO.0673", "DO.0674", "DO.0676"), Sex = structure(c(2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("F", "M"), class = "factor"), 
    fAge = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 3L, 
    2L, 2L), .Label = c("6", "12", "18"), class = "factor"), 
    Index = structure(c(14L, 5L, 2L, 1L, 6L, 8L, 21L, 24L, 11L, 
    20L, 12L, 19L), .Label = c("AR001", "AR002", "AR003", "AR004", 
    "AR005", "AR006", "AR007", "AR008", "AR009", "AR010", "AR011", 
    "AR012", "AR013", "AR014", "AR015", "AR016", "AR018", "AR019", 
    "AR020", "AR021", "AR022", "AR023", "AR025", "AR027"), class = "factor"), 
    Lane = structure(c(7L, 8L, 2L, 1L, 1L, 4L, 6L, 2L, 4L, 5L, 
    5L, 4L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8"
    ), class = "factor"), Gen = structure(c(5L, 5L, 5L, 5L, 5L, 
    5L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("8", "9", "10", "11", 
    "12"), class = "factor"), PC1 = c(42.4767703038827, 23.4689101841499, 
    53.38140726592, 94.8512438334955, 65.7816795225369, 44.245669398443, 
    -23.147618298858, -23.004329868562, -17.0024755772689, -23.9178589007844, 
    -56.7766982399411, -34.3969872418573), PC2 = c(45.5944770078485, 
    -1.65198338778925, -17.8737777017898, 15.3618046450436, -15.2567007817601, 
    -61.1178242127913, 40.5243564641241, 2.99206119995141, -61.4176842149059, 
    7.10965422446634, 7.28461966315024, -64.1955797075099), PC3 = c(-1.49628266803638, 
    -1.91335263173293, 5.93465541152824, -16.0302848659995, -10.0886088958855, 
    -0.944109507320403, -17.0598627155672, -22.1038475592448, 
    -6.25238299099893, 23.500307567532, 53.4553992426852, -20.1077749520339
    ), PC4 = c(1.84549247981521, -7.13567033720484, -17.9966832514214, 
    71.4430035621693, -4.19421663100475, 2.68614263191578, -5.37605681469604, 
    28.8757760174757, 1.96723351126677, 10.1757811517044, 7.63553142427313, 
    -0.61083387825962), PC5 = c(-1.5167549402912, -6.684624033326, 
    -7.16355679965928, -15.4471168600897, -10.8771574792765, 
    -10.091229971761, 2.49156058897602, -2.2801673669604, -5.45494631567109, 
    -5.44682692111089, -7.21616736676726, -11.0786655194642), 
    PC6 = c(1.77085439958321, -7.71184868806282, 6.45110081614657, 
    -8.40674843913856, 7.94013760009736, 5.73178334378453, -11.625850369587, 
    1.54093546690149, -4.87370378395642, -22.0735137415442, -2.44337914021456, 
    0.619440592140127), PC7 = c(-3.54879153004361, 10.5229133300871, 
    5.47882712621914, 1.0641123870231, -11.584471111461, 1.02312563446584, 
    7.20873385839409, -17.719801994905, -0.811301497692041, 7.55418040146638, 
    -4.68437054723712, 1.1158744957288), PC8 = c(9.39979923528603, 
    -9.73821309966427, 8.49270466097171, 14.6373631871744, 11.116335443361, 
    -4.27823458955497, -7.19678837565302, 6.24827779166403, 0.224651092284126, 
    6.10960416152842, -14.6615234719377, -0.410198021192528)), row.names = c(NA, 
-12L), class = c("tbl_df", "tbl", "data.frame"))

所有可能组合的运行方差分析

我运行

iv <- c("Sex", "fAge", "Index", "Lane", "Gen")  
dv <- paste0('PC', 1:8)
rhs <- unlist(sapply(1:length(iv), function(m) apply(combn(iv, m = m), 2, paste, collapse = ' + ')))
frms <- with(expand.grid(dv, rhs), paste(Var1, Var2, sep = ' ~ '))

models <- lapply(frms, function(x) anova(lm(x, data = mrna.pcs)))
names(models) <- frms

> class(models)
[1] "list"
> class(models$`PC1 ~ Sex`)
[1] "anova"      "data.frame"

我尝试运行每个型号的 AIC

model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")

但我确定我做错了什么非常感谢任何帮助或建议

我期待的输出

lhs <- c('mpg', 'cyl', 'disp')
rhs <- c('hp', 'drat')

models = list()
for (i in lhs){
  for (j in rhs){
    models[[paste(i, "vs", j)]] <- lm(as.formula(paste(i, "~", j)), data = mtcars)
  }
}

coefs_mat = expand.grid(lhs, rhs)
mods = apply(coefs_mat, 1, function(row) {
  lm(as.formula(paste(row[1], "~", row[2])), data = mtcars)
})
names(mods) = with(coefs_mat, paste(Var1, "vs", Var2))

coefs = lapply(mods, tidy, simplify = F)
# combine
dplyr::bind_rows(coefs, .id = "mod")
summ = lapply(mods, glance, simplify = F)
dplyr::bind_rows(summ, .id = "mod")

这是我期待的输出，输出显示 table 中的 AIC 加上其他数据

mod          r.squared adj.r.squared sigma statistic       p.value    df logLik   AIC   BIC deviance df.residual  nobs
  <chr>            <dbl>         <dbl> <dbl>     <dbl>         <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>       <int> <int>
1 mpg vs hp        0.602         0.589  3.86      45.5 0.000000179       1  -87.6 181.  186.     448.           30    32
2 cyl vs hp        0.693         0.683  1.01      67.7 0.00000000348     1  -44.6  95.1  99.5     30.4          30    32
3 disp vs hp       0.626         0.613 77.1       50.1 0.0000000714      1 -183.  373.  377.  178284.           30    32
4 mpg vs drat      0.464         0.446  4.49      26.0 0.0000178         1  -92.4 191.  195.     604.           30    32
5 cyl vs drat      0.490         0.473  1.30      28.8 0.00000824        1  -52.7 111.  116.      50.4          30    32
6 disp vs drat     0.504         0.488 88.7       30.5 0.00000528        1 -188.  382.  386.  235995.           30    32


UPDATE

问题解决如下代码

models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)

在上面的代码中使用了 lm 并检查了 model_coef 和性能

model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")

当我使用 lm 我在最终 model_performance 输出中得到 AIC 值时效果很好。

但我最初问过 anova，这就是为什么我要运行这个问题，问题是是否可以在 anova 模型上找到 AIC

现在当我运行这个

models3 <- sapply(frms, function(x) anova(lm(x, data = mrna.pcs)), simplify = FALSE, USE.NAMES = TRUE)

model_coefs = map_df(models3, tidy, .id="Model")
model_performance = map_df(models3, glance, .id="Model")

最终输出中没有 AIC 列输出是这样的

model_performance
# A tibble: 24 × 5
   Model       nrow  ncol complete.obs na.fraction
   <chr>      <int> <int>        <int>       <dbl>
 1 PC1 ~ Sex      2     5            1         0.2
 2 PC2 ~ Sex      2     5            1         0.2
 3 PC3 ~ Sex      2     5            1         0.2
 4 PC4 ~ Sex      2     5            1         0.2
 5 PC5 ~ Sex      2     5            1         0.2
 6 PC6 ~ Sex      2     5            1         0.2
 7 PC7 ~ Sex      2     5            1         0.2
 8 PC8 ~ Sex      2     5            1         0.2
 9 PC1 ~ fAge     2     5            1         0.2
10 PC2 ~ fAge     2     5            1         0.2

当我运行这个

 data.frame(model=names(models3), AIC=sapply(models3, AIC))

收到错误

Error in UseMethod("logLik") : 
  no applicable method for 'logLik' applied to an object of class "c('anova', 'data.frame')"

Answer 1

这里猜测一下，因为不清楚您期望的输出结果。

您的 models 对象是方差分析列表，而不是模型列表。如果您想要 AIC（是吗？），您需要创建模型列表。

models <- sapply(frms, function(x) lm(x, data = mrna.pcs), simplify = FALSE, USE.NAMES = TRUE)
data.frame(model=names(models), AIC=sapply(models, AIC))

请注意，包含 Index 作为预测变量的模型会产生垃圾，这不足为奇。

编辑：容纳OP的附加信息：

您可以使用上面模型列表中的 broom 函数生成您寻找的 table：

model_coefs = map_df(models, tidy, .id="Model")
model_performance = map_df(models, glance, .id="Model")

如何 运行 AIC 对来自不同模型的 anova 输出

How to run AIC on anova output from different models

r

anova

lm

如何运行 AIC 对来自不同模型的 anova 输出