在 R 中解释嵌套混合效应模型输出
Interpreting nested mixed effects model output in R
我在解释嵌套混合效应模型中的基线系数时遇到问题。我安装了一个模型 Test.Score ~ Subject + (1|School/Class) 因为 class 嵌套在学校内。当我使用 coef(model) 查看系数时,它们似乎违反直觉:
$`Class:School`
(Intercept) SubjectMaths
1:A 82.73262 -4.108333
1:B 83.98870 -4.108333
1:C 82.26456 -4.108333
2:A 82.25383 -4.108333
2:B 78.22047 -4.108333
2:C 80.18982 -4.108333
$School
(Intercept) SubjectMaths
A 88.39636 -4.108333
B 77.74404 -4.108333
C 78.68460 -4.108333
attr(,"class")
[1] "coef.mer"
学校里的class怎么能给出比学校里低很多的值呢?下面要复制的数据:
Test.Score = c(94,88,86,90,94,87,87,92,89,92,87,94,93,91,89,92,91,
91,95,91,82,84,90,81,92,89,85,94,88,94,94,94,86,94,93,84,82,
92,92,83,89,83,81,87,84,80,81,83,88,82,81,90,82,85,87,82,86,
84,87,88,82,91,95,77,88,87,79,75,91,77,82,91,95,92,89,83,79,
90,83,83,82,79,79,78,83,82,81,77,80,79,84,83,81,78,77,75,76,
76,84,75,78,78,71,79,70,75,75,78,76,71,76,76,73,71,80,70,71,
78,71,74,76,74,74,77,81,78,79,76,82,79,80,73,72,83,72,81,81,
72,79,74,67,75,71,66,65,71,73,69,65,67,71,72,68,73,65,65,74,
67,72,72,82,70,72,86,89,87,87,88,74,92,70,89,86,63,68,74,88,
71,88,91,76,86,75,79,76,69,86,71,78,67,67,73,69,81,79,78,80,
72,81,69,72,75,76,68,72,78,78,77,71,73,70,77,75,75,69,77,74,
76,68,78,76,75,68,74,69,78,76,70,79,78,67,65,86,88,65,88,73,
66,65,85)
School = rep(c("A","B","C"), each = 80)
Class = rep(c("1","2"), each = 20,6)
Subject = rep(c("English","Maths"), each = 40, 3)
data = data.frame(Test.Score, School, Class, Subject)
data$Class = factor(data$Class)
mod = lmer(Test.Score ~ Subject + (1|School/Class), REML = F,
data = data)
coef(mod)
问题是为每个随机效应列出的系数 仅 该特定随机效应的效应。特别是,2 级 School:Class
系数仅反映学校内 Class 与总体人口平均值的偏差 - 而不是 学校层面的影响出色地。这可能看起来很奇怪或错误,但是 (1) 你可以用 predict()
得到你想要的东西(见下文)和 (2) lme4
并没有真正的 "nesting",因此通常很难确定给定的一组系数中应包含哪些随机效应(这是一种解释,而不是借口)。
就其价值而言,coef()
对于装有 nlme::lme
...
的型号确实可以正常工作
library(lme4)
## using sum-to-zero contrasts for convenience
mod = lmer(Test.Score ~ Subject + (1|School/Class), REML = FALSE,
data = data, contrasts=list(Subject=contr.sum))
pframe <- with(data,expand.grid(School=levels(School),
Subject=levels(Subject),
Class=levels(Class)))
pframe$Test.Score <- predict(mod,newdata=pframe)
如果你想要平均 Class 值,你需要平均英语和数学分数...
nlme::lme
中的同一型号:
mod2 = nlme::lme(Test.Score ~ Subject, random = ~ 1|School/Class, method="ML",
data = data, contrasts=list(Subject=contr.sum))
coef(mod2) ## Class within School
coef(mod2,level=1) ## School-level
使用一些单调乏味的工具(和 tidyverse 工具 - 这也可以通过其他方式完成),重新排列绘图系数:
rr2 <- tibble::rownames_to_column(coef(mod)[["Class:School"]])
rr2 <- dplyr::rename(rr2,Test.Score=`(Intercept)`)
rr2 <- tidyr::separate(rowname,data=rr2,into=c("Class","School"))
rr2$Subject <- NA
rr3 <- tibble::rownames_to_column(coef(mod)[["School"]])
rr3 <- dplyr::rename(rr3,Test.Score=`(Intercept)`,School=rowname)
rr3$Subject <- NA
rr3$Class <- 1.5
一起绘制所有内容(数据、预测、系数):
library(ggplot2); theme_set(theme_bw())
ggplot(data,aes(Class,Test.Score,colour=Subject))+
geom_boxplot()+
geom_point(data=pframe,size=3,shape=16,position=position_dodge(width=0.75))+
facet_wrap(~School,labeller=label_both)+
geom_point(data=rr2,size=3,shape=17)+
geom_hline(yintercept=fixef(mod)["(Intercept)"],lty=2)+
geom_point(data=rr3,size=5,shape=18)+
theme(panel.spacing=grid::unit(0,"lines")) ## cosmetic
彩色点是预测;灰色点是系数(三角形 = class-级别;菱形 = 学校级别)。
我在解释嵌套混合效应模型中的基线系数时遇到问题。我安装了一个模型 Test.Score ~ Subject + (1|School/Class) 因为 class 嵌套在学校内。当我使用 coef(model) 查看系数时,它们似乎违反直觉:
$`Class:School`
(Intercept) SubjectMaths
1:A 82.73262 -4.108333
1:B 83.98870 -4.108333
1:C 82.26456 -4.108333
2:A 82.25383 -4.108333
2:B 78.22047 -4.108333
2:C 80.18982 -4.108333
$School
(Intercept) SubjectMaths
A 88.39636 -4.108333
B 77.74404 -4.108333
C 78.68460 -4.108333
attr(,"class")
[1] "coef.mer"
学校里的class怎么能给出比学校里低很多的值呢?下面要复制的数据:
Test.Score = c(94,88,86,90,94,87,87,92,89,92,87,94,93,91,89,92,91,
91,95,91,82,84,90,81,92,89,85,94,88,94,94,94,86,94,93,84,82,
92,92,83,89,83,81,87,84,80,81,83,88,82,81,90,82,85,87,82,86,
84,87,88,82,91,95,77,88,87,79,75,91,77,82,91,95,92,89,83,79,
90,83,83,82,79,79,78,83,82,81,77,80,79,84,83,81,78,77,75,76,
76,84,75,78,78,71,79,70,75,75,78,76,71,76,76,73,71,80,70,71,
78,71,74,76,74,74,77,81,78,79,76,82,79,80,73,72,83,72,81,81,
72,79,74,67,75,71,66,65,71,73,69,65,67,71,72,68,73,65,65,74,
67,72,72,82,70,72,86,89,87,87,88,74,92,70,89,86,63,68,74,88,
71,88,91,76,86,75,79,76,69,86,71,78,67,67,73,69,81,79,78,80,
72,81,69,72,75,76,68,72,78,78,77,71,73,70,77,75,75,69,77,74,
76,68,78,76,75,68,74,69,78,76,70,79,78,67,65,86,88,65,88,73,
66,65,85)
School = rep(c("A","B","C"), each = 80)
Class = rep(c("1","2"), each = 20,6)
Subject = rep(c("English","Maths"), each = 40, 3)
data = data.frame(Test.Score, School, Class, Subject)
data$Class = factor(data$Class)
mod = lmer(Test.Score ~ Subject + (1|School/Class), REML = F,
data = data)
coef(mod)
问题是为每个随机效应列出的系数 仅 该特定随机效应的效应。特别是,2 级 School:Class
系数仅反映学校内 Class 与总体人口平均值的偏差 - 而不是 学校层面的影响出色地。这可能看起来很奇怪或错误,但是 (1) 你可以用 predict()
得到你想要的东西(见下文)和 (2) lme4
并没有真正的 "nesting",因此通常很难确定给定的一组系数中应包含哪些随机效应(这是一种解释,而不是借口)。
就其价值而言,coef()
对于装有 nlme::lme
...
library(lme4)
## using sum-to-zero contrasts for convenience
mod = lmer(Test.Score ~ Subject + (1|School/Class), REML = FALSE,
data = data, contrasts=list(Subject=contr.sum))
pframe <- with(data,expand.grid(School=levels(School),
Subject=levels(Subject),
Class=levels(Class)))
pframe$Test.Score <- predict(mod,newdata=pframe)
如果你想要平均 Class 值,你需要平均英语和数学分数...
nlme::lme
中的同一型号:
mod2 = nlme::lme(Test.Score ~ Subject, random = ~ 1|School/Class, method="ML",
data = data, contrasts=list(Subject=contr.sum))
coef(mod2) ## Class within School
coef(mod2,level=1) ## School-level
使用一些单调乏味的工具(和 tidyverse 工具 - 这也可以通过其他方式完成),重新排列绘图系数:
rr2 <- tibble::rownames_to_column(coef(mod)[["Class:School"]])
rr2 <- dplyr::rename(rr2,Test.Score=`(Intercept)`)
rr2 <- tidyr::separate(rowname,data=rr2,into=c("Class","School"))
rr2$Subject <- NA
rr3 <- tibble::rownames_to_column(coef(mod)[["School"]])
rr3 <- dplyr::rename(rr3,Test.Score=`(Intercept)`,School=rowname)
rr3$Subject <- NA
rr3$Class <- 1.5
一起绘制所有内容(数据、预测、系数):
library(ggplot2); theme_set(theme_bw())
ggplot(data,aes(Class,Test.Score,colour=Subject))+
geom_boxplot()+
geom_point(data=pframe,size=3,shape=16,position=position_dodge(width=0.75))+
facet_wrap(~School,labeller=label_both)+
geom_point(data=rr2,size=3,shape=17)+
geom_hline(yintercept=fixef(mod)["(Intercept)"],lty=2)+
geom_point(data=rr3,size=5,shape=18)+
theme(panel.spacing=grid::unit(0,"lines")) ## cosmetic
彩色点是预测;灰色点是系数(三角形 = class-级别;菱形 = 学校级别)。