在 R 中使用 GAM 进行纵向建模
Longitudinal modeling with GAM in R
我有两个时间点的大脑数据,分别编码为 1,2
。我想用mgcv::gam
来测量time 1
和time 2
之间每个大脑区域(Y
)的变化。每个主题都有一个 ID
变量,我也想将其纳入模型。
我正在努力编写正确的模型和绘图
#model
mygam <- mgcv::gam(Y ~ (Timepoint) + Sex + s(Age) + Years_Education + s(ID, by = Timepoint),
method = "REML", data = DF_w)
#plot
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])
一些示例数据:
structure(list(ID = c(33714L, 35377L, 38623L, 38806L, 39593L,
39820L, 39951L, 40286L, 40556L, 40798L, 40800L, 40815L, 43762L,
50848L, 52183L, 52461L, 52577L, 53202L, 53320L, 53873L, 54153L,
54206L, 54581L, 55122L, 55267L, 55462L, 55612L, 55920L, 56022L,
56307L, 56420L, 56679L, 57405L, 57445L, 57480L, 57725L, 57809L,
58004L, 58215L, 58229L, 58503L, 59326L, 59327L, 59344L, 59361L,
59865L, 60099L, 60100L, 60280L, 60384L, 60429L, 60493L, 60503L,
60575L, 60603L, 60664L, 60846L, 61415L, 61656L, 61749L, 61883L,
62081L, 62210L, 62285L, 62937L, 62983L, 63327L, 63329L, 64081L,
64328L, 64418L, 64507L, 64596L, 65178L, 65250L, 65302L, 65478L,
65480L, 65487L, 65572L, 65802L, 65935L, 65974L, 65975L, 65978L,
65991L, 65995L, 66013L, 66154L, 66237L, 66245L, 66389L, 66396L,
66460L, 66572L, 66589L, 67174L, 73230L, 73525L, 73539L, 73677L,
73942L, 73953L, 74034L, 74113L, 74114L, 74427L, 74439L, 74607L,
74641L, 74657L, 74794L, 74800L, 74836L, 74942L, 74952L, 74962L,
74969L, 74977L, 74985L, 74989L, 75220L, 75229L, 75407L, 75653L,
75732L, 75735L, 75757L, 75895L, 75898L, 76381L, 76559L, 76574L,
76594L, 76595L, 76746L, 76751L, 76755L, 76759L, 76775L, 77088L,
77091L, 77099L, 77134L, 77188L, 77203L, 77252L, 77304L, 77413L,
77453L, 77528L, 77556L, 77585L, 77668L, 78262L, 79724L, 79730L,
79850L, 79977L, 80052L, 80819L, 80901L, 80932L, 81064L, 81065L,
81071L, 81098L, 81142L, 81175L, 81727L, 33714L, 35377L, 38623L,
38806L, 39593L, 39820L, 39951L, 40286L, 40556L, 40798L, 40800L,
40815L, 43762L, 50848L, 52183L, 52461L, 52577L, 53202L, 53320L,
53873L, 54153L, 54206L, 54581L, 55122L, 55267L, 55462L, 55612L,
55920L, 56022L, 56307L, 56420L, 56679L, 57405L, 57445L, 57480L,
57725L, 57809L, 58004L, 58215L, 58229L, 58503L, 59326L, 59327L,
59344L, 59361L, 59865L, 60099L, 60100L, 60280L, 60384L, 60429L,
60493L, 60503L, 60575L, 60603L, 60664L, 60846L, 61415L, 61656L,
61749L, 61883L, 62081L, 62210L, 62285L, 62937L, 62983L, 63327L,
63329L, 64081L, 64328L, 64418L, 64507L, 64596L, 65178L, 65250L,
65302L, 65478L, 65480L, 65487L, 65572L, 65802L, 65935L, 65974L,
65975L, 65978L, 65991L, 65995L, 66013L, 66154L, 66237L, 66245L,
66389L, 66396L, 66460L, 66572L, 66589L, 67174L, 73230L, 73525L,
73539L, 73677L, 73942L, 73953L, 74034L, 74113L, 74114L, 74427L,
74439L, 74607L, 74641L, 74657L, 74794L, 74800L, 74836L, 74942L,
74952L, 74962L, 74969L, 74977L, 74985L, 74989L, 75220L, 75229L,
75407L, 75653L, 75732L, 75735L, 75757L, 75895L, 75898L, 76381L,
76559L, 76574L, 76594L, 76595L, 76746L, 76751L, 76755L, 76759L,
76775L, 77088L, 77091L, 77099L, 77134L, 77188L, 77203L, 77252L,
77304L, 77413L, 77453L, 77528L, 77556L, 77585L, 77668L, 78262L,
79724L, 79730L, 79850L, 79977L, 80052L, 80819L, 80901L, 80932L,
81064L, 81065L, 81071L, 81098L, 81142L, 81175L, 81727L), Age = c(15L,
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L,
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L,
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L,
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L,
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L,
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L,
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L,
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L,
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L,
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L,
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L,
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L,
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L, 15L,
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L,
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L,
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L,
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L,
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L,
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L,
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L,
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L,
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L,
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L,
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L,
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L,
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L), Sex = c("Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Female",
"Male", "Male", "Male", "Male", "Female", "Male", "Male", "Male",
"Female", "Female", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Female", "Female", "Female", "Male", "Female", "Female", "Male",
"Female", "Male", "Female", "Male", "Female", "Male", "Male",
"Female", "Male", "Male", "Male", "Female", "Female", "Female",
"Male", "Female", "Male", "Female", "Female", "Male", "Male",
"Female", "Male", "Male", "Female", "Female", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Male",
"Male", "Male", "Female", "Female", "Female", "Female", "Female",
"Male", "Female", "Female", "Male", "Male", "Female", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Male",
"Female", "Female", "Male", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Male", "Female", "Male", "Female",
"Male", "Female", "Female", "Female", "Female", "Female", "Male",
"Male", "Female", "Female", "Male", "Female", "Male", "Female",
"Female", "Male", "Female", "Female", "Female", "Male", "Female",
"Male", "Male", "Male", "Female", "Female", "Male", "Male", "Male",
"Female", "Female", "Male", "Female", "Female", "Male", "Female",
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female",
"Male", "Male", "Female", "Male", "Male", "Male", "Male", "Male",
"Male", "Female", "Female", "Male", "Female", "Female", "Female",
"Female", "Female", "Male", "Female", "Male", "Male", "Male",
"Male", "Female", "Male", "Male", "Male", "Female", "Female",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Male", "Female", "Male", "Female", "Female",
"Female", "Male", "Female", "Female", "Male", "Female", "Male",
"Female", "Male", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Female",
"Male", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Female", "Female", "Male", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Female",
"Female", "Male", "Male", "Female", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Female",
"Male", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Male", "Female", "Male", "Female", "Male", "Female",
"Female", "Female", "Female", "Female", "Male", "Male", "Female",
"Female", "Male", "Female", "Male", "Female", "Female", "Male",
"Female", "Female", "Female", "Male", "Female", "Male", "Male",
"Male", "Female", "Female", "Male", "Male", "Male", "Female",
"Female", "Male", "Female", "Female", "Male", "Female", "Female",
"Male", "Female", "Male", "Male", "Male", "Male", "Female", "Male",
"Male", "Female", "Male", "Male", "Male", "Male", "Male", "Male",
"Female", "Female"), Years_Education = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA, NA,
16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA, 17L,
14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L, NA,
NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA, 16L,
NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L, 18L,
14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA, 16L,
NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA, 21L,
18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L, 18L,
16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA,
NA, 16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA,
17L, 14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L,
NA, NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA,
16L, NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L,
18L, 14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA,
16L, NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA,
21L, 18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L,
18L, 16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA), Timepoint = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), Y = c(0.316483, 0.3510685, 0.267147, 0.254661, 0.3249415,
0.2461125, 0.3238965, 0.375393, 0.2516335, 0.3038465, 0.3206155,
0.2868495, 0.3346625, 0.297433, 0.2869195, 0.3351975, 0.315691,
0.2660795, 0.3195265, 0.3481915, 0.2785565, 0.3471645, 0.315884,
0.251254, 0.3061215, 0.334314, 0.2758345, 0.3539715, 0.307193,
0.224277, 0.302592, 0.327028, 0.2510545, 0.307193, 0.361899,
0.231769, 0.350601, 0.337318, 0.302148, 0.321019, 0.335075, 0.3318655,
0.3464885, 0.348252, 0.3281445, 0.3341985, 0.3437265, 0.28315,
0.3351165, 0.344683, 0.328796, 0.2996415, 0.329305, 0.294367,
0.3512895, 0.3617735, 0.320697, 0.307166, 0.31381, 0.299173,
0.332073, 0.3212095, 0.2990235, 0.323863, 0.3450765, 0.2904305,
0.3595685, 0.3391065, 0.3245175, 0.3418675, 0.2913055, 0.298335,
0.3394605, 0.344435, 0.284265, 0.3228975, 0.3116815, 0.3157865,
0.31368, 0.286124, 0.3255705, 0.343879, 0.3436655, 0.2713715,
0.3381005, 0.3385825, 0.3190355, 0.3597505, 0.3271475, 0.3115485,
0.3276145, 0.3437875, 0.3158815, 0.3218285, 0.326796, 0.3061665,
0.3555745, 0.3250755, 0.321343, 0.337907, 0.3159475, 0.301024,
0.3316655, 0.3446295, 0.3135155, 0.3220135, 0.3710685, 0.2866515,
0.300246, 0.3141975, 0.332256, 0.3296115, 0.321672, 0.287862,
0.354463, 0.321344, 0.304759, 0.337911, 0.3248345, 0.3188435,
0.3068095, 0.3741625, 0.336544, 0.351038, 0.343052, 0.3070755,
0.3310035, 0.3137875, 0.328222, 0.3308255, 0.309383, 0.3096755,
0.299197, 0.3684665, 0.3453295, 0.3565655, 0.3318945, 0.3180955,
0.356223, 0.331051, 0.2844205, 0.316385, 0.347013, 0.326344,
0.3122595, 0.318758, 0.340933, 0.337239, 0.320081, 0.3142475,
0.34704, 0.2590365, 0.31095, 0.317086, 0.342804, 0.271582, 0.2907645,
0.3033985, 0.3154525, 0.3431455, 0.2930245, 0.321643, 0.3315875,
0.328915, 0.317671, 0.2761495, 0.316245, 0.2994025, 0.318245,
0.321339, 0.2885835, 0.4122365, 0.360488, 0.38055, 0.303596,
0.4333405, 0.3850125, 0.3844505, 0.3769385, 0.355161, 0.3364065,
0.418439, 0.4192125, 0.42794, 0.4094405, 0.4032555, 0.3959455,
0.3589195, 0.324668, 0.385214, 0.4323475, 0.3646705, 0.4504675,
0.359749, 0.372037, 0.3742195, 0.2957245, 0.3762205, 0.400279,
0.361942, 0.379597, 0.3540285, 0.3292105, 0.4370465, 0.404536,
0.355939, 0.3330285, 0.3895195, 0.413656, 0.386604, 0.384212,
0.305916, 0.355114, 0.3440965, 0.3500305, 0.388537, 0.346431,
0.3394215, 0.3871285, 0.3946955, 0.409459, 0.454724, 0.422755,
0.399704, 0.3929715, 0.3473955, 0.359285, 0.4414455, 0.590826,
0.427441, 0.387022, 0.3764, 0.3959585, 0.3014585, 0.487619, 0.454884,
0.4070005, 0.4467275, 0.465484, 0.403643, 0.406269, 0.391375,
0.403408, 0.409922, 0.3901505, 0.3131075, 0.405408, 0.4024165,
0.4020865, 0.4253735, 0.433137, 0.4444525, 0.398744, 0.3378935,
0.29192, 0.382819, 0.4414085, 0.484809, 0.432799, 0.3276835,
0.4749325, 0.4284325, 0.320546, 0.4325895, 0.3743695, 0.395499,
0.427603, 0.500991, 0.3820045, 0.3838265, 0.3551735, 0.5114045,
0.433393, 0.378873, 0.435692, 0.4510495, 0.413954, 0.333738,
0.3538425, 0.5222565, 0.5051025, 0.4181785, 0.4526625, 0.346886,
0.453598, 0.4338855, 0.3979005, 0.378339, 0.4218445, 0.459127,
0.376291, 0.457066, 0.419093, 0.324645, 0.4700745, 0.3207305,
0.4078725, 0.466963, 0.425291, 0.480983, 0.365733, 0.52236, 0.3796195,
0.5241235, 0.385068, 0.4372355, 0.4265785, 0.373038, 0.4076305,
0.313645, 0.417434, 0.4494865, 0.4868035, 0.366238, 0.364009,
0.313894, 0.371546, 0.385691, 0.3840595, 0.3589585, 0.4525265,
0.4209155, 0.39987, 0.3591295, 0.4241465, 0.5168695, 0.2102594,
0.405142, 0.391291, 0.386246, 0.3784075, 0.4903315, 0.334801,
0.3767035, 0.4459985, 0.3702205, 0.4718795, 0.34822, 0.3262135,
0.318815)), class = "data.frame", row.names = c(NA, -340L))
您的设置存在几个问题。 by
术语指定不正确,在您的数据中 id
不是一个因素,它需要是一个因素,并且参数是相反的。您的模型在 id
上平滑了 timepoint
。在下面的示例中,它是固定的,您可以看到 mgcv
无法随着时间的推移适应 id
特定平滑,因为没有足够的数据。
library('mgcv')
library('gratia')
library('ggplot2')
# df <- your data
names(df) <- tolower(names(df))
summary(df)
# id age sex years_education timepoint y
# 33714 : 2 Min. : 6.00 Length:340 Min. :11.00 Min. :1.0 Min. :0.2103
# 35377 : 2 1st Qu.:13.00 Class :character 1st Qu.:14.00 1st Qu.:1.0 1st Qu.:0.3188
# 38623 : 2 Median :43.00 Mode :character Median :16.00 Median :1.5 Median :0.3449
# 38806 : 2 Mean :36.38 Mean :15.79 Mean :1.5 Mean :0.3583
# 39593 : 2 3rd Qu.:57.00 3rd Qu.:18.00 3rd Qu.:2.0 3rd Qu.:0.3949
# 39820 : 2 Max. :71.00 Max. :21.00 Max. :2.0 Max. :0.5908
# (Other):328 NA's :180
apply(df, 2, function(x) sum(is.na(x)))
# missing 180 cases of `years_education`
mean(df[df$sex == "Male",]$y)
#[1] 0.3675737
mean(df[df$sex == "Female",]$y)
#[1] 0.3712672
## your model without education
## this smooths over ids which are "integer" and rather should be factor
mygam1 <- mgcv::gam(y ~ (timepoint) + sex + s(age) + s(id, by = timepoint),
method = "REML", data = df)
summary(mygam1)
# Family: gaussian
# Link function: identity
#
# Formula:
# y ~ (timepoint) + sex + s(age) + s(id, by = timepoint)
#
# Parametric coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.239312 0.007405 32.318 < 2e-16 ***
# timepoint 0.094749 0.019223 4.929 1.31e-06 ***
# sexMale 0.001933 0.004719 0.410 0.682
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Approximate significance of smooth terms:
# edf Ref.df F p-value
# s(age) 1.001 1.002 9.660 0.00203 **
# s(id):timepoint 3.423 4.257 3.609 0.00828 **
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Rank: 21/22
# R-sq.(adj) = 0.499 Deviance explained = 50.8%
# -REML = -573.51 Scale est. = 0.001709 n = 340
# this plot doesnt make sense
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])
# an alterantive which proposes what you desired in mygam1, can't be done, you dont have enough timepoints
df$id <- as.factor(df$id)
mygam2 <- mgcv::gam(y ~ (timepoint) + sex + s(age) + s(timepoint, by = id),
method = "REML", data = df)
# Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
# A term has fewer unique covariate combinations than specified maximum degrees of freedom
鉴于目标,您可能只需要 id
的随机效应,然后您可以根据这些效应在时间点上测试均值差异。这是在下面用 s(var, bs = "re") 术语实现的。我还将可能性更改为 beta,它正确支持 y
。为了绘制估计函数,我鼓励您查看 gratia
包,这使得它更容易并且非常可定制。我在下面的示例中使用 draw
(您可能还想使用 estimate_smooth
)。
mygam_alt <- mgcv::gam(y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re"),
method = "REML", data = df, family = betar)
summary(mygam_alt)
# Family: Beta regression(129.815)
# Link function: logit
#
# Formula:
# y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re")
#
# Parametric coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.842034 0.055622 -15.138 <2e-16 ***
# as.factor(timepoint)2 0.338984 0.019852 17.076 <2e-16 ***
# sexMale -0.004194 0.020406 -0.206 0.837
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Approximate significance of smooth terms:
# edf Ref.df Chi.sq p-value
# s(age) 1.001 1.001 10.048 0.00153 **
# s(id) 0.734 1.000 2.762 0.05242 .
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# R-sq.(adj) = 0.483 Deviance explained = 48.3%
# -REML = -585.63 Scale est. = 1 n = 340
gam.check(mygam_alt)
# Method: REML Optimizer: outer newton
# full convergence after 8 iterations.
# Gradient range [-0.0002248251,0.0001459622]
# (score -585.6282 & scale 1).
# Hessian positive definite, eigenvalue range [0.0002246851,168.796].
# Model rank = 13 / 13
#
# Basis dimension (k) checking results. Low p-value (k-index<1) may
# indicate that k is too low, especially if edf is close to k'.
# *** You can see here, it might not be worth including these smooths either ***
# k' edf k-index p-value
# s(age) 9.000 1.001 0.95 0.15
# s(id) 1.000 0.734 0.99 0.42
x <- draw(mygam_alt)
x
为了完整性,您也没有足够的观测值来平滑因子(这适用于 k
的任何选择,然后是基函数的数量。
mygam_alt_fs <- mgcv::gam(y ~ timepoint + sex + s(age, bs = "tp") + s(timepoint,id, bs = "fs"),
method = "REML", data = df, family = betar)
# Error in smooth.construct.tp.smooth.spec(object, data, knots) :
# A term has fewer unique covariate combinations than specified maximum degrees of freedom
我有两个时间点的大脑数据,分别编码为 1,2
。我想用mgcv::gam
来测量time 1
和time 2
之间每个大脑区域(Y
)的变化。每个主题都有一个 ID
变量,我也想将其纳入模型。
我正在努力编写正确的模型和绘图
#model
mygam <- mgcv::gam(Y ~ (Timepoint) + Sex + s(Age) + Years_Education + s(ID, by = Timepoint),
method = "REML", data = DF_w)
#plot
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])
一些示例数据:
structure(list(ID = c(33714L, 35377L, 38623L, 38806L, 39593L,
39820L, 39951L, 40286L, 40556L, 40798L, 40800L, 40815L, 43762L,
50848L, 52183L, 52461L, 52577L, 53202L, 53320L, 53873L, 54153L,
54206L, 54581L, 55122L, 55267L, 55462L, 55612L, 55920L, 56022L,
56307L, 56420L, 56679L, 57405L, 57445L, 57480L, 57725L, 57809L,
58004L, 58215L, 58229L, 58503L, 59326L, 59327L, 59344L, 59361L,
59865L, 60099L, 60100L, 60280L, 60384L, 60429L, 60493L, 60503L,
60575L, 60603L, 60664L, 60846L, 61415L, 61656L, 61749L, 61883L,
62081L, 62210L, 62285L, 62937L, 62983L, 63327L, 63329L, 64081L,
64328L, 64418L, 64507L, 64596L, 65178L, 65250L, 65302L, 65478L,
65480L, 65487L, 65572L, 65802L, 65935L, 65974L, 65975L, 65978L,
65991L, 65995L, 66013L, 66154L, 66237L, 66245L, 66389L, 66396L,
66460L, 66572L, 66589L, 67174L, 73230L, 73525L, 73539L, 73677L,
73942L, 73953L, 74034L, 74113L, 74114L, 74427L, 74439L, 74607L,
74641L, 74657L, 74794L, 74800L, 74836L, 74942L, 74952L, 74962L,
74969L, 74977L, 74985L, 74989L, 75220L, 75229L, 75407L, 75653L,
75732L, 75735L, 75757L, 75895L, 75898L, 76381L, 76559L, 76574L,
76594L, 76595L, 76746L, 76751L, 76755L, 76759L, 76775L, 77088L,
77091L, 77099L, 77134L, 77188L, 77203L, 77252L, 77304L, 77413L,
77453L, 77528L, 77556L, 77585L, 77668L, 78262L, 79724L, 79730L,
79850L, 79977L, 80052L, 80819L, 80901L, 80932L, 81064L, 81065L,
81071L, 81098L, 81142L, 81175L, 81727L, 33714L, 35377L, 38623L,
38806L, 39593L, 39820L, 39951L, 40286L, 40556L, 40798L, 40800L,
40815L, 43762L, 50848L, 52183L, 52461L, 52577L, 53202L, 53320L,
53873L, 54153L, 54206L, 54581L, 55122L, 55267L, 55462L, 55612L,
55920L, 56022L, 56307L, 56420L, 56679L, 57405L, 57445L, 57480L,
57725L, 57809L, 58004L, 58215L, 58229L, 58503L, 59326L, 59327L,
59344L, 59361L, 59865L, 60099L, 60100L, 60280L, 60384L, 60429L,
60493L, 60503L, 60575L, 60603L, 60664L, 60846L, 61415L, 61656L,
61749L, 61883L, 62081L, 62210L, 62285L, 62937L, 62983L, 63327L,
63329L, 64081L, 64328L, 64418L, 64507L, 64596L, 65178L, 65250L,
65302L, 65478L, 65480L, 65487L, 65572L, 65802L, 65935L, 65974L,
65975L, 65978L, 65991L, 65995L, 66013L, 66154L, 66237L, 66245L,
66389L, 66396L, 66460L, 66572L, 66589L, 67174L, 73230L, 73525L,
73539L, 73677L, 73942L, 73953L, 74034L, 74113L, 74114L, 74427L,
74439L, 74607L, 74641L, 74657L, 74794L, 74800L, 74836L, 74942L,
74952L, 74962L, 74969L, 74977L, 74985L, 74989L, 75220L, 75229L,
75407L, 75653L, 75732L, 75735L, 75757L, 75895L, 75898L, 76381L,
76559L, 76574L, 76594L, 76595L, 76746L, 76751L, 76755L, 76759L,
76775L, 77088L, 77091L, 77099L, 77134L, 77188L, 77203L, 77252L,
77304L, 77413L, 77453L, 77528L, 77556L, 77585L, 77668L, 78262L,
79724L, 79730L, 79850L, 79977L, 80052L, 80819L, 80901L, 80932L,
81064L, 81065L, 81071L, 81098L, 81142L, 81175L, 81727L), Age = c(15L,
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L,
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L,
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L,
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L,
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L,
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L,
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L,
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L,
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L,
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L,
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L,
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L,
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L, 15L,
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L,
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L,
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L,
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L,
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L,
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L,
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L,
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L,
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L,
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L,
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L,
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L,
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L), Sex = c("Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Female",
"Male", "Male", "Male", "Male", "Female", "Male", "Male", "Male",
"Female", "Female", "Male", "Male", "Male", "Male", "Male", "Male",
"Male", "Male", "Female", "Female", "Male", "Female", "Male",
"Female", "Female", "Female", "Male", "Female", "Female", "Male",
"Female", "Male", "Female", "Male", "Female", "Male", "Male",
"Female", "Male", "Male", "Male", "Female", "Female", "Female",
"Male", "Female", "Male", "Female", "Female", "Male", "Male",
"Female", "Male", "Male", "Female", "Female", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Male",
"Male", "Male", "Female", "Female", "Female", "Female", "Female",
"Male", "Female", "Female", "Male", "Male", "Female", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Male",
"Female", "Female", "Male", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Male", "Female", "Male", "Female",
"Male", "Female", "Female", "Female", "Female", "Female", "Male",
"Male", "Female", "Female", "Male", "Female", "Male", "Female",
"Female", "Male", "Female", "Female", "Female", "Male", "Female",
"Male", "Male", "Male", "Female", "Female", "Male", "Male", "Male",
"Female", "Female", "Male", "Female", "Female", "Male", "Female",
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female",
"Male", "Male", "Female", "Male", "Male", "Male", "Male", "Male",
"Male", "Female", "Female", "Male", "Female", "Female", "Female",
"Female", "Female", "Male", "Female", "Male", "Male", "Male",
"Male", "Female", "Male", "Male", "Male", "Female", "Female",
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male",
"Female", "Female", "Male", "Female", "Male", "Female", "Female",
"Female", "Male", "Female", "Female", "Male", "Female", "Male",
"Female", "Male", "Female", "Male", "Male", "Female", "Male",
"Male", "Male", "Female", "Female", "Female", "Male", "Female",
"Male", "Female", "Female", "Male", "Male", "Female", "Male",
"Male", "Female", "Female", "Male", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Male", "Male",
"Female", "Female", "Female", "Female", "Female", "Male", "Female",
"Female", "Male", "Male", "Female", "Male", "Female", "Female",
"Female", "Female", "Female", "Male", "Male", "Female", "Female",
"Male", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Female", "Female", "Female", "Female", "Female", "Female",
"Female", "Male", "Female", "Male", "Female", "Male", "Female",
"Female", "Female", "Female", "Female", "Male", "Male", "Female",
"Female", "Male", "Female", "Male", "Female", "Female", "Male",
"Female", "Female", "Female", "Male", "Female", "Male", "Male",
"Male", "Female", "Female", "Male", "Male", "Male", "Female",
"Female", "Male", "Female", "Female", "Male", "Female", "Female",
"Male", "Female", "Male", "Male", "Male", "Male", "Female", "Male",
"Male", "Female", "Male", "Male", "Male", "Male", "Male", "Male",
"Female", "Female"), Years_Education = c(NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA, NA,
16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA, 17L,
14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L, NA,
NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA, 16L,
NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L, 18L,
14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA, 16L,
NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA, 21L,
18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L, 18L,
16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA,
NA, 16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA,
17L, 14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L,
NA, NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA,
16L, NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L,
18L, 14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA,
16L, NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA,
21L, 18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L,
18L, 16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA), Timepoint = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), Y = c(0.316483, 0.3510685, 0.267147, 0.254661, 0.3249415,
0.2461125, 0.3238965, 0.375393, 0.2516335, 0.3038465, 0.3206155,
0.2868495, 0.3346625, 0.297433, 0.2869195, 0.3351975, 0.315691,
0.2660795, 0.3195265, 0.3481915, 0.2785565, 0.3471645, 0.315884,
0.251254, 0.3061215, 0.334314, 0.2758345, 0.3539715, 0.307193,
0.224277, 0.302592, 0.327028, 0.2510545, 0.307193, 0.361899,
0.231769, 0.350601, 0.337318, 0.302148, 0.321019, 0.335075, 0.3318655,
0.3464885, 0.348252, 0.3281445, 0.3341985, 0.3437265, 0.28315,
0.3351165, 0.344683, 0.328796, 0.2996415, 0.329305, 0.294367,
0.3512895, 0.3617735, 0.320697, 0.307166, 0.31381, 0.299173,
0.332073, 0.3212095, 0.2990235, 0.323863, 0.3450765, 0.2904305,
0.3595685, 0.3391065, 0.3245175, 0.3418675, 0.2913055, 0.298335,
0.3394605, 0.344435, 0.284265, 0.3228975, 0.3116815, 0.3157865,
0.31368, 0.286124, 0.3255705, 0.343879, 0.3436655, 0.2713715,
0.3381005, 0.3385825, 0.3190355, 0.3597505, 0.3271475, 0.3115485,
0.3276145, 0.3437875, 0.3158815, 0.3218285, 0.326796, 0.3061665,
0.3555745, 0.3250755, 0.321343, 0.337907, 0.3159475, 0.301024,
0.3316655, 0.3446295, 0.3135155, 0.3220135, 0.3710685, 0.2866515,
0.300246, 0.3141975, 0.332256, 0.3296115, 0.321672, 0.287862,
0.354463, 0.321344, 0.304759, 0.337911, 0.3248345, 0.3188435,
0.3068095, 0.3741625, 0.336544, 0.351038, 0.343052, 0.3070755,
0.3310035, 0.3137875, 0.328222, 0.3308255, 0.309383, 0.3096755,
0.299197, 0.3684665, 0.3453295, 0.3565655, 0.3318945, 0.3180955,
0.356223, 0.331051, 0.2844205, 0.316385, 0.347013, 0.326344,
0.3122595, 0.318758, 0.340933, 0.337239, 0.320081, 0.3142475,
0.34704, 0.2590365, 0.31095, 0.317086, 0.342804, 0.271582, 0.2907645,
0.3033985, 0.3154525, 0.3431455, 0.2930245, 0.321643, 0.3315875,
0.328915, 0.317671, 0.2761495, 0.316245, 0.2994025, 0.318245,
0.321339, 0.2885835, 0.4122365, 0.360488, 0.38055, 0.303596,
0.4333405, 0.3850125, 0.3844505, 0.3769385, 0.355161, 0.3364065,
0.418439, 0.4192125, 0.42794, 0.4094405, 0.4032555, 0.3959455,
0.3589195, 0.324668, 0.385214, 0.4323475, 0.3646705, 0.4504675,
0.359749, 0.372037, 0.3742195, 0.2957245, 0.3762205, 0.400279,
0.361942, 0.379597, 0.3540285, 0.3292105, 0.4370465, 0.404536,
0.355939, 0.3330285, 0.3895195, 0.413656, 0.386604, 0.384212,
0.305916, 0.355114, 0.3440965, 0.3500305, 0.388537, 0.346431,
0.3394215, 0.3871285, 0.3946955, 0.409459, 0.454724, 0.422755,
0.399704, 0.3929715, 0.3473955, 0.359285, 0.4414455, 0.590826,
0.427441, 0.387022, 0.3764, 0.3959585, 0.3014585, 0.487619, 0.454884,
0.4070005, 0.4467275, 0.465484, 0.403643, 0.406269, 0.391375,
0.403408, 0.409922, 0.3901505, 0.3131075, 0.405408, 0.4024165,
0.4020865, 0.4253735, 0.433137, 0.4444525, 0.398744, 0.3378935,
0.29192, 0.382819, 0.4414085, 0.484809, 0.432799, 0.3276835,
0.4749325, 0.4284325, 0.320546, 0.4325895, 0.3743695, 0.395499,
0.427603, 0.500991, 0.3820045, 0.3838265, 0.3551735, 0.5114045,
0.433393, 0.378873, 0.435692, 0.4510495, 0.413954, 0.333738,
0.3538425, 0.5222565, 0.5051025, 0.4181785, 0.4526625, 0.346886,
0.453598, 0.4338855, 0.3979005, 0.378339, 0.4218445, 0.459127,
0.376291, 0.457066, 0.419093, 0.324645, 0.4700745, 0.3207305,
0.4078725, 0.466963, 0.425291, 0.480983, 0.365733, 0.52236, 0.3796195,
0.5241235, 0.385068, 0.4372355, 0.4265785, 0.373038, 0.4076305,
0.313645, 0.417434, 0.4494865, 0.4868035, 0.366238, 0.364009,
0.313894, 0.371546, 0.385691, 0.3840595, 0.3589585, 0.4525265,
0.4209155, 0.39987, 0.3591295, 0.4241465, 0.5168695, 0.2102594,
0.405142, 0.391291, 0.386246, 0.3784075, 0.4903315, 0.334801,
0.3767035, 0.4459985, 0.3702205, 0.4718795, 0.34822, 0.3262135,
0.318815)), class = "data.frame", row.names = c(NA, -340L))
您的设置存在几个问题。 by
术语指定不正确,在您的数据中 id
不是一个因素,它需要是一个因素,并且参数是相反的。您的模型在 id
上平滑了 timepoint
。在下面的示例中,它是固定的,您可以看到 mgcv
无法随着时间的推移适应 id
特定平滑,因为没有足够的数据。
library('mgcv')
library('gratia')
library('ggplot2')
# df <- your data
names(df) <- tolower(names(df))
summary(df)
# id age sex years_education timepoint y
# 33714 : 2 Min. : 6.00 Length:340 Min. :11.00 Min. :1.0 Min. :0.2103
# 35377 : 2 1st Qu.:13.00 Class :character 1st Qu.:14.00 1st Qu.:1.0 1st Qu.:0.3188
# 38623 : 2 Median :43.00 Mode :character Median :16.00 Median :1.5 Median :0.3449
# 38806 : 2 Mean :36.38 Mean :15.79 Mean :1.5 Mean :0.3583
# 39593 : 2 3rd Qu.:57.00 3rd Qu.:18.00 3rd Qu.:2.0 3rd Qu.:0.3949
# 39820 : 2 Max. :71.00 Max. :21.00 Max. :2.0 Max. :0.5908
# (Other):328 NA's :180
apply(df, 2, function(x) sum(is.na(x)))
# missing 180 cases of `years_education`
mean(df[df$sex == "Male",]$y)
#[1] 0.3675737
mean(df[df$sex == "Female",]$y)
#[1] 0.3712672
## your model without education
## this smooths over ids which are "integer" and rather should be factor
mygam1 <- mgcv::gam(y ~ (timepoint) + sex + s(age) + s(id, by = timepoint),
method = "REML", data = df)
summary(mygam1)
# Family: gaussian
# Link function: identity
#
# Formula:
# y ~ (timepoint) + sex + s(age) + s(id, by = timepoint)
#
# Parametric coefficients:
# Estimate Std. Error t value Pr(>|t|)
# (Intercept) 0.239312 0.007405 32.318 < 2e-16 ***
# timepoint 0.094749 0.019223 4.929 1.31e-06 ***
# sexMale 0.001933 0.004719 0.410 0.682
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Approximate significance of smooth terms:
# edf Ref.df F p-value
# s(age) 1.001 1.002 9.660 0.00203 **
# s(id):timepoint 3.423 4.257 3.609 0.00828 **
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Rank: 21/22
# R-sq.(adj) = 0.499 Deviance explained = 50.8%
# -REML = -573.51 Scale est. = 0.001709 n = 340
# this plot doesnt make sense
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])
# an alterantive which proposes what you desired in mygam1, can't be done, you dont have enough timepoints
df$id <- as.factor(df$id)
mygam2 <- mgcv::gam(y ~ (timepoint) + sex + s(age) + s(timepoint, by = id),
method = "REML", data = df)
# Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) :
# A term has fewer unique covariate combinations than specified maximum degrees of freedom
鉴于目标,您可能只需要 id
的随机效应,然后您可以根据这些效应在时间点上测试均值差异。这是在下面用 s(var, bs = "re") 术语实现的。我还将可能性更改为 beta,它正确支持 y
。为了绘制估计函数,我鼓励您查看 gratia
包,这使得它更容易并且非常可定制。我在下面的示例中使用 draw
(您可能还想使用 estimate_smooth
)。
mygam_alt <- mgcv::gam(y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re"),
method = "REML", data = df, family = betar)
summary(mygam_alt)
# Family: Beta regression(129.815)
# Link function: logit
#
# Formula:
# y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re")
#
# Parametric coefficients:
# Estimate Std. Error z value Pr(>|z|)
# (Intercept) -0.842034 0.055622 -15.138 <2e-16 ***
# as.factor(timepoint)2 0.338984 0.019852 17.076 <2e-16 ***
# sexMale -0.004194 0.020406 -0.206 0.837
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Approximate significance of smooth terms:
# edf Ref.df Chi.sq p-value
# s(age) 1.001 1.001 10.048 0.00153 **
# s(id) 0.734 1.000 2.762 0.05242 .
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# R-sq.(adj) = 0.483 Deviance explained = 48.3%
# -REML = -585.63 Scale est. = 1 n = 340
gam.check(mygam_alt)
# Method: REML Optimizer: outer newton
# full convergence after 8 iterations.
# Gradient range [-0.0002248251,0.0001459622]
# (score -585.6282 & scale 1).
# Hessian positive definite, eigenvalue range [0.0002246851,168.796].
# Model rank = 13 / 13
#
# Basis dimension (k) checking results. Low p-value (k-index<1) may
# indicate that k is too low, especially if edf is close to k'.
# *** You can see here, it might not be worth including these smooths either ***
# k' edf k-index p-value
# s(age) 9.000 1.001 0.95 0.15
# s(id) 1.000 0.734 0.99 0.42
x <- draw(mygam_alt)
x
为了完整性,您也没有足够的观测值来平滑因子(这适用于 k
的任何选择,然后是基函数的数量。
mygam_alt_fs <- mgcv::gam(y ~ timepoint + sex + s(age, bs = "tp") + s(timepoint,id, bs = "fs"),
method = "REML", data = df, family = betar)
# Error in smooth.construct.tp.smooth.spec(object, data, knots) :
# A term has fewer unique covariate combinations than specified maximum degrees of freedom