在 R 中使用 GAM 进行纵向建模

Longitudinal modeling with GAM in R

我有两个时间点的大脑数据,分别编码为 1,2。我想用mgcv::gam来测量time 1time 2之间每个大脑区域(Y)的变化。每个主题都有一个 ID 变量,我也想将其纳入模型。

我正在努力编写正确的模型和绘图

#model
mygam <- mgcv::gam(Y ~ (Timepoint) + Sex + s(Age) + Years_Education + s(ID, by = Timepoint),
    method = "REML", data = DF_w)

#plot
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])

一些示例数据:

structure(list(ID = c(33714L, 35377L, 38623L, 38806L, 39593L, 
39820L, 39951L, 40286L, 40556L, 40798L, 40800L, 40815L, 43762L, 
50848L, 52183L, 52461L, 52577L, 53202L, 53320L, 53873L, 54153L, 
54206L, 54581L, 55122L, 55267L, 55462L, 55612L, 55920L, 56022L, 
56307L, 56420L, 56679L, 57405L, 57445L, 57480L, 57725L, 57809L, 
58004L, 58215L, 58229L, 58503L, 59326L, 59327L, 59344L, 59361L, 
59865L, 60099L, 60100L, 60280L, 60384L, 60429L, 60493L, 60503L, 
60575L, 60603L, 60664L, 60846L, 61415L, 61656L, 61749L, 61883L, 
62081L, 62210L, 62285L, 62937L, 62983L, 63327L, 63329L, 64081L, 
64328L, 64418L, 64507L, 64596L, 65178L, 65250L, 65302L, 65478L, 
65480L, 65487L, 65572L, 65802L, 65935L, 65974L, 65975L, 65978L, 
65991L, 65995L, 66013L, 66154L, 66237L, 66245L, 66389L, 66396L, 
66460L, 66572L, 66589L, 67174L, 73230L, 73525L, 73539L, 73677L, 
73942L, 73953L, 74034L, 74113L, 74114L, 74427L, 74439L, 74607L, 
74641L, 74657L, 74794L, 74800L, 74836L, 74942L, 74952L, 74962L, 
74969L, 74977L, 74985L, 74989L, 75220L, 75229L, 75407L, 75653L, 
75732L, 75735L, 75757L, 75895L, 75898L, 76381L, 76559L, 76574L, 
76594L, 76595L, 76746L, 76751L, 76755L, 76759L, 76775L, 77088L, 
77091L, 77099L, 77134L, 77188L, 77203L, 77252L, 77304L, 77413L, 
77453L, 77528L, 77556L, 77585L, 77668L, 78262L, 79724L, 79730L, 
79850L, 79977L, 80052L, 80819L, 80901L, 80932L, 81064L, 81065L, 
81071L, 81098L, 81142L, 81175L, 81727L, 33714L, 35377L, 38623L, 
38806L, 39593L, 39820L, 39951L, 40286L, 40556L, 40798L, 40800L, 
40815L, 43762L, 50848L, 52183L, 52461L, 52577L, 53202L, 53320L, 
53873L, 54153L, 54206L, 54581L, 55122L, 55267L, 55462L, 55612L, 
55920L, 56022L, 56307L, 56420L, 56679L, 57405L, 57445L, 57480L, 
57725L, 57809L, 58004L, 58215L, 58229L, 58503L, 59326L, 59327L, 
59344L, 59361L, 59865L, 60099L, 60100L, 60280L, 60384L, 60429L, 
60493L, 60503L, 60575L, 60603L, 60664L, 60846L, 61415L, 61656L, 
61749L, 61883L, 62081L, 62210L, 62285L, 62937L, 62983L, 63327L, 
63329L, 64081L, 64328L, 64418L, 64507L, 64596L, 65178L, 65250L, 
65302L, 65478L, 65480L, 65487L, 65572L, 65802L, 65935L, 65974L, 
65975L, 65978L, 65991L, 65995L, 66013L, 66154L, 66237L, 66245L, 
66389L, 66396L, 66460L, 66572L, 66589L, 67174L, 73230L, 73525L, 
73539L, 73677L, 73942L, 73953L, 74034L, 74113L, 74114L, 74427L, 
74439L, 74607L, 74641L, 74657L, 74794L, 74800L, 74836L, 74942L, 
74952L, 74962L, 74969L, 74977L, 74985L, 74989L, 75220L, 75229L, 
75407L, 75653L, 75732L, 75735L, 75757L, 75895L, 75898L, 76381L, 
76559L, 76574L, 76594L, 76595L, 76746L, 76751L, 76755L, 76759L, 
76775L, 77088L, 77091L, 77099L, 77134L, 77188L, 77203L, 77252L, 
77304L, 77413L, 77453L, 77528L, 77556L, 77585L, 77668L, 78262L, 
79724L, 79730L, 79850L, 79977L, 80052L, 80819L, 80901L, 80932L, 
81064L, 81065L, 81071L, 81098L, 81142L, 81175L, 81727L), Age = c(15L, 
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L, 
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L, 
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L, 
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L, 
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L, 
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L, 
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L, 
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L, 
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L, 
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L, 
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L, 
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L, 
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L, 15L, 
15L, 42L, 62L, 66L, 57L, 42L, 65L, 9L, 11L, 16L, 9L, 53L, 16L, 
16L, 14L, 50L, 43L, 8L, 6L, 61L, 14L, 10L, 15L, 13L, 15L, 8L, 
9L, 9L, 8L, 9L, 9L, 13L, 56L, 10L, 7L, 8L, 8L, 6L, 15L, 42L, 
8L, 11L, 43L, 69L, 14L, 12L, 10L, 16L, 12L, 10L, 6L, 13L, 66L, 
11L, 12L, 13L, 10L, 65L, 13L, 14L, 12L, 43L, 51L, 63L, 17L, 9L, 
12L, 44L, 69L, 11L, 10L, 12L, 10L, 10L, 70L, 54L, 45L, 42L, 54L, 
14L, 42L, 44L, 16L, 15L, 43L, 45L, 50L, 53L, 53L, 49L, 69L, 14L, 
65L, 14L, 13L, 67L, 59L, 52L, 54L, 44L, 62L, 69L, 10L, 63L, 57L, 
12L, 62L, 9L, 53L, 54L, 66L, 49L, 63L, 51L, 9L, 45L, 49L, 49L, 
61L, 62L, 57L, 67L, 65L, 45L, 16L, 55L, 64L, 67L, 56L, 52L, 63L, 
10L, 62L, 14L, 66L, 68L, 15L, 13L, 43L, 47L, 55L, 69L, 67L, 52L, 
15L, 64L, 55L, 44L, 13L, 48L, 71L, 64L, 13L, 50L, 61L, 70L, 57L, 
51L, 46L, 57L, 69L, 46L, 8L, 11L, 46L, 71L, 38L, 56L, 17L), Sex = c("Male", 
"Female", "Female", "Female", "Female", "Female", "Male", "Female", 
"Male", "Male", "Male", "Male", "Female", "Male", "Male", "Male", 
"Female", "Female", "Male", "Male", "Male", "Male", "Male", "Male", 
"Male", "Male", "Female", "Female", "Male", "Female", "Male", 
"Female", "Female", "Female", "Male", "Female", "Female", "Male", 
"Female", "Male", "Female", "Male", "Female", "Male", "Male", 
"Female", "Male", "Male", "Male", "Female", "Female", "Female", 
"Male", "Female", "Male", "Female", "Female", "Male", "Male", 
"Female", "Male", "Male", "Female", "Female", "Male", "Male", 
"Female", "Female", "Female", "Female", "Female", "Male", "Male", 
"Male", "Male", "Female", "Female", "Female", "Female", "Female", 
"Male", "Female", "Female", "Male", "Male", "Female", "Male", 
"Female", "Female", "Female", "Female", "Female", "Male", "Male", 
"Female", "Female", "Male", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Male", "Female", "Male", "Female", 
"Male", "Female", "Female", "Female", "Female", "Female", "Male", 
"Male", "Female", "Female", "Male", "Female", "Male", "Female", 
"Female", "Male", "Female", "Female", "Female", "Male", "Female", 
"Male", "Male", "Male", "Female", "Female", "Male", "Male", "Male", 
"Female", "Female", "Male", "Female", "Female", "Male", "Female", 
"Female", "Male", "Female", "Male", "Male", "Male", "Male", "Female", 
"Male", "Male", "Female", "Male", "Male", "Male", "Male", "Male", 
"Male", "Female", "Female", "Male", "Female", "Female", "Female", 
"Female", "Female", "Male", "Female", "Male", "Male", "Male", 
"Male", "Female", "Male", "Male", "Male", "Female", "Female", 
"Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male", 
"Female", "Female", "Male", "Female", "Male", "Female", "Female", 
"Female", "Male", "Female", "Female", "Male", "Female", "Male", 
"Female", "Male", "Female", "Male", "Male", "Female", "Male", 
"Male", "Male", "Female", "Female", "Female", "Male", "Female", 
"Male", "Female", "Female", "Male", "Male", "Female", "Male", 
"Male", "Female", "Female", "Male", "Male", "Female", "Female", 
"Female", "Female", "Female", "Male", "Male", "Male", "Male", 
"Female", "Female", "Female", "Female", "Female", "Male", "Female", 
"Female", "Male", "Male", "Female", "Male", "Female", "Female", 
"Female", "Female", "Female", "Male", "Male", "Female", "Female", 
"Male", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Female", "Female", "Female", "Female", "Female", "Female", 
"Female", "Male", "Female", "Male", "Female", "Male", "Female", 
"Female", "Female", "Female", "Female", "Male", "Male", "Female", 
"Female", "Male", "Female", "Male", "Female", "Female", "Male", 
"Female", "Female", "Female", "Male", "Female", "Male", "Male", 
"Male", "Female", "Female", "Male", "Male", "Male", "Female", 
"Female", "Male", "Female", "Female", "Male", "Female", "Female", 
"Male", "Female", "Male", "Male", "Male", "Male", "Female", "Male", 
"Male", "Female", "Male", "Male", "Male", "Male", "Male", "Male", 
"Female", "Female"), Years_Education = c(NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA, NA, 
16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA, 17L, 
14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L, NA, 
NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA, 16L, 
NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L, 18L, 
14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA, 16L, 
NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA, 21L, 
18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L, 18L, 
16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, 16L, NA, NA, NA, 14L, 12L, 16L, NA, NA, 
NA, 16L, 16L, NA, NA, NA, NA, NA, 18L, 13L, 13L, 16L, 16L, NA, 
17L, 14L, NA, NA, 18L, 18L, 16L, 16L, 18L, 16L, 16L, NA, 14L, 
NA, NA, 14L, 16L, 14L, 16L, 16L, 12L, 18L, NA, 14L, 12L, NA, 
16L, NA, 16L, 16L, 18L, 16L, 21L, 18L, NA, 16L, 11L, 18L, 14L, 
18L, 14L, 18L, 12L, 18L, NA, 18L, 20L, 16L, 12L, 17L, 14L, NA, 
16L, NA, 12L, 12L, NA, NA, 13L, 18L, 18L, 17L, 16L, 12L, NA, 
21L, 18L, 18L, NA, 16L, 18L, 18L, NA, 12L, 19L, 16L, 16L, 12L, 
18L, 16L, 19L, 15L, NA, NA, 12L, 17L, 12L, 18L, NA), Timepoint = c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L), Y = c(0.316483, 0.3510685, 0.267147, 0.254661, 0.3249415, 
0.2461125, 0.3238965, 0.375393, 0.2516335, 0.3038465, 0.3206155, 
0.2868495, 0.3346625, 0.297433, 0.2869195, 0.3351975, 0.315691, 
0.2660795, 0.3195265, 0.3481915, 0.2785565, 0.3471645, 0.315884, 
0.251254, 0.3061215, 0.334314, 0.2758345, 0.3539715, 0.307193, 
0.224277, 0.302592, 0.327028, 0.2510545, 0.307193, 0.361899, 
0.231769, 0.350601, 0.337318, 0.302148, 0.321019, 0.335075, 0.3318655, 
0.3464885, 0.348252, 0.3281445, 0.3341985, 0.3437265, 0.28315, 
0.3351165, 0.344683, 0.328796, 0.2996415, 0.329305, 0.294367, 
0.3512895, 0.3617735, 0.320697, 0.307166, 0.31381, 0.299173, 
0.332073, 0.3212095, 0.2990235, 0.323863, 0.3450765, 0.2904305, 
0.3595685, 0.3391065, 0.3245175, 0.3418675, 0.2913055, 0.298335, 
0.3394605, 0.344435, 0.284265, 0.3228975, 0.3116815, 0.3157865, 
0.31368, 0.286124, 0.3255705, 0.343879, 0.3436655, 0.2713715, 
0.3381005, 0.3385825, 0.3190355, 0.3597505, 0.3271475, 0.3115485, 
0.3276145, 0.3437875, 0.3158815, 0.3218285, 0.326796, 0.3061665, 
0.3555745, 0.3250755, 0.321343, 0.337907, 0.3159475, 0.301024, 
0.3316655, 0.3446295, 0.3135155, 0.3220135, 0.3710685, 0.2866515, 
0.300246, 0.3141975, 0.332256, 0.3296115, 0.321672, 0.287862, 
0.354463, 0.321344, 0.304759, 0.337911, 0.3248345, 0.3188435, 
0.3068095, 0.3741625, 0.336544, 0.351038, 0.343052, 0.3070755, 
0.3310035, 0.3137875, 0.328222, 0.3308255, 0.309383, 0.3096755, 
0.299197, 0.3684665, 0.3453295, 0.3565655, 0.3318945, 0.3180955, 
0.356223, 0.331051, 0.2844205, 0.316385, 0.347013, 0.326344, 
0.3122595, 0.318758, 0.340933, 0.337239, 0.320081, 0.3142475, 
0.34704, 0.2590365, 0.31095, 0.317086, 0.342804, 0.271582, 0.2907645, 
0.3033985, 0.3154525, 0.3431455, 0.2930245, 0.321643, 0.3315875, 
0.328915, 0.317671, 0.2761495, 0.316245, 0.2994025, 0.318245, 
0.321339, 0.2885835, 0.4122365, 0.360488, 0.38055, 0.303596, 
0.4333405, 0.3850125, 0.3844505, 0.3769385, 0.355161, 0.3364065, 
0.418439, 0.4192125, 0.42794, 0.4094405, 0.4032555, 0.3959455, 
0.3589195, 0.324668, 0.385214, 0.4323475, 0.3646705, 0.4504675, 
0.359749, 0.372037, 0.3742195, 0.2957245, 0.3762205, 0.400279, 
0.361942, 0.379597, 0.3540285, 0.3292105, 0.4370465, 0.404536, 
0.355939, 0.3330285, 0.3895195, 0.413656, 0.386604, 0.384212, 
0.305916, 0.355114, 0.3440965, 0.3500305, 0.388537, 0.346431, 
0.3394215, 0.3871285, 0.3946955, 0.409459, 0.454724, 0.422755, 
0.399704, 0.3929715, 0.3473955, 0.359285, 0.4414455, 0.590826, 
0.427441, 0.387022, 0.3764, 0.3959585, 0.3014585, 0.487619, 0.454884, 
0.4070005, 0.4467275, 0.465484, 0.403643, 0.406269, 0.391375, 
0.403408, 0.409922, 0.3901505, 0.3131075, 0.405408, 0.4024165, 
0.4020865, 0.4253735, 0.433137, 0.4444525, 0.398744, 0.3378935, 
0.29192, 0.382819, 0.4414085, 0.484809, 0.432799, 0.3276835, 
0.4749325, 0.4284325, 0.320546, 0.4325895, 0.3743695, 0.395499, 
0.427603, 0.500991, 0.3820045, 0.3838265, 0.3551735, 0.5114045, 
0.433393, 0.378873, 0.435692, 0.4510495, 0.413954, 0.333738, 
0.3538425, 0.5222565, 0.5051025, 0.4181785, 0.4526625, 0.346886, 
0.453598, 0.4338855, 0.3979005, 0.378339, 0.4218445, 0.459127, 
0.376291, 0.457066, 0.419093, 0.324645, 0.4700745, 0.3207305, 
0.4078725, 0.466963, 0.425291, 0.480983, 0.365733, 0.52236, 0.3796195, 
0.5241235, 0.385068, 0.4372355, 0.4265785, 0.373038, 0.4076305, 
0.313645, 0.417434, 0.4494865, 0.4868035, 0.366238, 0.364009, 
0.313894, 0.371546, 0.385691, 0.3840595, 0.3589585, 0.4525265, 
0.4209155, 0.39987, 0.3591295, 0.4241465, 0.5168695, 0.2102594, 
0.405142, 0.391291, 0.386246, 0.3784075, 0.4903315, 0.334801, 
0.3767035, 0.4459985, 0.3702205, 0.4718795, 0.34822, 0.3262135, 
0.318815)), class = "data.frame", row.names = c(NA, -340L))

您的设置存在几个问题。 by 术语指定不正确,在您的数据中 id 不是一个因素,它需要是一个因素,并且参数是相反的。您的模型在 id 上平滑了 timepoint。在下面的示例中,它是固定的,您可以看到 mgcv 无法随着时间的推移适应 id 特定平滑,因为没有足够的数据。

library('mgcv')
library('gratia')
library('ggplot2')
# df <- your data

names(df) <- tolower(names(df))
summary(df)
# id           age            sex            years_education   timepoint         y         
# 33714  :  2   Min.   : 6.00   Length:340         Min.   :11.00   Min.   :1.0   Min.   :0.2103  
# 35377  :  2   1st Qu.:13.00   Class :character   1st Qu.:14.00   1st Qu.:1.0   1st Qu.:0.3188  
# 38623  :  2   Median :43.00   Mode  :character   Median :16.00   Median :1.5   Median :0.3449  
# 38806  :  2   Mean   :36.38                      Mean   :15.79   Mean   :1.5   Mean   :0.3583  
# 39593  :  2   3rd Qu.:57.00                      3rd Qu.:18.00   3rd Qu.:2.0   3rd Qu.:0.3949  
# 39820  :  2   Max.   :71.00                      Max.   :21.00   Max.   :2.0   Max.   :0.5908  
# (Other):328                                      NA's   :180      
apply(df, 2, function(x) sum(is.na(x)))
# missing 180 cases of `years_education`
mean(df[df$sex == "Male",]$y)
#[1] 0.3675737
mean(df[df$sex == "Female",]$y)
#[1] 0.3712672

## your model without education 
## this smooths over ids which are "integer" and rather should be factor 
 mygam1 <- mgcv::gam(y ~ (timepoint) + sex + s(age) +  s(id, by = timepoint),
                    method = "REML", data = df)
 summary(mygam1)
# Family: gaussian 
# Link function: identity 
# 
# Formula:
#   y ~ (timepoint) + sex + s(age) + s(id, by = timepoint)
# 
# Parametric coefficients:
#   Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 0.239312   0.007405  32.318  < 2e-16 ***
#   timepoint   0.094749   0.019223   4.929 1.31e-06 ***
#   sexMale     0.001933   0.004719   0.410    0.682    
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Approximate significance of smooth terms:
#   edf Ref.df     F p-value   
# s(age)          1.001  1.002 9.660 0.00203 **
#   s(id):timepoint 3.423  4.257 3.609 0.00828 **
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Rank: 21/22
# R-sq.(adj) =  0.499   Deviance explained = 50.8%
# -REML = -573.51  Scale est. = 0.001709  n = 340

# this plot doesnt make sense 
plot(mygam, residuals = T, pages = 1, shade = T, shade.col = "lightblue", shift = coef(mygam)[1])



# an alterantive which proposes what you desired in mygam1, can't be done, you dont have enough timepoints 

df$id <- as.factor(df$id)
mygam2 <- mgcv::gam(y ~ (timepoint) + sex + s(age) +  s(timepoint, by = id),
                    method = "REML", data = df)
# Error in smooth.construct.tp.smooth.spec(object, dk$data, dk$knots) : 
#   A term has fewer unique covariate combinations than specified maximum degrees of freedom

鉴于目标,您可能只需要 id 的随机效应,然后您可以根据这些效应在时间点上测试均值差异。这是在下面用 s(var, bs = "re") 术语实现的。我还将可能性更改为 beta,它正确支持 y。为了绘制估计函数,我鼓励您查看 gratia 包,这使得它更容易并且非常可定制。我在下面的示例中使用 draw(您可能还想使用 estimate_smooth)。


mygam_alt <- mgcv::gam(y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re"),
                   method = "REML", data = df, family = betar)
summary(mygam_alt)
# Family: Beta regression(129.815) 
# Link function: logit 
# 
# Formula:
#   y ~ as.factor(timepoint) + sex + s(age, bs = "tp") + s(id, bs = "re")
# 
# Parametric coefficients:
#   Estimate Std. Error z value Pr(>|z|)    
# (Intercept)           -0.842034   0.055622 -15.138   <2e-16 ***
#   as.factor(timepoint)2  0.338984   0.019852  17.076   <2e-16 ***
#   sexMale               -0.004194   0.020406  -0.206    0.837    
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Approximate significance of smooth terms:
#   edf Ref.df Chi.sq p-value   
# s(age) 1.001  1.001 10.048 0.00153 **
#   s(id)  0.734  1.000  2.762 0.05242 . 
# ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# R-sq.(adj) =  0.483   Deviance explained = 48.3%
# -REML = -585.63  Scale est. = 1         n = 340
gam.check(mygam_alt)
# Method: REML   Optimizer: outer newton
# full convergence after 8 iterations.
# Gradient range [-0.0002248251,0.0001459622]
# (score -585.6282 & scale 1).
# Hessian positive definite, eigenvalue range [0.0002246851,168.796].
# Model rank =  13 / 13 
# 
# Basis dimension (k) checking results. Low p-value (k-index<1) may
# indicate that k is too low, especially if edf is close to k'.
# *** You can see here, it might not be worth including these smooths either ***
#           k'   edf k-index p-value
# s(age) 9.000 1.001    0.95    0.15
# s(id)  1.000 0.734    0.99    0.42
x <- draw(mygam_alt)
x

为了完整性,您也没有足够的观测值来平滑因子(这适用于 k 的任何选择,然后是基函数的数量。

mygam_alt_fs <- mgcv::gam(y ~ timepoint + sex + s(age, bs = "tp") + s(timepoint,id, bs = "fs"),
                       method = "REML", data = df, family = betar)
# Error in smooth.construct.tp.smooth.spec(object, data, knots) : 
#   A term has fewer unique covariate combinations than specified maximum degrees of freedom