将 LME 模型预测值的回归线添加到 ggplot 对象
Add regression lines from predicted values of a LME model to ggplot object
我有关于连续结果的重复测量数据:
library(magrittr)
library(ggplot2)
library(nlme)
# Example data: 75 rows of repeated measurements, stored as a tibble-classed
# data frame (looks like dput() output -- presumably pasted from the asker's
# session).
#   ID                  patient identifier (factor); only integer codes 1-24
#                       are used, while .Label still lists many more levels,
#                       so the factor carries unused levels
#   date_ct             measurement date (class "Date", days since epoch)
#   age                 age at measurement (numeric)
#   continuous_outcome  measured outcome (numeric)
mydata <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 23L, 23L, 24L, 24L, 24L, 24L), .Label = c("2",
"3", "4", "7", "8", "13", "14", "20", "21", "22", "24", "25",
"27", "29", "30", "31", "34", "36", "37", "38", "39", "40", "48",
"49", "50", "51", "52", "54", "58", "60", "61", "65", "74", "75",
"76", "77", "80", "81", "82", "83", "84", "86", "87", "88", "92",
"94", "95", "96", "103", "104", "105", "114", "115", "116", "117",
"119", "125", "126", "127", "132", "134", "135", "137", "138",
"141", "142", "145", "152", "153", "154", "157", "159", "160",
"162", "164", "165", "171", "172", "179", "180", "184", "185",
"189", "194", "195", "197", "198", "202", "203", "205", "209",
"213", "221", "253", "255", "258", "262", "271", "273", "277",
"279", "310", "315", "320"), class = "factor"), date_ct = structure(c(15923,
16122, 16715, 16902, 17086, 18003, 16150, 16841, 16421, 16764,
16951, 17135, 18011, 16622, 18247, 16582, 16752, 18045, 16729,
16862, 17042, 17226, 18102, 16568, 16736, 16916, 17100, 18040,
16743, 16841, 16589, 16729, 16526, 16729, 16619, 16862, 17042,
17226, 16407, 18437, 16512, 16953, 16457, 16946, 17112, 17310,
17989, 16573, 16841, 15923, 16752, 16505, 16729, 16909, 17107,
18038, 16540, 16743, 15951, 16122, 16624, 18202, 16623, 18221,
16694, 16715, 16902, 17086, 18037, 16451, 16743, 16421, 16736,
16909, 17100), class = "Date"), age = c(56.6, 57.1, 58.8, 59.3,
59.8, 62.3, 43.2, 45.1, 52, 52.9, 53.4, 53.9, 56.3, 58.5, 63,
57.4, 57.9, 61.4, 57.8, 58.2, 58.7, 59.2, 61.6, 52.4, 52.8, 53.3,
53.8, 56.4, 70.8, 71.1, 61.4, 61.8, 59.2, 59.8, 61.5, 62.2, 62.7,
63.2, 48.9, 54.5, 54.2, 55.4, 50.1, 51.4, 51.8, 52.4, 54.3, 55.4,
56.1, 48.6, 50.9, 64.2, 64.8, 65.3, 65.8, 68.4, 68.3, 68.8, 66.7,
67.1, 60.5, 64.8, 56.5, 60.9, 62.7, 62.8, 63.3, 63.8, 66.4, 49,
49.8, 61, 61.8, 62.3, 62.8), continuous_outcome = c(1636.4, 544.1,
1408, 1594.7, 1719.4, 2345.9, 115.3, 226, 2678.2, 3451.6, 3702.7,
3632.7, 5805, 155.2, 1095, 992.2, 296.6, 2020.4, 3708.6, 2710.7,
2934.2, 3080.4, 4489.7, 3459.4, 4965.3, 5553.1, 5037.8, 7315.7,
29980.8, 35407.5, 2263.2, 2060.6, 3220.7, 4467.1, 5902.3, 6407.2,
5947.1, 6271.6, 306, 689.3, 1430.6, 1672.1, 9.9, 58.7, 69.9,
125.3, 39.5, 3842.5, 5136.3, 216.6, 332.4, 5719.3, 5386, 5490.7,
5268.2, 6166.7, 12520.6, 12981.8, 2896.1, 2976.8, 5495.6, 6470.6,
4235.5, 7603.5, 3887, 3344.5, 2885.7, 3324.1, 6401, 1942.2, 2000.9,
2401.7, 2231.5, 2749.7, 2741.7)), row.names = c(NA, -75L), class = c("tbl_df",
"tbl", "data.frame"))
年龄似乎与我的连续变量没有线性关系:
# Spaghetti plot of the raw data: points and connecting lines per patient
# (grouped and coloured by ID), plus one LOESS smoother fitted to all
# observations pooled together (group = 1). The legend is hidden.
ggplot(
  data = mydata,
  mapping = aes(
    x = age,
    y = continuous_outcome,
    group = ID,
    colour = factor(ID)
  )
) +
  geom_point() +
  geom_line() +
  geom_smooth(mapping = aes(group = 1), method = "loess", formula = y ~ x) +
  theme(legend.position = "none")
为了处理年龄的非线性效应(并考虑重复测量设计),我先拟合了一个“常规”的一阶(线性)lme 模型,然后又拟合了一个包含年龄多项式项的模型:
# Baseline mixed model: linear fixed effect of age with a random intercept
# for each patient (ID), estimated by maximum likelihood (method = "ML").
Model1 <- lme(
  continuous_outcome ~ age,
  random = ~ 1 | ID,
  data = mydata,
  method = "ML"
)
# Mixed model whose fixed effects are a 5th-degree raw (non-orthogonal)
# polynomial in age, with a random intercept for each patient (ID),
# estimated by maximum likelihood (method = "ML").
Model2 <- lme(
  continuous_outcome ~ poly(age, degree = 5, raw = TRUE),
  random = ~ 1 | ID,
  data = mydata,
  method = "ML"
)
# Store each model's fitted values and residuals as new columns of mydata.
# predict() and residuals() are called with their defaults (no newdata, no
# level) -- presumably the patient-level values; confirm in ?predict.lme.
mydata[["model1_predicted"]] <- predict(Model1)
mydata[["model1_residuals"]] <- residuals(Model1)
mydata[["model2_predicted"]] <- predict(Model2)
mydata[["model2_residuals"]] <- residuals(Model2)
有没有办法使用预测值,为每个模型获得回归线?特别是对于模型 2,我希望看到每个多项式的回归线。
我试过以下方法:
# Observed outcomes as points, with the stored Model2 predictions as lines.
# colour = factor(ID) is set on the plot and inherited by geom_line(), so the
# predictions are joined separately for every patient.
ggplot(
  data = mydata,
  mapping = aes(x = age, y = continuous_outcome, colour = factor(ID))
) +
  geom_point(shape = 16, size = 1.8) +
  geom_line(mapping = aes(y = model2_predicted)) +
  theme(legend.position = "none")
每个患者 (ID) 生成一条回归线:
有没有办法得到一条“总体”(overall)回归线?我已经尝试在 geom_line 的美学映射中指定 group=1,但这并没有产生预期的结果。
谢谢!
我想你想为线条分配一种颜色以统一组:
# Draw ONE overall curve for Model2 instead of one fitted line per patient.
#
# predict() on an lme fit defaults to the innermost grouping level, i.e. the
# patient-specific values (fixed effects plus that patient's random
# intercept). Joining those across patients with a single line produces a
# jagged zig-zag, not a regression curve. level = 0 requests the
# population-level prediction (fixed effects only), which is the "overall"
# polynomial in age.
mydata$model2_population <- predict(Model2, level = 0)

ggplot(mydata, aes(x = age, y = continuous_outcome, colour = factor(ID))) +
  geom_point(shape = 16, size = 1.8) +
  # colour is a fixed value here (not mapped) and group = 1 pools all
  # patients, so the curve is drawn once rather than once per ID.
  geom_line(aes(y = model2_population, group = 1), colour = "gray") +
  theme(legend.position = "none")
给出:
我已经投票并接受了@BrianMontgomery 的回答,谢谢。这确实把点统一成了一条线。
作为附加说明:我想我之前是被混合模型为每个患者分别生成的预测值弄糊涂了,如上图所示。我原以为会像普通线性回归那样得到一条单一的直线,但由于我在模型中指定了随机截距 (1|ID)(注意:1|ID 表示随机截距,而不是随机斜率),所以每个患者都有一条各自的拟合线。从这个意义上说,我最初的问题从一开始就没有多大意义。
以上述为例,使用线性回归模型将得出以下结果
# Ordinary linear regression of the outcome on age. ID does not appear in the
# formula, so the model has no patient-specific terms.
lm_model <- lm(formula = continuous_outcome ~ age, data = mydata)

# Keep the fitted values next to the data for plotting.
mydata[["lm_model_predicted"]] <- predict(lm_model)
# Plot the observed outcomes together with the lm() fitted values. The line
# layer inherits colour = factor(ID), so it is drawn per patient, but every
# fitted value comes from the same age-only model.
ggplot(
  data = mydata,
  mapping = aes(x = age, y = continuous_outcome, colour = factor(ID))
) +
  geom_point(shape = 16, size = 1.8) +
  geom_line(mapping = aes(y = lm_model_predicted)) +
  theme(legend.position = "none")
这就是我原本期待的那条直线。如上所述,我用来绘制预测值的模型(即 'Model2')是一个带随机截距的线性混合模型,其默认预测值包含每个患者各自的随机效应,因此得到的不是单一的一条直线。
我有关于连续结果的重复测量数据:
library(magrittr)
library(ggplot2)
library(nlme)
# Example data: 75 rows of repeated measurements, stored as a tibble-classed
# data frame (looks like dput() output -- presumably pasted from the asker's
# session).
#   ID                  patient identifier (factor); only integer codes 1-24
#                       are used, while .Label still lists many more levels,
#                       so the factor carries unused levels
#   date_ct             measurement date (class "Date", days since epoch)
#   age                 age at measurement (numeric)
#   continuous_outcome  measured outcome (numeric)
mydata <- structure(list(ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 4L, 4L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 11L, 11L, 11L,
12L, 12L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L,
22L, 22L, 22L, 22L, 22L, 23L, 23L, 24L, 24L, 24L, 24L), .Label = c("2",
"3", "4", "7", "8", "13", "14", "20", "21", "22", "24", "25",
"27", "29", "30", "31", "34", "36", "37", "38", "39", "40", "48",
"49", "50", "51", "52", "54", "58", "60", "61", "65", "74", "75",
"76", "77", "80", "81", "82", "83", "84", "86", "87", "88", "92",
"94", "95", "96", "103", "104", "105", "114", "115", "116", "117",
"119", "125", "126", "127", "132", "134", "135", "137", "138",
"141", "142", "145", "152", "153", "154", "157", "159", "160",
"162", "164", "165", "171", "172", "179", "180", "184", "185",
"189", "194", "195", "197", "198", "202", "203", "205", "209",
"213", "221", "253", "255", "258", "262", "271", "273", "277",
"279", "310", "315", "320"), class = "factor"), date_ct = structure(c(15923,
16122, 16715, 16902, 17086, 18003, 16150, 16841, 16421, 16764,
16951, 17135, 18011, 16622, 18247, 16582, 16752, 18045, 16729,
16862, 17042, 17226, 18102, 16568, 16736, 16916, 17100, 18040,
16743, 16841, 16589, 16729, 16526, 16729, 16619, 16862, 17042,
17226, 16407, 18437, 16512, 16953, 16457, 16946, 17112, 17310,
17989, 16573, 16841, 15923, 16752, 16505, 16729, 16909, 17107,
18038, 16540, 16743, 15951, 16122, 16624, 18202, 16623, 18221,
16694, 16715, 16902, 17086, 18037, 16451, 16743, 16421, 16736,
16909, 17100), class = "Date"), age = c(56.6, 57.1, 58.8, 59.3,
59.8, 62.3, 43.2, 45.1, 52, 52.9, 53.4, 53.9, 56.3, 58.5, 63,
57.4, 57.9, 61.4, 57.8, 58.2, 58.7, 59.2, 61.6, 52.4, 52.8, 53.3,
53.8, 56.4, 70.8, 71.1, 61.4, 61.8, 59.2, 59.8, 61.5, 62.2, 62.7,
63.2, 48.9, 54.5, 54.2, 55.4, 50.1, 51.4, 51.8, 52.4, 54.3, 55.4,
56.1, 48.6, 50.9, 64.2, 64.8, 65.3, 65.8, 68.4, 68.3, 68.8, 66.7,
67.1, 60.5, 64.8, 56.5, 60.9, 62.7, 62.8, 63.3, 63.8, 66.4, 49,
49.8, 61, 61.8, 62.3, 62.8), continuous_outcome = c(1636.4, 544.1,
1408, 1594.7, 1719.4, 2345.9, 115.3, 226, 2678.2, 3451.6, 3702.7,
3632.7, 5805, 155.2, 1095, 992.2, 296.6, 2020.4, 3708.6, 2710.7,
2934.2, 3080.4, 4489.7, 3459.4, 4965.3, 5553.1, 5037.8, 7315.7,
29980.8, 35407.5, 2263.2, 2060.6, 3220.7, 4467.1, 5902.3, 6407.2,
5947.1, 6271.6, 306, 689.3, 1430.6, 1672.1, 9.9, 58.7, 69.9,
125.3, 39.5, 3842.5, 5136.3, 216.6, 332.4, 5719.3, 5386, 5490.7,
5268.2, 6166.7, 12520.6, 12981.8, 2896.1, 2976.8, 5495.6, 6470.6,
4235.5, 7603.5, 3887, 3344.5, 2885.7, 3324.1, 6401, 1942.2, 2000.9,
2401.7, 2231.5, 2749.7, 2741.7)), row.names = c(NA, -75L), class = c("tbl_df",
"tbl", "data.frame"))
年龄似乎与我的连续变量没有线性关系:
# Spaghetti plot of the raw data: points and connecting lines per patient
# (grouped and coloured by ID), plus one LOESS smoother fitted to all
# observations pooled together (group = 1). The legend is hidden.
ggplot(
  data = mydata,
  mapping = aes(
    x = age,
    y = continuous_outcome,
    group = ID,
    colour = factor(ID)
  )
) +
  geom_point() +
  geom_line() +
  geom_smooth(mapping = aes(group = 1), method = "loess", formula = y ~ x) +
  theme(legend.position = "none")
为了处理年龄的非线性效应(并考虑重复测量设计),我先拟合了一个“常规”的一阶(线性)lme 模型,然后又拟合了一个包含年龄多项式项的模型:
# Baseline mixed model: linear fixed effect of age with a random intercept
# for each patient (ID), estimated by maximum likelihood (method = "ML").
Model1 <- lme(
  continuous_outcome ~ age,
  random = ~ 1 | ID,
  data = mydata,
  method = "ML"
)
# Mixed model whose fixed effects are a 5th-degree raw (non-orthogonal)
# polynomial in age, with a random intercept for each patient (ID),
# estimated by maximum likelihood (method = "ML").
Model2 <- lme(
  continuous_outcome ~ poly(age, degree = 5, raw = TRUE),
  random = ~ 1 | ID,
  data = mydata,
  method = "ML"
)
# Store each model's fitted values and residuals as new columns of mydata.
# predict() and residuals() are called with their defaults (no newdata, no
# level) -- presumably the patient-level values; confirm in ?predict.lme.
mydata[["model1_predicted"]] <- predict(Model1)
mydata[["model1_residuals"]] <- residuals(Model1)
mydata[["model2_predicted"]] <- predict(Model2)
mydata[["model2_residuals"]] <- residuals(Model2)
有没有办法使用预测值,为每个模型获得回归线?特别是对于模型 2,我希望看到每个多项式的回归线。
我试过以下方法:
# Observed outcomes as points, with the stored Model2 predictions as lines.
# colour = factor(ID) is set on the plot and inherited by geom_line(), so the
# predictions are joined separately for every patient.
ggplot(
  data = mydata,
  mapping = aes(x = age, y = continuous_outcome, colour = factor(ID))
) +
  geom_point(shape = 16, size = 1.8) +
  geom_line(mapping = aes(y = model2_predicted)) +
  theme(legend.position = "none")
每个患者 (ID) 生成一条回归线:
有没有办法得到一条“总体”(overall)回归线?我已经尝试在 geom_line 的美学映射中指定 group=1,但这并没有产生预期的结果。
谢谢!
我想你想为线条分配一种颜色以统一组:
# Draw ONE overall curve for Model2 instead of one fitted line per patient.
#
# predict() on an lme fit defaults to the innermost grouping level, i.e. the
# patient-specific values (fixed effects plus that patient's random
# intercept). Joining those across patients with a single line produces a
# jagged zig-zag, not a regression curve. level = 0 requests the
# population-level prediction (fixed effects only), which is the "overall"
# polynomial in age.
mydata$model2_population <- predict(Model2, level = 0)

ggplot(mydata, aes(x = age, y = continuous_outcome, colour = factor(ID))) +
  geom_point(shape = 16, size = 1.8) +
  # colour is a fixed value here (not mapped) and group = 1 pools all
  # patients, so the curve is drawn once rather than once per ID.
  geom_line(aes(y = model2_population, group = 1), colour = "gray") +
  theme(legend.position = "none")
给出:
我已经投票并接受了@BrianMontgomery 的回答,谢谢。这确实把点统一成了一条线。
作为附加说明:我想我之前是被混合模型为每个患者分别生成的预测值弄糊涂了,如上图所示。我原以为会像普通线性回归那样得到一条单一的直线,但由于我在模型中指定了随机截距 (1|ID)(注意:1|ID 表示随机截距,而不是随机斜率),所以每个患者都有一条各自的拟合线。从这个意义上说,我最初的问题从一开始就没有多大意义。
以上述为例,使用线性回归模型将得出以下结果
# Ordinary linear regression of the outcome on age. ID does not appear in the
# formula, so the model has no patient-specific terms.
lm_model <- lm(formula = continuous_outcome ~ age, data = mydata)

# Keep the fitted values next to the data for plotting.
mydata[["lm_model_predicted"]] <- predict(lm_model)
# Plot the observed outcomes together with the lm() fitted values. The line
# layer inherits colour = factor(ID), so it is drawn per patient, but every
# fitted value comes from the same age-only model.
ggplot(
  data = mydata,
  mapping = aes(x = age, y = continuous_outcome, colour = factor(ID))
) +
  geom_point(shape = 16, size = 1.8) +
  geom_line(mapping = aes(y = lm_model_predicted)) +
  theme(legend.position = "none")
这就是我原本期待的那条直线。如上所述,我用来绘制预测值的模型(即 'Model2')是一个带随机截距的线性混合模型,其默认预测值包含每个患者各自的随机效应,因此得到的不是单一的一条直线。