使用 nlme、gfeffects 和 sjplot 绘制 lme 模型对重复测量数据的人口水平预测

Question

我有一个长格式的数据集，包含 19 名患者 (ID) 中进行的 60 次重复测量。患者进行了不同数量的测量（2 次测量 [n=11]，随后是 5 次测量 [n=5]、3 次 [n=1]、4 次 [n=1] 和 6 次测量 [n=1]，具有不同的时间间隔（fu_time 以年为单位）。数据如下所示：

library(tidyverse)
library(magrittr)
library(broom.mixed)
library(nlme)        
library(ggeffects)
library(sjPlot)

mydata <- 
structure(list(ID = c(2, 2, 2, 2, 2, 2, 3, 3, 4, 4, 4, 4, 4, 
7, 7, 8, 8, 8, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 20, 20, 
21, 21, 22, 22, 24, 24, 24, 24, 25, 25, 27, 27, 29, 29, 29, 29, 
29, 30, 30, 31, 31, 34, 34, 34, 34, 34, 36, 36, 37, 37), fu_time = c(0, 
0.545, 2.168, 2.68, 3.184, 5.695, 0, 1.892, 0, 0.939, 1.451, 
1.955, 4.353, 0, 4.449, 0, 0.465, 4.005, 0, 0.364, 0.857, 1.361, 
3.759, 0, 0.46, 0.953, 1.457, 4.03, 0, 0.268, 0, 0.383, 0, 0.556, 
0, 0.665, 1.158, 1.662, 0, 5.558, 0, 1.207, 0, 1.339, 1.793, 
2.335, 4.194, 0, 0.734, 0, 2.27, 0, 0.613, 1.106, 1.648, 4.197, 
0, 0.556, 0, 0.468), sex = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 2L, 2L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"), 
    age = c(56.6, 56.6, 56.6, 56.6, 56.6, 56.6, 43.2, 43.2, 52, 
    52, 52, 52, 52, 58.5, 58.5, 57.4, 57.4, 57.4, 57.8, 57.8, 
    57.8, 57.8, 57.8, 52.4, 52.4, 52.4, 52.4, 52.4, 70.8, 70.8, 
    61.4, 61.4, 59.2, 59.2, 61.5, 61.5, 61.5, 61.5, 48.9, 48.9, 
    54.2, 54.2, 50.1, 50.1, 50.1, 50.1, 50.1, 55.4, 55.4, 48.6, 
    48.6, 64.2, 64.2, 64.2, 64.2, 64.2, 68.3, 68.3, 66.7, 66.7
    ), age_standardized = c(-0.112074178471796, -0.112074178471796, 
    -0.112074178471796, -0.112074178471796, -0.112074178471796, 
    -0.112074178471796, -1.75787581301653, -1.75787581301653, 
    -0.677050858987151, -0.677050858987151, -0.677050858987151, 
    -0.677050858987151, -0.677050858987151, 0.121285754784546, 
    0.121285754784546, -0.0138173644691261, -0.0138173644691261, 
    -0.0138173644691261, 0.035311042532209, 0.035311042532209, 
    0.035311042532209, 0.035311042532209, 0.035311042532209, 
    -0.627922451985816, -0.627922451985816, -0.627922451985816, 
    -0.627922451985816, -0.627922451985816, 1.6319842700756, 
    1.6319842700756, 0.477466705544226, 0.477466705544226, 0.207260467036883, 
    0.207260467036883, 0.48974880729456, 0.48974880729456, 0.48974880729456, 
    0.48974880729456, -1.0577960132475, -1.0577960132475, -0.406844620479807, 
    -0.406844620479807, -0.910410792243494, -0.910410792243494, 
    -0.910410792243494, -0.910410792243494, -0.910410792243494, 
    -0.259459399475802, -0.259459399475802, -1.0946423184985, 
    -1.0946423184985, 0.821365554553573, 0.821365554553573, 0.821365554553573, 
    0.821365554553573, 0.821365554553573, 1.32493172631726, 1.32493172631726, 
    1.12841809831192, 1.12841809831192), hypertension = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), total_cholesterol = c(3.8, 3.8, 
    3.8, 3.8, 3.8, 3.8, 3.6, 3.6, 5.208, 5.208, 5.208, 5.208, 
    5.208, 5.2, 5.2, 4.5, 4.5, 4.5, 3.9, 3.9, 3.9, 3.9, 3.9, 
    4.3, 4.3, 4.3, 4.3, 4.3, 3.94, 3.94, 5.468, 5.468, 4.76, 
    4.76, 4.2, 4.2, 4.2, 4.2, 3.1, 3.1, 4.796, 4.796, 5.4, 5.4, 
    5.4, 5.4, 5.4, 4.7, 4.7, 5.516, 5.516, 4.6, 4.6, 4.6, 4.6, 
    4.6, 4.096, 4.096, 7.2, 7.2), log_outcome = c(8.42915657313723, 
    8.7009137691239, 8.86077267189618, 8.96246948326419, 9.03130758075829, 
    9.12345262370784, 5.92634460299296, 6.50615330482121, 8.82605479483354, 
    9.28945716416388, 9.31314123590531, 9.29155486879811, 9.54380093520124, 
    6.3387281707835, 8.42119585511192, 8.09141312479266, 8.08323601190682, 
    8.08345205795634, 8.84248862871191, 8.79677243460523, 8.83902619880979, 
    8.88249368941836, 9.3739060089624, 9.21813577692571, 9.41892283483421, 
    9.51110081224594, 9.44403926910489, 9.90810686887857, 11.2473988053982, 
    11.3367621074836, 9.14484138974349, 9.13587164023184, 9.15404308484749, 
    9.29984347265724, 9.6945365587177, 9.51513026752813, 9.53564697229274, 
    9.59066055858977, 7.06643401737976, 7.7842004897582, 8.40862917559632, 
    8.74727168230293, 4.10135196585983, 5.72042611547146, 5.91627217786197, 
    6.60537958438705, 7.17379497306441, 9.02372811447485, 9.162613156671, 
    6.99925650048289, 7.38343760944252, 9.28357810757782, 9.35451537017142, 
    9.34029788589738, 9.34596520171378, 9.29507953850898, 10.4746495321547, 
    10.5224161779052, 8.94686953356746, 8.96847859420429)), row.names = c(NA, 
60L), class = "data.frame")

head(mydata)
  ID fu_time    sex  age age_standardized hypertension total_cholesterol log_outcome
1  2   0.000 Female 56.6       -0.1120742           No               3.8    8.429157
2  2   0.545 Female 56.6       -0.1120742           No               3.8    8.700914
3  2   2.168 Female 56.6       -0.1120742           No               3.8    8.860773
4  2   2.680 Female 56.6       -0.1120742           No               3.8    8.962469
5  2   3.184 Female 56.6       -0.1120742           No               3.8    9.031308
6  2   5.695 Female 56.6       -0.1120742           No               3.8    9.123453

目标是评估结果进展的风险因素。由于版本偏斜，结果已经过自然对数转换。我用相应的取幂固定效应指定的线性混合模型：

ris <- 
  lme(fixed=log_outcome ~ fu_time + age*fu_time + sex*fu_time + hypertension*fu_time + 
        total_cholesterol*fu_time, 
      random=~ 1 + fu_time|ID, 
      data=mydata, 
      method="ML")

# get coefficients
fixef_coefs <- 
  tidy(ris, conf.int=TRUE) %>% 
  select(term, estimate, conf.low, conf.high, p.value) %>% 
  mutate_at(vars(estimate, conf.low, conf.high), ~round(exp(.), 2)) %>% 
  mutate(p.value=round(p.value, 3))
fixef_coefs <- fixef_coefs[1:10, ] # remove random effects from dataset

fixef_coefs
# A tibble: 10 x 5
   term                      estimate conf.low conf.high p.value
   <chr>                        <dbl>    <dbl>     <dbl>   <dbl>
 1 (Intercept)                   0.67     0.03     16.8    0.82 
 2 fu_time                       2.21     1.1       4.45   0.043
 3 age                           1.23     1.15      1.3    0    
 4 sexMale                       2.02     0.8       5.15   0.162
 5 hypertensionYes               0.4      0.16      1      0.07 
 6 total_cholesterol             0.58     0.37      0.91   0.032
 7 fu_time:age                   0.98     0.97      0.99   0.001
 8 fu_time:sexMale               1.08     0.9       1.29   0.42 
 9 fu_time:hypertensionYes       1.22     1.04      1.43   0.03 
10 fu_time:total_cholesterol     1.12     1.03      1.22   0.018

因此，随着时间的推移，年龄、高血压和总胆固醇与结果的变化相关，因为它们显示出与 fu_time 的显着相互作用。我的下一步是绘制例如高血压的边际效应，最好使用 ggemmeans/sjplot.

然而，我仍然不知道如何实现这一点。我已经通读了 ggeffects, the difference between ggeffects and ggemmeans, and plotting interaction effects in sjplot.

上的小插曲

我已经尝试了几种方法。

(1) 运行 ggemmeans(ris, type="random", terms="fu_time:age")，这给出了模型中不存在具有该名称的术语的错误。

(2) 运行 plot_model(ris, type = "int")。这会为协变量生成罐子，但这有两个问题：首先，这些图是在对数尺度上，而我希望它们在 back-transformed/exponentiated 尺度上，第二个我不确定这些是否是实际的与 fu_time.

的交互项

最好我想使用 ggemmeans 制作我的 own/a 新数据集（因此将连续变量保持在其均值，将分类变量保持在观察到的样本比例），这样我就可以对变量进行计算，然后自己绘制图表。

Answer 1

有点离题，但我建议 parameters::model_parameters() 用于汇总表：

library(parameters)
library(nlme)        
library(ggeffects)

ris <- lme(
  log_outcome ~ fu_time + age * fu_time + sex * fu_time + hypertension * fu_time + total_cholesterol * fu_time, 
  random =  ~ 1 + fu_time | ID, 
  data = mydata, 
  method = "ML"
)

model_parameters(ris, effects = "fixed", exponentiate = TRUE)
#> # Fixed Effects
#> 
#> Parameter                    | Coefficient |       SE |        95% CI |     t | df |      p
#> -------------------------------------------------------------------------------------------
#> (Intercept)                  |        0.67 |     1.17 | [0.03, 16.76] | -0.23 | 36 | 0.820 
#> fu time                      |        2.21 |     0.84 | [1.10,  4.45] |  2.09 | 36 | 0.043 
#> age                          |        1.23 |     0.04 | [1.15,  1.30] |  6.51 | 14 | < .001
#> sex [Male]                   |        2.02 |     0.97 | [0.80,  5.15] |  1.48 | 14 | 0.162 
#> hypertension [Yes]           |        0.40 |     0.19 | [0.16,  1.00] | -1.96 | 14 | 0.070 
#> total cholesterol            |        0.58 |     0.13 | [0.37,  0.91] | -2.37 | 14 | 0.032 
#> fu time * age                |        0.98 | 6.43e-03 | [0.97,  0.99] | -3.44 | 36 | 0.001 
#> fu time * sex [Male]         |        1.08 |     0.10 | [0.90,  1.29] |  0.82 | 36 | 0.420 
#> fu time * hypertension [Yes] |        1.22 |     0.11 | [1.04,  1.43] |  2.26 | 36 | 0.030 
#> fu time * total cholesterol  |        1.12 |     0.05 | [1.03,  1.22] |  2.47 | 36 | 0.018
#> 
#> Uncertainty intervals (equal-tailed) and p values (two-tailed) computed using a
#>   Wald t-distribution approximation.

^{由 reprex package (v2.0.1)}

于 2021-11-17 创建

关于你的情节，这是你要找的吗？

ggpredict(ris, c("age", "fu_time")) |> plot()

ggpredict(ris, c("hypertension", "fu_time")) |> plot()

ggpredict(ris, c("total_cholesterol", "fu_time")) |> plot()

^{由 reprex package (v2.0.1)}

于 2021-11-17 创建

使用 nlme、gfeffects 和 sjplot 绘制 lme 模型对重复测量数据的人口水平预测

Plotting population-level predictions from lme model on repeated measurements data using nlme, ggeffects, and sjplot

r

mixed-models

nlme

sjplot

marginal-effects