在 R 中使用 ggpredict 创建绘图时如何反向转换用 log1p 转换的变量

How to backtransform variables transformed with log1p when creating a plot using ggpredict in R

我拟合了一个 glm 模型,不得不用 log1p 转换一些变量。我现在想创建一个带有反向转换比例的 ggpredict 图。我在使用 glm 函数之前转换了变量。

这是我的原始数据和代码的几个变量示例:

library(tidyverse)
library(dplyr) 
library(lme4)
library(plyr)

rsf.data <- read.csv('2nd_rsf_data_summer.csv') %>% 
  dplyr::select(case,sex,slope,DI.water,DI.forest,tree.cover,
                DI.settle,NDVI)

 case sex        slope   DI.water DI.forest tree.cover DI.settle     NDVI
1      1   m 12.667814255 1459.07495    0.0000         88  141.4214 7260.152
2      1   m 26.927335739   22.36068    0.0000         82  200.0000 6721.043
3      1   m 25.230054855  478.01672    0.0000         85  565.6854 7038.326
4      1   m 28.119552612  247.58836    0.0000         83  360.5551 6963.435
5      1   m  6.043220520 1266.05688    0.0000         85  412.3105 6563.739
6      1   m 23.913673401  325.57642    0.0000         88  300.0000 6613.870
7      1   m 23.718715668  155.56349    0.0000         84  223.6068 6613.870
8      1   m 10.384339333  850.94061    0.0000         82  447.2136 6880.652
9      1   m 35.839290619  206.15527    0.0000         91  223.6068 6878.457
10     1   m  9.129651070  999.29980    0.0000         98  583.0952 6985.435
11     1   m  9.918550491 1435.89697  100.0000          0  223.6068 6913.783
12     1   m 10.238316536  820.79230    0.0000         82  447.2136 6880.652
13     1   m 23.787746429  607.45367    0.0000         86  670.8204 7038.326
14     1   m 11.453801155 1221.47449    0.0000         90  412.3105 6420.804
15     1   m 38.264831543  828.79431    0.0000         89  670.8204 6985.978
16     1   m 51.730064392  150.33296    0.0000         85  141.4214 6748.283
17     1   m 15.523488045  942.60278    0.0000         97  538.5165 5681.152
18     1   m 18.852766037 1709.41516    0.0000         65  707.1068 6067.348
19     1   m 52.105167389  322.49030    0.0000         82  223.6068 6232.391
20     1   m 20.871398926  778.97369    0.0000         58  538.5165 6517.522
21     1   m 33.949001312  680.95520    0.0000         79  640.3124 6333.848
22     1   m  3.358919621 1261.94299    0.0000         93 1170.4700 5883.870
23     1   m 32.564548492  910.65912    0.0000         81  800.0000 6417.435
24     1   m 12.367906570  957.75781  100.0000         17  300.0000 5673.522
25     1   m  3.392679691 1861.18237  100.0000         92 1216.5525 6276.652
26     1   m 18.972101212 1380.50720  100.0000          0  761.5773 6351.935
27     1   m  7.946177006  870.22986  100.0000          7  943.3981 6066.652
28     1   m  5.672803402 1533.88403  100.0000          0  282.8427 6986.696
29     1   m 25.292509079 1202.41431    0.0000         93  100.0000 5894.043
30     1   m 27.800992966  114.01755  100.0000          0  316.2278 5394.587
31     1   m 47.638587952  692.96460    0.0000         77  447.2136 5822.543
32     1   m  8.413535118 1272.79224  200.0000          0  282.8427 5510.739
33     1   m  5.348886967 1092.74878    0.0000         82  538.5165 6180.304
34     1   m 52.188602448  486.00409  316.2278          0  141.4214 4732.783
35     1   m 46.540889740  901.38782    0.0000         76 1204.1594 7246.087
36     1   m  4.446354389 1223.31519  100.0000          4  282.8427 5843.935
37     1   m 38.106906891  614.00323    0.0000         74  509.9019 6512.174
38     1   m 15.907997131 1830.95593    0.0000         82  781.0250 6358.783
39     1   m 10.993578911 1762.32800    0.0000         78 1264.9111 6162.500
40     1   m 15.220952988 1265.89893  100.0000          3 1140.1754 6416.239
41     1   m 55.204212189  576.28119  200.0000          0  360.5551 4888.696
42     1   m 10.736002922  973.24207    0.0000         25  900.0000 6280.826
43     1   m 35.273666382  483.01141    0.0000         88  424.2640 6263.761
44     1   m 19.182659149 1366.93091    0.0000         85 1280.6249 6132.739
45     1   m 22.921430588  411.46082  100.0000         23  360.5551 6080.109
46     1   f 30.371747971  650.69189    0.0000          0  316.2278 5170.457
47     1   f 45.077022552  353.55341  360.5551          0  200.0000 4320.413
48     1   f 41.561599731  616.19800  200.0000          0  282.8427 4688.870
49     1   f  1.623542666 1300.96118    0.0000         82 1264.9111 5883.870
50     1   f 41.779499054 1109.23389    0.0000         82  900.0000 6782.087

#log+1 transformations
rsf.data$DI.water.log <- log1p(rsf.data$DI.water)
rsf.data$DI.forest.log <- log1p(rsf.data$DI.forest)
rsf.data$DI.settle.log <- log1p(rsf.data$DI.settle)

#standardize
rsf.data[2:5] <- as.data.frame(scale(rsf.data[2:5]))

#set NAs to 0
rsf.data[is.na(rsf.data)] <- 0

#create sex column for interaction terms
rsf.data <- mutate(rsf.data, 
                   sex. = case_when(sex %in% c("f") ~ "1",TRUE ~ "0"))

rsf.data$sex. <- as.factor(rsf.data$sex.)

#glm
glm.closed6 <- glm(case ~ sex.*(poly(DI.water.log,2) + DI.forest.log + DI.settle.log + NDVI) + slope,  data=rsf.data, family=binomial(link="logit"))
summary(glm.closed6)

library(magrittr)
library(ggeffects)
library(sjmisc)
library(splines)
library(sjPlot)
library(ggplot2)

# plot ggpredict
ggpredict(glm.global1, c("DI.settle.log", "sex.")) %>% plot()

这就是输出,几乎是我想要的,但我希望通过将其返回转换来使轴更有意义。

我试图将 log1p 包含到 glm 函数中而不是之前进行转换,但这意味着我还必须包含缩放比例,这似乎是 ggpredict 的问题。另外,这意味着我在转换和缩放之前将所有 NA 都变为 0,这不是我想要的。

glm.closed99 <- glm(case ~ sex.*(scale(poly(log1p(DI.water),2)) + scale(log1p(DI.forest)) + scale(log1p(DI.settle)) + scale(NDVI)) + scale(slope),  data=rsf.data, family=binomial(link="logit"))

ggpredict(glm.closed99 , c("DI.settle", "sex.")) %>% plot()

Error in h(simpleError(msg, call)) : 
error in evaluating the argument 'x' in selecting a method for function 'plot': error in evaluating the argument 'x' in selecting a method for function 'scale': 'degree' must be less than number of unique points

有没有办法将反向转换包含到 ggpredict 函数中?

当我无法使用 built-in 绘图方法来执行我想要的操作时,我会使用 ggpredict() 来获取预测值并自己构建绘图。我无法让您的模型工作(您只在 case=1 处给了我们回复)所以我正在编写我自己的稍微简单的示例。

加载包并适配模型:

library(tidyverse)
library(ggeffects)
mm <- (mtcars
  %>% mutate(across(hp, ~log1p(. - min(.))),
             across(am, factor))
)
m1 <- lm(mpg ~ hp * am, data = mm)

基地ggpredict情节:

gg0 <- plot(ggpredict(m1))

获取预测值(沿 x-axis 的间距比默认值更细,因为一旦我们 back-transform 就会绘制曲线),并且 back-transform x-axis 变量通过手:

dd <- (ggpredict(m1, terms = c("hp [n=50]", "am"))
  %>% as_tibble()
  %>% mutate(xo = expm1(x))
)

geom_line()geom_ribbon() 构建一个图:

gg1 <- (ggplot(dd)
  + aes(xo, predicted, ymin = conf.low, ymax = conf.high,
        colour  = factor(group), fill = factor(group))
  + geom_line()
  + geom_ribbon(colour = NA, alpha = 0.3)
)

相同的图,但具有变换的 x-axis 比例:

gg2 <- gg1 + scale_x_continuous(trans = "log1p")

合并:

cowplot::plot_grid(gg0, gg1, gg2, nrow = 1)

您可以看到这有点难看 — 恢复原始轴标签等还有一些工作要做 — 但这是基本思路。