在 R 中使用 ggpredict 创建绘图时如何反向转换用 log1p 转换的变量
How to backtransform variables transformed with log1p when creating a plot using ggpredict in R
我拟合了一个 glm 模型,不得不用 log1p 转换一些变量。我现在想创建一个带有反向转换比例的 ggpredict 图。我在使用 glm 函数之前转换了变量。
这是我的原始数据和代码的几个变量示例:
library(tidyverse)
library(dplyr)
library(lme4)
library(plyr)
rsf.data <- read.csv('2nd_rsf_data_summer.csv') %>%
dplyr::select(case,sex,slope,DI.water,DI.forest,tree.cover,
DI.settle,NDVI)
case sex slope DI.water DI.forest tree.cover DI.settle NDVI
1 1 m 12.667814255 1459.07495 0.0000 88 141.4214 7260.152
2 1 m 26.927335739 22.36068 0.0000 82 200.0000 6721.043
3 1 m 25.230054855 478.01672 0.0000 85 565.6854 7038.326
4 1 m 28.119552612 247.58836 0.0000 83 360.5551 6963.435
5 1 m 6.043220520 1266.05688 0.0000 85 412.3105 6563.739
6 1 m 23.913673401 325.57642 0.0000 88 300.0000 6613.870
7 1 m 23.718715668 155.56349 0.0000 84 223.6068 6613.870
8 1 m 10.384339333 850.94061 0.0000 82 447.2136 6880.652
9 1 m 35.839290619 206.15527 0.0000 91 223.6068 6878.457
10 1 m 9.129651070 999.29980 0.0000 98 583.0952 6985.435
11 1 m 9.918550491 1435.89697 100.0000 0 223.6068 6913.783
12 1 m 10.238316536 820.79230 0.0000 82 447.2136 6880.652
13 1 m 23.787746429 607.45367 0.0000 86 670.8204 7038.326
14 1 m 11.453801155 1221.47449 0.0000 90 412.3105 6420.804
15 1 m 38.264831543 828.79431 0.0000 89 670.8204 6985.978
16 1 m 51.730064392 150.33296 0.0000 85 141.4214 6748.283
17 1 m 15.523488045 942.60278 0.0000 97 538.5165 5681.152
18 1 m 18.852766037 1709.41516 0.0000 65 707.1068 6067.348
19 1 m 52.105167389 322.49030 0.0000 82 223.6068 6232.391
20 1 m 20.871398926 778.97369 0.0000 58 538.5165 6517.522
21 1 m 33.949001312 680.95520 0.0000 79 640.3124 6333.848
22 1 m 3.358919621 1261.94299 0.0000 93 1170.4700 5883.870
23 1 m 32.564548492 910.65912 0.0000 81 800.0000 6417.435
24 1 m 12.367906570 957.75781 100.0000 17 300.0000 5673.522
25 1 m 3.392679691 1861.18237 100.0000 92 1216.5525 6276.652
26 1 m 18.972101212 1380.50720 100.0000 0 761.5773 6351.935
27 1 m 7.946177006 870.22986 100.0000 7 943.3981 6066.652
28 1 m 5.672803402 1533.88403 100.0000 0 282.8427 6986.696
29 1 m 25.292509079 1202.41431 0.0000 93 100.0000 5894.043
30 1 m 27.800992966 114.01755 100.0000 0 316.2278 5394.587
31 1 m 47.638587952 692.96460 0.0000 77 447.2136 5822.543
32 1 m 8.413535118 1272.79224 200.0000 0 282.8427 5510.739
33 1 m 5.348886967 1092.74878 0.0000 82 538.5165 6180.304
34 1 m 52.188602448 486.00409 316.2278 0 141.4214 4732.783
35 1 m 46.540889740 901.38782 0.0000 76 1204.1594 7246.087
36 1 m 4.446354389 1223.31519 100.0000 4 282.8427 5843.935
37 1 m 38.106906891 614.00323 0.0000 74 509.9019 6512.174
38 1 m 15.907997131 1830.95593 0.0000 82 781.0250 6358.783
39 1 m 10.993578911 1762.32800 0.0000 78 1264.9111 6162.500
40 1 m 15.220952988 1265.89893 100.0000 3 1140.1754 6416.239
41 1 m 55.204212189 576.28119 200.0000 0 360.5551 4888.696
42 1 m 10.736002922 973.24207 0.0000 25 900.0000 6280.826
43 1 m 35.273666382 483.01141 0.0000 88 424.2640 6263.761
44 1 m 19.182659149 1366.93091 0.0000 85 1280.6249 6132.739
45 1 m 22.921430588 411.46082 100.0000 23 360.5551 6080.109
46 1 f 30.371747971 650.69189 0.0000 0 316.2278 5170.457
47 1 f 45.077022552 353.55341 360.5551 0 200.0000 4320.413
48 1 f 41.561599731 616.19800 200.0000 0 282.8427 4688.870
49 1 f 1.623542666 1300.96118 0.0000 82 1264.9111 5883.870
50 1 f 41.779499054 1109.23389 0.0000 82 900.0000 6782.087
#log+1 transformations
rsf.data$DI.water.log <- log1p(rsf.data$DI.water)
rsf.data$DI.forest.log <- log1p(rsf.data$DI.forest)
rsf.data$DI.settle.log <- log1p(rsf.data$DI.settle)
#standardize
rsf.data[2:5] <- as.data.frame(scale(rsf.data[2:5]))
#set NAs to 0
rsf.data[is.na(rsf.data)] <- 0
#create sex column for interaction terms
rsf.data <- mutate(rsf.data,
sex. = case_when(sex %in% c("f") ~ "1",TRUE ~ "0"))
rsf.data$sex. <- as.factor(rsf.data$sex.)
#glm
glm.closed6 <- glm(case ~ sex.*(poly(DI.water.log,2) + DI.forest.log + DI.settle.log + NDVI) + slope, data=rsf.data, family=binomial(link="logit"))
summary(glm.closed6)
library(magrittr)
library(ggeffects)
library(sjmisc)
library(splines)
library(sjPlot)
library(ggplot2)
# plot ggpredict
ggpredict(glm.global1, c("DI.settle.log", "sex.")) %>% plot()
这就是输出,几乎是我想要的,但我希望通过将其返回转换来使轴更有意义。
我试图将 log1p 包含到 glm 函数中而不是之前进行转换,但这意味着我还必须包含缩放比例,这似乎是 ggpredict 的问题。另外,这意味着我在转换和缩放之前将所有 NA 都变为 0,这不是我想要的。
glm.closed99 <- glm(case ~ sex.*(scale(poly(log1p(DI.water),2)) + scale(log1p(DI.forest)) + scale(log1p(DI.settle)) + scale(NDVI)) + scale(slope), data=rsf.data, family=binomial(link="logit"))
ggpredict(glm.closed99 , c("DI.settle", "sex.")) %>% plot()
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'plot': error in evaluating the argument 'x' in selecting a method for function 'scale': 'degree' must be less than number of unique points
有没有办法将反向转换包含到 ggpredict 函数中?
当我无法使用 built-in 绘图方法来执行我想要的操作时,我会使用 ggpredict()
来获取预测值并自己构建绘图。我无法让您的模型工作(您只在 case=1
处给了我们回复)所以我正在编写我自己的稍微简单的示例。
加载包并适配模型:
library(tidyverse)
library(ggeffects)
mm <- (mtcars
%>% mutate(across(hp, ~log1p(. - min(.))),
across(am, factor))
)
m1 <- lm(mpg ~ hp * am, data = mm)
基地ggpredict
情节:
gg0 <- plot(ggpredict(m1))
获取预测值(沿 x-axis 的间距比默认值更细,因为一旦我们 back-transform 就会绘制曲线),并且 back-transform x-axis 变量通过手:
dd <- (ggpredict(m1, terms = c("hp [n=50]", "am"))
%>% as_tibble()
%>% mutate(xo = expm1(x))
)
用 geom_line()
和 geom_ribbon()
构建一个图:
gg1 <- (ggplot(dd)
+ aes(xo, predicted, ymin = conf.low, ymax = conf.high,
colour = factor(group), fill = factor(group))
+ geom_line()
+ geom_ribbon(colour = NA, alpha = 0.3)
)
相同的图,但具有变换的 x-axis 比例:
gg2 <- gg1 + scale_x_continuous(trans = "log1p")
合并:
cowplot::plot_grid(gg0, gg1, gg2, nrow = 1)
您可以看到这有点难看 — 恢复原始轴标签等还有一些工作要做 — 但这是基本思路。
我拟合了一个 glm 模型,不得不用 log1p 转换一些变量。我现在想创建一个带有反向转换比例的 ggpredict 图。我在使用 glm 函数之前转换了变量。
这是我的原始数据和代码的几个变量示例:
library(tidyverse)
library(dplyr)
library(lme4)
library(plyr)
rsf.data <- read.csv('2nd_rsf_data_summer.csv') %>%
dplyr::select(case,sex,slope,DI.water,DI.forest,tree.cover,
DI.settle,NDVI)
case sex slope DI.water DI.forest tree.cover DI.settle NDVI
1 1 m 12.667814255 1459.07495 0.0000 88 141.4214 7260.152
2 1 m 26.927335739 22.36068 0.0000 82 200.0000 6721.043
3 1 m 25.230054855 478.01672 0.0000 85 565.6854 7038.326
4 1 m 28.119552612 247.58836 0.0000 83 360.5551 6963.435
5 1 m 6.043220520 1266.05688 0.0000 85 412.3105 6563.739
6 1 m 23.913673401 325.57642 0.0000 88 300.0000 6613.870
7 1 m 23.718715668 155.56349 0.0000 84 223.6068 6613.870
8 1 m 10.384339333 850.94061 0.0000 82 447.2136 6880.652
9 1 m 35.839290619 206.15527 0.0000 91 223.6068 6878.457
10 1 m 9.129651070 999.29980 0.0000 98 583.0952 6985.435
11 1 m 9.918550491 1435.89697 100.0000 0 223.6068 6913.783
12 1 m 10.238316536 820.79230 0.0000 82 447.2136 6880.652
13 1 m 23.787746429 607.45367 0.0000 86 670.8204 7038.326
14 1 m 11.453801155 1221.47449 0.0000 90 412.3105 6420.804
15 1 m 38.264831543 828.79431 0.0000 89 670.8204 6985.978
16 1 m 51.730064392 150.33296 0.0000 85 141.4214 6748.283
17 1 m 15.523488045 942.60278 0.0000 97 538.5165 5681.152
18 1 m 18.852766037 1709.41516 0.0000 65 707.1068 6067.348
19 1 m 52.105167389 322.49030 0.0000 82 223.6068 6232.391
20 1 m 20.871398926 778.97369 0.0000 58 538.5165 6517.522
21 1 m 33.949001312 680.95520 0.0000 79 640.3124 6333.848
22 1 m 3.358919621 1261.94299 0.0000 93 1170.4700 5883.870
23 1 m 32.564548492 910.65912 0.0000 81 800.0000 6417.435
24 1 m 12.367906570 957.75781 100.0000 17 300.0000 5673.522
25 1 m 3.392679691 1861.18237 100.0000 92 1216.5525 6276.652
26 1 m 18.972101212 1380.50720 100.0000 0 761.5773 6351.935
27 1 m 7.946177006 870.22986 100.0000 7 943.3981 6066.652
28 1 m 5.672803402 1533.88403 100.0000 0 282.8427 6986.696
29 1 m 25.292509079 1202.41431 0.0000 93 100.0000 5894.043
30 1 m 27.800992966 114.01755 100.0000 0 316.2278 5394.587
31 1 m 47.638587952 692.96460 0.0000 77 447.2136 5822.543
32 1 m 8.413535118 1272.79224 200.0000 0 282.8427 5510.739
33 1 m 5.348886967 1092.74878 0.0000 82 538.5165 6180.304
34 1 m 52.188602448 486.00409 316.2278 0 141.4214 4732.783
35 1 m 46.540889740 901.38782 0.0000 76 1204.1594 7246.087
36 1 m 4.446354389 1223.31519 100.0000 4 282.8427 5843.935
37 1 m 38.106906891 614.00323 0.0000 74 509.9019 6512.174
38 1 m 15.907997131 1830.95593 0.0000 82 781.0250 6358.783
39 1 m 10.993578911 1762.32800 0.0000 78 1264.9111 6162.500
40 1 m 15.220952988 1265.89893 100.0000 3 1140.1754 6416.239
41 1 m 55.204212189 576.28119 200.0000 0 360.5551 4888.696
42 1 m 10.736002922 973.24207 0.0000 25 900.0000 6280.826
43 1 m 35.273666382 483.01141 0.0000 88 424.2640 6263.761
44 1 m 19.182659149 1366.93091 0.0000 85 1280.6249 6132.739
45 1 m 22.921430588 411.46082 100.0000 23 360.5551 6080.109
46 1 f 30.371747971 650.69189 0.0000 0 316.2278 5170.457
47 1 f 45.077022552 353.55341 360.5551 0 200.0000 4320.413
48 1 f 41.561599731 616.19800 200.0000 0 282.8427 4688.870
49 1 f 1.623542666 1300.96118 0.0000 82 1264.9111 5883.870
50 1 f 41.779499054 1109.23389 0.0000 82 900.0000 6782.087
#log+1 transformations
rsf.data$DI.water.log <- log1p(rsf.data$DI.water)
rsf.data$DI.forest.log <- log1p(rsf.data$DI.forest)
rsf.data$DI.settle.log <- log1p(rsf.data$DI.settle)
#standardize
rsf.data[2:5] <- as.data.frame(scale(rsf.data[2:5]))
#set NAs to 0
rsf.data[is.na(rsf.data)] <- 0
#create sex column for interaction terms
rsf.data <- mutate(rsf.data,
sex. = case_when(sex %in% c("f") ~ "1",TRUE ~ "0"))
rsf.data$sex. <- as.factor(rsf.data$sex.)
#glm
glm.closed6 <- glm(case ~ sex.*(poly(DI.water.log,2) + DI.forest.log + DI.settle.log + NDVI) + slope, data=rsf.data, family=binomial(link="logit"))
summary(glm.closed6)
library(magrittr)
library(ggeffects)
library(sjmisc)
library(splines)
library(sjPlot)
library(ggplot2)
# plot ggpredict
ggpredict(glm.global1, c("DI.settle.log", "sex.")) %>% plot()
这就是输出,几乎是我想要的,但我希望通过将其返回转换来使轴更有意义。
我试图将 log1p 包含到 glm 函数中而不是之前进行转换,但这意味着我还必须包含缩放比例,这似乎是 ggpredict 的问题。另外,这意味着我在转换和缩放之前将所有 NA 都变为 0,这不是我想要的。
glm.closed99 <- glm(case ~ sex.*(scale(poly(log1p(DI.water),2)) + scale(log1p(DI.forest)) + scale(log1p(DI.settle)) + scale(NDVI)) + scale(slope), data=rsf.data, family=binomial(link="logit"))
ggpredict(glm.closed99 , c("DI.settle", "sex.")) %>% plot()
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'plot': error in evaluating the argument 'x' in selecting a method for function 'scale': 'degree' must be less than number of unique points
有没有办法将反向转换包含到 ggpredict 函数中?
当我无法使用 built-in 绘图方法来执行我想要的操作时,我会使用 ggpredict()
来获取预测值并自己构建绘图。我无法让您的模型工作(您只在 case=1
处给了我们回复)所以我正在编写我自己的稍微简单的示例。
加载包并适配模型:
library(tidyverse)
library(ggeffects)
mm <- (mtcars
%>% mutate(across(hp, ~log1p(. - min(.))),
across(am, factor))
)
m1 <- lm(mpg ~ hp * am, data = mm)
基地ggpredict
情节:
gg0 <- plot(ggpredict(m1))
获取预测值(沿 x-axis 的间距比默认值更细,因为一旦我们 back-transform 就会绘制曲线),并且 back-transform x-axis 变量通过手:
dd <- (ggpredict(m1, terms = c("hp [n=50]", "am"))
%>% as_tibble()
%>% mutate(xo = expm1(x))
)
用 geom_line()
和 geom_ribbon()
构建一个图:
gg1 <- (ggplot(dd)
+ aes(xo, predicted, ymin = conf.low, ymax = conf.high,
colour = factor(group), fill = factor(group))
+ geom_line()
+ geom_ribbon(colour = NA, alpha = 0.3)
)
相同的图,但具有变换的 x-axis 比例:
gg2 <- gg1 + scale_x_continuous(trans = "log1p")
合并:
cowplot::plot_grid(gg0, gg1, gg2, nrow = 1)
您可以看到这有点难看 — 恢复原始轴标签等还有一些工作要做 — 但这是基本思路。