glmm:标准化与真实尺度变量表示模型

glmm: Standardize vs real-scale variables representation model

我在 glmm 调整之前标准化了我的输入变量,但在最终图中,我的变量和预测值的真实规模存在问题。在我的例子中:

我做:

#Packages
library(lme4)
library(ggplot2)
library(ggeffects)
library(tidyverse)
library(bbmle) 
library(broom)

#Open my dataset
myds<-read.csv("https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv")
str(myds)
# 'data.frame': 400 obs. of  4 variables:
#  $ temp       : num  0 0 0 0 0 0 0 0 0 0 ...
#  $ storage    : int  5 5 5 5 5 5 5 5 5 5 ...
#  $ rep        : chr  "r1" "r2" "r3" "r4" ...
#  $ development: int  0 23 22 27 24 25 24 22 0 22 ...

# Storage (days) is temporally correlated with temperature then mixed model
ds.scale<- myds %>%
  mutate(across(c(temp, storage), ~ drop(scale(.))))

# Models creation Poisson/Negative binomial
m_1 <- glmer(development ~ temp + storage +
               (1 | storage ), data = ds.scale, 
                 family = "poisson")
m_2 <- glmer(development ~ poly(temp,2) + storage +
               (1 | storage ), data = ds.scale, 
                 family = "poisson")  
m_3 <- glmer(development ~ poly(temp,2) + poly(storage,2) +
               (1 | storage ), data = ds.scale, 
                 family = "poisson")  
m_4 <- glmer.nb(development ~ temp + storage +
               (1 | storage ), data = ds.scale)
m_5 <- glmer.nb(development ~ poly(temp,2) + storage +
               (1 | storage ), data = ds.scale)  
m_6 <- glmer.nb(development ~ poly(temp,2) + poly(storage,2) +
               (1 | storage ), data = ds.scale)   
modList <- tibble::lst(m_1,m_2,m_3,m_4,m_5,m_6)
bbmle::AICtab(modList)

#     dAIC df
# m_6  0.0 7 
# m_3  1.0 6 
# m_5  3.3 6 
# m_2  5.0 5 
# m_4 17.9 5 
# m_1 21.0 4

# Plot the results for my better model (m_6)
mydf <- ggpredict(m_6, terms = c("temp [all]", "storage[all]"))

# For temp
ggplot(mydf, aes(x, predicted)) +
  geom_point(data=myds, aes(temp, development), alpha = 0.5) + 
  geom_line() +
  labs(x = "temp", y = "development")

# For storage
ggplot(mydf, aes(x, predicted)) +
  geom_point(data=myds, aes(storage, development), alpha = 0.5) + 
  geom_line() +
  labs(x = "storage", y = "development")
# -------------------------------------------------------------------------------------------  

但我希望 tempstorage 变量的原始比例在我更好的模型 (m_6) 中表示。 正确的方法是什么? 不要标准化我的输入变量,尽管有警告(Model is nearly unidentifiable: very large eigenvalue - Rescale variables?)? 最后进行一些数据转换?

请问有什么帮助吗?

基本思想是将非标准化变量的范围值映射到标准化变量的范围值,然后使用 ggplot 中的 scale_x_...() 来更改标签。

library(datawizard)
library(lme4)
library(ggeffects)
library(ggplot2)

myds <- read.csv("https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv")
d.scale <- standardize(myds, select = c("temp", "storage"))
m_6 <- glmer.nb(development ~ poly(temp,2) + poly(storage,2) + (1 | storage ), data = d.scale)


# for temp
mydf <- ggpredict(m_6, terms = "temp [all]")

# retrieve center and scale from standardization
center_temp <- attributes(d.scale)$center["temp"]
scale_temp <- attributes(d.scale)$scale["temp"]

# scaled range, calculate back to range of unstandardized
scaled_range <- c(-1, 0, 1, 2)
new_range <- round(scaled_range * scale_temp + center_temp)

# scaled range
plot(mydf, add.data = TRUE)

# original range
plot(mydf, add.data = TRUE) +
  scale_x_continuous(
    breaks = scaled_range,
    labels = new_range
  )


# 用于存储 mydf <- ggpredict(m_6, terms = "存储[全部]")

# retrieve center and scale from standardization
center_storage <- attributes(d.scale)$center["storage"]
scale_storage <- attributes(d.scale)$scale["storage"]

# scaled range, calculate back to range of unstandardized
scaled_range <- c(-1, 0, 1)
new_range <- round(scaled_range * scale_storage + center_storage)

# scaled range
plot(mydf, add.data = TRUE)

# original range
plot(mydf, add.data = TRUE) +
  scale_x_continuous(
    breaks = scaled_range,
    labels = new_range
  )