glmm:模型图中变量的标准化尺度与真实尺度表示
glmm: Standardize vs real-scale variables representation model
我在拟合 glmm 之前对输入变量进行了标准化,但在最终的图中,变量和预测值无法以真实(原始)尺度显示。我的例子如下:
我做:
#Packages
library(lme4)
library(ggplot2)
library(ggeffects)
library(tidyverse)
library(bbmle)
library(broom)
# Load the dataset from the remote CSV file
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
# Inspect the structure of the loaded data
str(myds)
# 'data.frame': 400 obs. of 4 variables:
# $ temp : num 0 0 0 0 0 0 0 0 0 0 ...
# $ storage : int 5 5 5 5 5 5 5 5 5 5 ...
# $ rep : chr "r1" "r2" "r3" "r4" ...
# $ development: int 0 23 22 27 24 25 24 22 0 22 ...
# Storage (days) is temporally correlated with temperature, hence a mixed model.
# Standardize both predictors; drop() turns the one-column matrix returned by
# scale() back into a plain vector.
ds.scale <- mutate(
  myds,
  across(c(temp, storage), function(v) drop(scale(v)))
)
# Candidate models fitted on the standardized data (ds.scale).
# m_1 to m_3 are Poisson GLMMs, m_4 to m_6 their negative-binomial counterparts;
# within each family the fixed effects go from linear terms to second-order
# polynomials. Every model has a random intercept for storage.
m_1 <- glmer(
  development ~ temp + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_2 <- glmer(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_3 <- glmer(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_4 <- glmer.nb(
  development ~ temp + storage + (1 | storage),
  data = ds.scale
)
m_5 <- glmer.nb(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale
)
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale
)
# Collect the fits in a self-named list and rank them by AIC
modList <- tibble::lst(m_1, m_2, m_3, m_4, m_5, m_6)
bbmle::AICtab(modList)
# dAIC df
# m_6 0.0 7
# m_3 1.0 6
# m_5 3.3 6
# m_2 5.0 5
# m_4 17.9 5
# m_1 21.0 4
# Plot the results for the best model by AIC (m_6).
# m_6 was fitted on ds.scale, so the predictor values passed to ggpredict() are
# on the standardized scale, not the raw one.
mydf <- ggpredict(m_6, terms = c("temp [all]", "storage[all]"))
# For temp
# NOTE(review): the points are taken from myds (raw temp values) while the line
# is drawn from mydf (predictions from the model fitted on standardized data),
# so the two layers share an x axis but not a scale.
ggplot(mydf, aes(x, predicted)) +
geom_point(data=myds, aes(temp, development), alpha = 0.5) +
geom_line() +
labs(x = "temp", y = "development")
# For storage
# NOTE(review): mydf is reused unchanged here; presumably its x column still
# holds the first term (temp) rather than storage -- confirm against the
# ggeffects documentation. The points again come from the raw data in myds.
ggplot(mydf, aes(x, predicted)) +
geom_point(data=myds, aes(storage, development), alpha = 0.5) +
geom_line() +
labs(x = "storage", y = "development")
# -------------------------------------------------------------------------------------------
但我希望在最优模型 (m_6) 的图中,以原始尺度表示 temp 和 storage 变量。
正确的方法是什么?
是不顾警告(Model is nearly unidentifiable: very large eigenvalue - Rescale variables?)而不对输入变量进行标准化?
还是在最后对数据做某种反变换?
请问有什么帮助吗?
基本思路是:在标准化尺度上选定若干刻度位置,计算它们对应的原始(未标准化)取值,然后用 ggplot 中的 scale_x_...() 把这些原始取值作为坐标轴标签。
library(datawizard)
library(lme4)
library(ggeffects)
library(ggplot2)
# Read the raw data, standardize the two predictors and refit the selected model
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
d.scale <- standardize(myds, select = c("temp", "storage"))
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = d.scale
)

# ---- temp ----
# Predictions over all observed (standardized) temp values
mydf <- ggpredict(m_6, terms = "temp [all]")
# Centering and scaling constants, read from the attributes of the
# standardized data frame
center_temp <- attr(d.scale, "center")["temp"]
scale_temp <- attr(d.scale, "scale")["temp"]
# Tick positions on the standardized axis and the raw-scale values they
# correspond to (back-transformed, rounded for display)
scaled_range <- c(-1, 0, 1, 2)
new_range <- round(scaled_range * scale_temp + center_temp)
# Plot with the axis labelled in standardized units
plot(mydf, add.data = TRUE)
# Same plot, with the ticks relabelled in original units
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)
# ---- storage ----
# Predictions over all observed (standardized) storage values.
# The term must be the model's column name followed by the literal "[all]" tag;
# the previous string was a machine-translated literal that matched no predictor.
mydf <- ggpredict(m_6, terms = "storage [all]")
# retrieve center and scale from standardization
center_storage <- attributes(d.scale)$center["storage"]
scale_storage <- attributes(d.scale)$scale["storage"]
# tick positions on the standardized axis, back-transformed to the
# unstandardized scale (rounded for display)
scaled_range <- c(-1, 0, 1)
new_range <- round(scaled_range * scale_storage + center_storage)
# plot with the axis labelled in standardized units
plot(mydf, add.data = TRUE)
# same plot, with the ticks relabelled in original units
plot(mydf, add.data = TRUE) +
  scale_x_continuous(
    breaks = scaled_range,
    labels = new_range
  )
我在拟合 glmm 之前对输入变量进行了标准化,但在最终的图中,变量和预测值无法以真实(原始)尺度显示。我的例子如下:
我做:
#Packages
library(lme4)
library(ggplot2)
library(ggeffects)
library(tidyverse)
library(bbmle)
library(broom)
# Load the dataset from the remote CSV file
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
# Inspect the structure of the loaded data
str(myds)
# 'data.frame': 400 obs. of 4 variables:
# $ temp : num 0 0 0 0 0 0 0 0 0 0 ...
# $ storage : int 5 5 5 5 5 5 5 5 5 5 ...
# $ rep : chr "r1" "r2" "r3" "r4" ...
# $ development: int 0 23 22 27 24 25 24 22 0 22 ...
# Storage (days) is temporally correlated with temperature, hence a mixed model.
# Standardize both predictors; drop() turns the one-column matrix returned by
# scale() back into a plain vector.
ds.scale <- mutate(
  myds,
  across(c(temp, storage), function(v) drop(scale(v)))
)
# Candidate models fitted on the standardized data (ds.scale).
# m_1 to m_3 are Poisson GLMMs, m_4 to m_6 their negative-binomial counterparts;
# within each family the fixed effects go from linear terms to second-order
# polynomials. Every model has a random intercept for storage.
m_1 <- glmer(
  development ~ temp + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_2 <- glmer(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_3 <- glmer(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale,
  family = "poisson"
)
m_4 <- glmer.nb(
  development ~ temp + storage + (1 | storage),
  data = ds.scale
)
m_5 <- glmer.nb(
  development ~ poly(temp, 2) + storage + (1 | storage),
  data = ds.scale
)
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = ds.scale
)
# Collect the fits in a self-named list and rank them by AIC
modList <- tibble::lst(m_1, m_2, m_3, m_4, m_5, m_6)
bbmle::AICtab(modList)
# dAIC df
# m_6 0.0 7
# m_3 1.0 6
# m_5 3.3 6
# m_2 5.0 5
# m_4 17.9 5
# m_1 21.0 4
# Plot the results for the best model by AIC (m_6).
# m_6 was fitted on ds.scale, so the predictor values passed to ggpredict() are
# on the standardized scale, not the raw one.
mydf <- ggpredict(m_6, terms = c("temp [all]", "storage[all]"))
# For temp
# NOTE(review): the points are taken from myds (raw temp values) while the line
# is drawn from mydf (predictions from the model fitted on standardized data),
# so the two layers share an x axis but not a scale.
ggplot(mydf, aes(x, predicted)) +
geom_point(data=myds, aes(temp, development), alpha = 0.5) +
geom_line() +
labs(x = "temp", y = "development")
# For storage
# NOTE(review): mydf is reused unchanged here; presumably its x column still
# holds the first term (temp) rather than storage -- confirm against the
# ggeffects documentation. The points again come from the raw data in myds.
ggplot(mydf, aes(x, predicted)) +
geom_point(data=myds, aes(storage, development), alpha = 0.5) +
geom_line() +
labs(x = "storage", y = "development")
# -------------------------------------------------------------------------------------------
但我希望在最优模型 (m_6) 的图中,以原始尺度表示 temp 和 storage 变量。
正确的方法是什么?
是不顾警告(Model is nearly unidentifiable: very large eigenvalue - Rescale variables?)而不对输入变量进行标准化?
还是在最后对数据做某种反变换?
请问有什么帮助吗?
基本思路是:在标准化尺度上选定若干刻度位置,计算它们对应的原始(未标准化)取值,然后用 ggplot 中的 scale_x_...() 把这些原始取值作为坐标轴标签。
library(datawizard)
library(lme4)
library(ggeffects)
library(ggplot2)
# Read the raw data, standardize the two predictors and refit the selected model
myds <- read.csv(
  "https://raw.githubusercontent.com/Leprechault/trash/main/ds.desenvol.csv"
)
d.scale <- standardize(myds, select = c("temp", "storage"))
m_6 <- glmer.nb(
  development ~ poly(temp, 2) + poly(storage, 2) + (1 | storage),
  data = d.scale
)

# ---- temp ----
# Predictions over all observed (standardized) temp values
mydf <- ggpredict(m_6, terms = "temp [all]")
# Centering and scaling constants, read from the attributes of the
# standardized data frame
center_temp <- attr(d.scale, "center")["temp"]
scale_temp <- attr(d.scale, "scale")["temp"]
# Tick positions on the standardized axis and the raw-scale values they
# correspond to (back-transformed, rounded for display)
scaled_range <- c(-1, 0, 1, 2)
new_range <- round(scaled_range * scale_temp + center_temp)
# Plot with the axis labelled in standardized units
plot(mydf, add.data = TRUE)
# Same plot, with the ticks relabelled in original units
plot(mydf, add.data = TRUE) +
  scale_x_continuous(breaks = scaled_range, labels = new_range)
# ---- storage ----
# Predictions over all observed (standardized) storage values.
# The term must be the model's column name followed by the literal "[all]" tag;
# the previous string was a machine-translated literal that matched no predictor.
mydf <- ggpredict(m_6, terms = "storage [all]")
# retrieve center and scale from standardization
center_storage <- attributes(d.scale)$center["storage"]
scale_storage <- attributes(d.scale)$scale["storage"]
# tick positions on the standardized axis, back-transformed to the
# unstandardized scale (rounded for display)
scaled_range <- c(-1, 0, 1)
new_range <- round(scaled_range * scale_storage + center_storage)
# plot with the axis labelled in standardized units
plot(mydf, add.data = TRUE)
# same plot, with the ticks relabelled in original units
plot(mydf, add.data = TRUE) +
  scale_x_continuous(
    breaks = scaled_range,
    labels = new_range
  )