模拟具有特定比值比的重复测量二进制数据
Simulate repeated measures binary data with specific odds ratio
我正在尝试模拟一个二元结果,其中我在两个不同的时期(比如之前和之后)测量了 N 个受试者(具有特定于受试者的概率)。我想通过两个时期之间的特定优势比 (OR) 值来增加特定主题的概率。
模拟后,我使用 glm
和 lme4::glmer
检查我预定义的优势比是否被正确估计。我原以为只有 glm
估计的 OR 才会有偏差。然而,随着我预定义的 OR 值的增加,lme4::glmer
估计的 OR 也有偏差。 我该如何纠正这种偏见?
非常感谢,
下面是我的模拟
rm(list=ls(all=TRUE))
library(lme4)
library(ggplot2)
N = 2000 #Number of subjects
X = 1:20 #Odds ratio values tested
set.seed(20)
P = runif(N,-4,4) #Subject-specific probability (in logit scale)
#Vectors that will be used to create a data frame
ind = rep(paste0("Sub",1:N),2) #Vector of individuals
x1 = c(rep(0,N),rep(1,N)) #Categorical Predictor Variable x1
OR.glm = NULL;OR.glmer = NULL
#Loop over X
for (OR in X){
value = rbinom(N,1,plogis(P)) #Simulating values for x1=0
value.simu = rbinom(N,1,plogis(P+log(OR))) #Simulating values for x1=1
df = data.frame(ind=ind,y=c(value,value.simu),x1=x1) #Creating data frame
#Using glm
GLM = glm(y~factor(x1),data=df,family="binomial")
OR.glm = c(OR.glm,exp(GLM$coef[2]))
#Using glmer for each subject
GLMER = glmer(y~factor(x1)+(1|ind),data=df,family="binomial")
OR.glmer = c(OR.glmer,exp(summary(GLMER)$coef[2,1]))
}
DF = data.frame(method = rep(c("glm","glmer"),each=length(X)),
data = c(OR.glm,OR.glmer),x = rep(X,2))
ggplot(DF,aes(x = x,y = data,group=method, colour=method))+ theme_bw()+
geom_point() + stat_smooth(method = 'loess') +
geom_abline(slope=1, intercept=0) + ylim(0, max(X)) + xlim(0, max(X)) +
xlab("Expected OR") + ylab("Observed OR")
据我所知,你没有模拟正态随机效应,这是glmer()
拟合的混合效应逻辑回归模型背后的假设。
下面的代码模拟具有正常随机效应的数据,并用 glmer()
of lme4 和 mixed_model()
of GLMMadaptive 拟合模型],默认情况下在估计中使用自适应高斯正交(故意代码与固定和随机效应的设计矩阵一起使用,以便在需要时更容易扩展它):
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
# we constuct a data frame with the design:
DF <- data.frame(id = rep(seq_len(n), each = K),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects
X <- model.matrix(~ sex, data = DF)
Z <- model.matrix(~ 1, data = DF)
betas <- c(-2.13, 1) # fixed effects coefficients
D11 <- 0.48 # variance of random intercepts
# we simulate random effects
b <- rnorm(n, sd = sqrt(D11))
# linear predictor
eta_y <- drop(X %*% betas + rowSums(Z * b[DF$id]))
# we simulate binary longitudinal data
DF$y <- rbinom(n * K, 1, plogis(eta_y))
###############################################################################
library("lme4")
fm <- glmer(y ~ sex + (1 | id), data = DF, family = binomial())
summary(fm)
library("GLMMadaptive")
gm <- mixed_model(y ~ sex, random = ~ 1 | id, data = DF, family = binomial())
summary(gm)
我正在尝试模拟一个二元结果,其中我在两个不同的时期(比如之前和之后)测量了 N 个受试者(具有特定于受试者的概率)。我想通过两个时期之间的特定优势比 (OR) 值来增加特定主题的概率。
模拟后,我使用 glm
和 lme4::glmer
检查我预定义的优势比是否被正确估计。我原以为只有 glm
估计的 OR 才会有偏差。然而,随着我预定义的 OR 值的增加,lme4::glmer
估计的 OR 也有偏差。 我该如何纠正这种偏见?
非常感谢,
下面是我的模拟
rm(list=ls(all=TRUE))
library(lme4)
library(ggplot2)
N = 2000 #Number of subjects
X = 1:20 #Odds ratio values tested
set.seed(20)
P = runif(N,-4,4) #Subject-specific probability (in logit scale)
#Vectors that will be used to create a data frame
ind = rep(paste0("Sub",1:N),2) #Vector of individuals
x1 = c(rep(0,N),rep(1,N)) #Categorical Predictor Variable x1
OR.glm = NULL;OR.glmer = NULL
#Loop over X
for (OR in X){
value = rbinom(N,1,plogis(P)) #Simulating values for x1=0
value.simu = rbinom(N,1,plogis(P+log(OR))) #Simulating values for x1=1
df = data.frame(ind=ind,y=c(value,value.simu),x1=x1) #Creating data frame
#Using glm
GLM = glm(y~factor(x1),data=df,family="binomial")
OR.glm = c(OR.glm,exp(GLM$coef[2]))
#Using glmer for each subject
GLMER = glmer(y~factor(x1)+(1|ind),data=df,family="binomial")
OR.glmer = c(OR.glmer,exp(summary(GLMER)$coef[2,1]))
}
DF = data.frame(method = rep(c("glm","glmer"),each=length(X)),
data = c(OR.glm,OR.glmer),x = rep(X,2))
ggplot(DF,aes(x = x,y = data,group=method, colour=method))+ theme_bw()+
geom_point() + stat_smooth(method = 'loess') +
geom_abline(slope=1, intercept=0) + ylim(0, max(X)) + xlim(0, max(X)) +
xlab("Expected OR") + ylab("Observed OR")
据我所知,你没有模拟正态随机效应,这是glmer()
拟合的混合效应逻辑回归模型背后的假设。
下面的代码模拟具有正常随机效应的数据,并用 glmer()
of lme4 和 mixed_model()
of GLMMadaptive 拟合模型],默认情况下在估计中使用自适应高斯正交(故意代码与固定和随机效应的设计矩阵一起使用,以便在需要时更容易扩展它):
set.seed(1234)
n <- 100 # number of subjects
K <- 8 # number of measurements per subject
# we constuct a data frame with the design:
DF <- data.frame(id = rep(seq_len(n), each = K),
sex = rep(gl(2, n/2, labels = c("male", "female")), each = K))
# design matrices for the fixed and random effects
X <- model.matrix(~ sex, data = DF)
Z <- model.matrix(~ 1, data = DF)
betas <- c(-2.13, 1) # fixed effects coefficients
D11 <- 0.48 # variance of random intercepts
# we simulate random effects
b <- rnorm(n, sd = sqrt(D11))
# linear predictor
eta_y <- drop(X %*% betas + rowSums(Z * b[DF$id]))
# we simulate binary longitudinal data
DF$y <- rbinom(n * K, 1, plogis(eta_y))
###############################################################################
library("lme4")
fm <- glmer(y ~ sex + (1 | id), data = DF, family = binomial())
summary(fm)
library("GLMMadaptive")
gm <- mixed_model(y ~ sex, random = ~ 1 | id, data = DF, family = binomial())
summary(gm)