具有群体效应的二项式回归

Question

我正在尝试使用 rstan 构建二项式回归模型。

目的是得到一组t[=36]中两个条件X之间的效果大小bt =] 总和效应大小 btg 对于子组 g。

library(rstan) df <- data.frame(hits=c(36,1261,36,1261,49,1248,17,7670,25,759,29,755),trials=c(118,53850,184,53784,209,53759,118,53850,184,53784,209,53759) ,X=rep(c(1,0),6),g=rep(rep(1:3, each=2),2),t=rep(1:2,each=6),tg=rep(1:6,each=2) ) stanIn <- list(Nt=length(unique(df$t)), #number of groups t Nc=length(df$t), #number of rows Ng=length(unique(df$g)), #number of subgroups g Ntg=length(unique(df$tg)), #number of t and g combinations N=df$trials, n=df$hits, X=df$X, #condition 1 or 0 t=df$t, #index of groups g=df$g, #index of subgroups tg=df$tg) #index of combinations between t and g model <- stan(data=stanIn, file="minimal.stan", chains = 4)

和minimal.stan如下。

data { int<lower=1> Nt; int<lower=1> Nc; int<lower=1> Ng; int<lower=1> Ntg; int<lower=1> N[Nc]; int<lower=0> n[Nc]; int<lower=0,upper=1> X[Nc]; int<lower=1> t[Nc]; int<lower=0> g[Nc]; int<lower=1> tg[Nc]; } parameters { real at[Nt]; // group intercepts real bt[Nt]; // group slopes real btg[Ntg]; // subgroup slopes real atg[Ntg]; // subgroup intercept } transformed parameters { vector[Nc] theta; // binomial probabilities for (i in 1:Nc) { // linear model theta[i] = inv_logit( (atg[tg[i]]+at[t[i]] ) + (bt[t[i]]+btg[tg[i]]) * X[i]); //theta[i] = inv_logit(at[t[i]] + bt[t[i]] * X[i]); //group effect //theta[i] = inv_logit(atg[tg[i]] + btg[tg[i]] * X[i]); //subgroup effects } } model { at ~ normal(0.0, 20.0); bt ~ normal(0.0, 20.0); atg ~ normal(0.0, 20.0); btg ~ normal(0.0, 20.0); n ~ binomial(N, theta); }

我可以用第一个注释行（在 转换后的参数) 和第二个注释行的子组效应。这个想法是将两者结合起来以获得群体效应和个体群体的偏差（第一行）。

然而，这给 bt 和 btg (A) 带来了非常奇怪的结果，我期待更像 (B) （我无法用最小的例子重现 A 中看到的行为，这只发生在完整的数据集中。）

如果从问题类型上看不明显，我是统计建模的新手，怀疑我有概念错误。因此，我将不胜感激有关此问题的任何提示或阅读这些内容的来源（感觉很常见，但我没有找到任何东西）。

Answer 1

我看到了几个问题。最直接的，模型没有被识别为参数atg和btg包括at和bt。我已将它们更改为 ag 和 bg，因为它们是您的子组参数，并将它们编入索引，如下所示：

library(rstan)

df <- data.frame(hits   = c(36, 1261, 36, 1261, 49, 1248, 
                            17, 7670, 25, 759, 29, 755),
                 trials = c(118, 53850, 184, 53784, 209, 53759, 
                            118, 53850, 184, 53784, 209, 53759),
                 X  = rep(c(1,0), 6),
                 g  = rep(rep(1:3, each=2), 2),
                 t  = rep(1:2, each=6),
                 tg = rep(1:6, each=2) )

stanIn <- list(Nt = length(unique(df$t)), #number of groups t
               Nc = length(df$t),         #number of rows
               Ng = length(unique(df$g)), #number of subgroups g
               N  = df$trials, 
               n  = df$hits,
               X  = df$X,                 #condition 1 or 0
               t  = df$t,                 #index of groups
               g  = df$g)                 #index of subgroups

model <- stan(data = stanIn, file = "minimal.stan", 
              cores = 4, chains = 4,
              control = list(max_treedepth = 14))

使用此修改后的 Stan 模型样本没有问题：

data {
  int<lower=1> Nt; 
  int<lower=1> Nc; 
  int<lower=1> Ng; 
  int<lower=1> N[Nc];
  int<lower=0> n[Nc]; 
  vector<lower=0,upper=1>[Nc] X; 
  int<lower=1> t[Nc]; 
  int<lower=0> g[Nc]; 
}

parameters {
  vector<offset=0, multiplier=20>[Nt] at; // group intercepts 
  vector<offset=0, multiplier=20>[Nt] bt; // group slopes  
  vector<offset=0, multiplier=20>[Ng] ag; // subgroup intercepts
  vector<offset=0, multiplier=20>[Ng] bg; // subgroup slopes
}

model {
  at ~ normal(0.0, 20.0);
  bt ~ normal(0.0, 20.0);  
  ag ~ normal(0.0, 20.0);
  bg ~ normal(0.0, 20.0);
  n  ~ binomial_logit(N, ag[g] + at[t] + (bt[t] + bg[g]) .* X);
}

generated quantities {
  vector[Nc] theta = inv_logit(ag[g] + at[t] + (bt[t] + bg[g]) .* X);
}

值得注意的是，我不得不使用高 max_treedepth 来拟合模型，但如果不了解数据，我很难对此发表评论。我还将 theta 移动到 generated quantities 以防您需要这些计算，但 binomial_logit 直接处理此问题。

我还使用 offset 和 multiplier 设置了非中心参数，以便 Stan 采样器有更好的参数 space 来采样。最后，我将循环重新编码为向量。

具有群体效应的二项式回归

binomial regression with group effects

bayesian

rstan