brm回归参数的含义
meaning of brm regression parameters
我正在使用 brms 包在预测变量 x 上构建一个具有高斯过程的多级模型。该模型如下所示:make_stancode(y ~ gp(x, cov = "exp_quad", by= groups) + (1| groups), data = dat) 所以 x 预测变量上的 gp 和多级组变量。就我而言,我有 5 个组。我一直在查看它的代码(如下),我试图弄清楚一些参数的含义和维度。
我看到M_1是组数
我的问题是:
- N_1是什么意思,和观察次数N一样吗?这里用到:vector[N_1]z_1[M_1]; // 未缩放的组级效果
对于Kgp_1和Mgp_1(intKgp_1;和intMgp_1;),如果我有5组都是Kgp_1 和 Mgp_1 等于 5?如果是这样,为什么要使用两个变量?
// 使用 brms 1.10.0 生成
功能{
/* compute a latent Gaussian process
* Args:
* x: array of continuous predictor values
* sdgp: marginal SD parameter
* lscale: length-scale parameter
* zgp: vector of independent standard normal variables
* Returns:
* a vector to be added to the linear predictor
*/
vector gp(vector[] x, real sdgp, real lscale, vector zgp) {
matrix[size(x), size(x)] cov;
cov = cov_exp_quad(x, sdgp, lscale);
for (n in 1:size(x)) {
// deal with numerical non-positive-definiteness
cov[n, n] = cov[n, n] + 1e-12;
}
return cholesky_decompose(cov) * zgp;
}
}
data {
int<lower=1> N; // total number of observations
vector[N] Y; // response variable
int<lower=1> Kgp_1;
int<lower=1> Mgp_1;
vector[Mgp_1] Xgp_1[N];
int<lower=1> Igp_1[Kgp_1];
int<lower=1> Jgp_1_1[Igp_1[1]];
int<lower=1> Jgp_1_2[Igp_1[2]];
int<lower=1> Jgp_1_3[Igp_1[3]];
int<lower=1> Jgp_1_4[Igp_1[4]];
int<lower=1> Jgp_1_5[Igp_1[5]];
// data for group-level effects of ID 1
int<lower=1> J_1[N];
int<lower=1> N_1;
int<lower=1> M_1;
vector[N] Z_1_1;
int prior_only; // should the likelihood be ignored?
}
transformed data {
}
parameters {
real temp_Intercept; // temporary intercept
// GP hyperparameters
vector<lower=0>[Kgp_1] sdgp_1;
vector<lower=0>[Kgp_1] lscale_1;
vector[N] zgp_1;
real<lower=0> sigma; // residual SD
vector<lower=0>[M_1] sd_1; // group-level standard deviations
vector[N_1] z_1[M_1]; // unscaled group-level effects
}
transformed parameters {
// group-level effects
vector[N_1] r_1_1 = sd_1[1] * (z_1[1]);
}
model {
vector[N] mu = rep_vector(0, N) + temp_Intercept;
mu[Jgp_1_1] = mu[Jgp_1_1] + gp(Xgp_1[Jgp_1_1], sdgp_1[1], lscale_1[1], zgp_1[Jgp_1_1]);
mu[Jgp_1_2] = mu[Jgp_1_2] + gp(Xgp_1[Jgp_1_2], sdgp_1[2], lscale_1[2], zgp_1[Jgp_1_2]);
mu[Jgp_1_3] = mu[Jgp_1_3] + gp(Xgp_1[Jgp_1_3], sdgp_1[3], lscale_1[3], zgp_1[Jgp_1_3]);
mu[Jgp_1_4] = mu[Jgp_1_4] + gp(Xgp_1[Jgp_1_4], sdgp_1[4], lscale_1[4], zgp_1[Jgp_1_4]);
mu[Jgp_1_5] = mu[Jgp_1_5] + gp(Xgp_1[Jgp_1_5], sdgp_1[5], lscale_1[5], zgp_1[Jgp_1_5]);
for (n in 1:N) {
mu[n] = mu[n] + (r_1_1[J_1[n]]) * Z_1_1[n];
}
// priors including all constants
target += student_t_lpdf(sdgp_1 | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
target += normal_lpdf(lscale_1 | 0, 0.5)
- 1 * normal_lccdf(0 | 0, 0.5);
target += normal_lpdf(zgp_1 | 0, 1);
target += student_t_lpdf(sigma | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
target += student_t_lpdf(sd_1 | 3, 0, 10)
- 1 * student_t_lccdf(0 | 3, 0, 10);
target += normal_lpdf(z_1[1] | 0, 1);
// likelihood including all constants
if (!prior_only) {
target += normal_lpdf(Y | mu, sigma);
}
}
generated quantities {
// actual population-level intercept
real b_Intercept = temp_Intercept;
}
如果您在同一个公式上使用 make_standata(...)
,您可以看到将传递给 Stan 的数据。从这里,您可以拼凑一些变量的作用。如果我使用 lme4::sleepstudy
数据集作为您数据的代理,我得到:
library(brms)
dat <- lme4::sleepstudy
dat$groups <- dat$Subject
dat$y <- dat$Reaction
dat$x <- dat$Days
s_data <- make_standata(
y ~ gp(x, cov = "exp_quad", by= groups) + (1| groups), data = dat)
s_data$N_1
#> 18
对于 N_1
,我得到 18,这是此数据集中 groups
中的级别数。
For Kgp_1 and Mgp_1 ( int Kgp_1; and int Mgp_1;), if I have 5 groups are both Kgp_1 and Mgp_1 equal to 5? If so, why are two variables used?
s_data$Mgp_1
#> 1
s_data$Kgp_1
#> 18
看来Kgp_1
又是组数。我不确定 Mgp_1 除了设置向量的长度 vector[Mgp_1] Xgp_1[N];
之外还能做什么
我正在使用 brms 包在预测变量 x 上构建一个具有高斯过程的多级模型。该模型如下所示:make_stancode(y ~ gp(x, cov = "exp_quad", by= groups) + (1| groups), data = dat) 所以 x 预测变量上的 gp 和多级组变量。就我而言,我有 5 个组。我一直在查看它的代码(如下),我试图弄清楚一些参数的含义和维度。
我看到M_1是组数
我的问题是:
- N_1是什么意思,和观察次数N一样吗?这里用到:vector[N_1]z_1[M_1]; // 未缩放的组级效果
对于Kgp_1和Mgp_1(intKgp_1;和intMgp_1;),如果我有5组都是Kgp_1 和 Mgp_1 等于 5?如果是这样,为什么要使用两个变量?
// 使用 brms 1.10.0 生成 功能{
/* compute a latent Gaussian process * Args: * x: array of continuous predictor values * sdgp: marginal SD parameter * lscale: length-scale parameter * zgp: vector of independent standard normal variables * Returns: * a vector to be added to the linear predictor */ vector gp(vector[] x, real sdgp, real lscale, vector zgp) { matrix[size(x), size(x)] cov; cov = cov_exp_quad(x, sdgp, lscale); for (n in 1:size(x)) { // deal with numerical non-positive-definiteness cov[n, n] = cov[n, n] + 1e-12; } return cholesky_decompose(cov) * zgp; } } data { int<lower=1> N; // total number of observations vector[N] Y; // response variable int<lower=1> Kgp_1; int<lower=1> Mgp_1; vector[Mgp_1] Xgp_1[N]; int<lower=1> Igp_1[Kgp_1]; int<lower=1> Jgp_1_1[Igp_1[1]]; int<lower=1> Jgp_1_2[Igp_1[2]]; int<lower=1> Jgp_1_3[Igp_1[3]]; int<lower=1> Jgp_1_4[Igp_1[4]]; int<lower=1> Jgp_1_5[Igp_1[5]]; // data for group-level effects of ID 1 int<lower=1> J_1[N]; int<lower=1> N_1; int<lower=1> M_1; vector[N] Z_1_1; int prior_only; // should the likelihood be ignored? } transformed data { } parameters { real temp_Intercept; // temporary intercept // GP hyperparameters vector<lower=0>[Kgp_1] sdgp_1; vector<lower=0>[Kgp_1] lscale_1; vector[N] zgp_1; real<lower=0> sigma; // residual SD vector<lower=0>[M_1] sd_1; // group-level standard deviations vector[N_1] z_1[M_1]; // unscaled group-level effects } transformed parameters { // group-level effects vector[N_1] r_1_1 = sd_1[1] * (z_1[1]); } model { vector[N] mu = rep_vector(0, N) + temp_Intercept; mu[Jgp_1_1] = mu[Jgp_1_1] + gp(Xgp_1[Jgp_1_1], sdgp_1[1], lscale_1[1], zgp_1[Jgp_1_1]); mu[Jgp_1_2] = mu[Jgp_1_2] + gp(Xgp_1[Jgp_1_2], sdgp_1[2], lscale_1[2], zgp_1[Jgp_1_2]); mu[Jgp_1_3] = mu[Jgp_1_3] + gp(Xgp_1[Jgp_1_3], sdgp_1[3], lscale_1[3], zgp_1[Jgp_1_3]); mu[Jgp_1_4] = mu[Jgp_1_4] + gp(Xgp_1[Jgp_1_4], sdgp_1[4], lscale_1[4], zgp_1[Jgp_1_4]); mu[Jgp_1_5] = mu[Jgp_1_5] + gp(Xgp_1[Jgp_1_5], sdgp_1[5], lscale_1[5], zgp_1[Jgp_1_5]); for (n in 1:N) { mu[n] = mu[n] + (r_1_1[J_1[n]]) * Z_1_1[n]; } // priors including all constants target += student_t_lpdf(sdgp_1 | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10); target += normal_lpdf(lscale_1 | 0, 0.5) - 1 * normal_lccdf(0 | 0, 0.5); target += normal_lpdf(zgp_1 | 0, 1); target += student_t_lpdf(sigma | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10); target += student_t_lpdf(sd_1 | 3, 0, 10) - 1 * student_t_lccdf(0 | 3, 0, 10); target += normal_lpdf(z_1[1] | 0, 1); // likelihood including all constants if (!prior_only) { target += normal_lpdf(Y | mu, sigma); } } generated quantities { // actual population-level intercept real b_Intercept = temp_Intercept; }
如果您在同一个公式上使用 make_standata(...)
,您可以看到将传递给 Stan 的数据。从这里,您可以拼凑一些变量的作用。如果我使用 lme4::sleepstudy
数据集作为您数据的代理,我得到:
library(brms)
dat <- lme4::sleepstudy
dat$groups <- dat$Subject
dat$y <- dat$Reaction
dat$x <- dat$Days
s_data <- make_standata(
y ~ gp(x, cov = "exp_quad", by= groups) + (1| groups), data = dat)
s_data$N_1
#> 18
对于 N_1
,我得到 18,这是此数据集中 groups
中的级别数。
For Kgp_1 and Mgp_1 ( int Kgp_1; and int Mgp_1;), if I have 5 groups are both Kgp_1 and Mgp_1 equal to 5? If so, why are two variables used?
s_data$Mgp_1
#> 1
s_data$Kgp_1
#> 18
看来Kgp_1
又是组数。我不确定 Mgp_1 除了设置向量的长度 vector[Mgp_1] Xgp_1[N];