Stan多项式回归参数估计模型回顾
Stan Polynomial Regression Parameter Estimation Model Review
我有以下多项式回归模型:
图像版本
LaTeX 版本
$Y_i | \mu_i, \sigma^2 \sim \text{正常}(\mu_i, \sigma^2), i = 1, \dots, n \ \text{独立}$
$\mu_i = \alpha + \beta_1 x_{i1} + \beta_2 x_{i2} + \beta_3 x_{i1}^2 + \beta_4 x_{i2}^2 + \beta_5 x_{i1} x_{i2}$
$\alpha \sim \text{一些合适的先验}$
$\beta_1, \dots, \beta_5 \sim \text{一些合适的先验}$
$\sigma^2 \sim \text{一些合适的先验}$
我想将样本大小和 $y_i$、$x_{i1}$ 和 $x_{i2}$ 上的观察向量作为输入。代码如下:
data{
int<lower=1> n;
vector[n] x1;
vector[n] x2;
vector[n] y;
}
我想对两个输入变量进行标准化(居中和缩放)以获得标准化的回归变量x1_std
和x2_std
。这个的代码在transformed data
块中,如下:
transformed data{
real bar_x1;
real x1_sd;
vector[n] x1_std;
real bar_x2;
real x2_sd;
vector[n] x2_std;
real y_sd;
bar_x1 = mean(x1);
x1_sd = sd(x1);
x1_std = (x1 - bar_x1)/x1_sd; // centered and scaled
bar_x2 = mean(x2);
x2_sd = sd(x2);
x2_std = (x2 - bar_x2)/x2_sd; // centered and scaled
y_sd = sd(y);
}
然后我想使用标准化回归变量和回归参数 $\alpha$、$\beta_1$ 和 $\dots 的 return 估计来拟合上述多项式回归模型,\ beta_5$, 原始和标准化比例。
据此,如果我没记错的话,标准化参数到原始尺度的转换公式如下:
图像版本
LaTeX 版本
$\alpha = \tilde{\alpha} - \dfrac{\gamma_1}{s_1}\bar{x}_1 - \dfrac{\gamma_2} {s_2}\bar{x}_2 + \dfrac{\gamma_3}{s_1^2}\bar{x}_1^2 + \dfrac{\gamma_4}{s_2^2}\bar{x}_2^2 + \dfrac{\gamma_5}{s_1 s_2}\bar{x}_1\bar{x}_2$
$\beta_1 = \left( \dfrac{\gamma_1}{s_1} - 2\dfrac{\gamma_3}{s_1^2}\bar{x}_1 - \dfrac{\gamma_5}{s_1 s_2}\bar{x}_2\right)$
$\beta_2 = \left( \dfrac{\gamma_2}{s_2} - 2\dfrac{\gamma_4}{s_2^2}\bar{x}_2 - \dfrac{\gamma_5}{s_1 s_2}\bar{x}_1\right)$
$\beta_3 = \dfrac{\gamma_3}{s_1^2}$
$\beta_4 = \dfrac{\gamma_4}{s_2^2}$
$\beta_5 = \dfrac{\gamma_5}{s_1 s_2}$
实现它的代码包含在 generated quantities
块中,如下所示:
alpha = alpha_std - beta1_std*bar_x1/x1_sd - beta2_std*bar_x2/x2_sd
+ (beta3_std*bar_x1^2)/x1_sd^2 + (beta4_std*bar_x2^2)/x2_sd^2
+ (beta5_std*bar_x2*bar_x1)/(x1_sd*x2_sd);
beta1 = beta1_std/x1_sd - 2*beta3_std*bar_x1/x1_sd^2
- beta5_std*bar_x2/(x1_sd*x2_sd);
beta2 = beta2_std/x2_sd - 2*beta4_std*bar_x2/x2_sd^2
- beta5_std*bar_x1/(x1_sd*x2_sd);
beta3 = beta3_std/x1_sd^2;
beta4 = beta4_std/x2_sd^2;
beta5 = beta5_std/(x1_sd*x2_sd);
我的整个模型如下:
data{
int<lower=1> n;
vector[n] x1;
vector[n] x2;
vector[n] y;
}
transformed data{
real bar_x1;
real x1_sd;
vector[n] x1_std;
real bar_x2;
real x2_sd;
vector[n] x2_std;
real y_sd;
bar_x1 = mean(x1);
x1_sd = sd(x1);
x1_std = (x1 - bar_x1)/x1_sd; // centered and scaled
bar_x2 = mean(x2);
x2_sd = sd(x2);
x2_std = (x2 - bar_x2)/x2_sd; // centered and scaled
y_sd = sd(y);
}
parameters{
real<lower=0> sigma;
real alpha_std;
real beta1_std;
real beta2_std;
real beta3_std;
real beta4_std;
real beta5_std;
}
transformed parameters {
real mu[n];
for(i in 1:n) {
mu[i] = alpha_std + beta1_std*x1_std[i]
+ beta2_std*x2_std[i] + beta3_std*x1_std[i]^2
+ beta4_std*x2_std[i]^2 + beta5_std*x1_std[i]*x2_std[i];
}
}
model{
alpha_std ~ normal(0, 10);
beta1_std ~ normal(0, 2.5);
beta2_std ~ normal(0, 2.5);
beta3_std ~ normal(0, 2.5);
beta4_std ~ normal(0, 2.5);
beta5_std ~ normal(0, 2.5);
sigma ~ exponential(1 / y_sd);
y ~ normal(mu, sigma);
}
generated quantities {
real alpha;
real beta1;
real beta2;
real beta3;
real beta4;
real beta5;
alpha = alpha_std - beta1_std*bar_x1/x1_sd - beta2_std*bar_x2/x2_sd
+ (beta3_std*bar_x1^2)/x1_sd^2 + (beta4_std*bar_x2^2)/x2_sd^2
+ (beta5_std*bar_x2*bar_x1)/(x1_sd*x2_sd);
beta1 = beta1_std/x1_sd - 2*beta3_std*bar_x1/x1_sd^2
- beta5_std*bar_x2/(x1_sd*x2_sd);
beta2 = beta2_std/x2_sd - 2*beta4_std*bar_x2/x2_sd^2
- beta5_std*bar_x1/(x1_sd*x2_sd);
beta3 = beta3_std/x1_sd^2;
beta4 = beta4_std/x2_sd^2;
beta5 = beta5_std/(x1_sd*x2_sd);
}
我正在使用 R 的 MASS
包中的 hills
数据集:
library(MASS)
hills[18, 3] <- 18.65 # Fixing transcription error
x1 <- hills$dist
x2 <- hills$climb
y <- hills$time
n <- length(x1)
data.in <- list(x1 = x1, x2 = x2, y = y, n = n)
model.fit <- sampling(example, data.in)
现在我输出标准化的(alpha_std
、beta1_std
、beta2_std
、beta3_std
、beta4_std
、beta5_std
)和原始的尺度(alpha
、beta1
、beta2
、beta3
、beta4
、beta5
)回归参数:
print(model.fit, pars = c("alpha_std", "alpha", "beta1_std", "beta2_std", "beta3_std", "beta4_std", "beta5_std", "beta1", "beta2", "beta3", "beta4", "beta5", "sigma"), probs = c(0.05, 0.5, 0.95), digits = 5)
我做对了吗?我还对数学进行了两次和三次检查,所以我 认为 它应该是正确的。尽管如此,让我感到紧张的一件事是 beta4
是 0.00000。这是否表明我犯了错误?正如我所说,我已经检查了我所有的代码和数学,所以,据我所知,一切似乎都很好。
好的,我刚刚发现问题是我没有打印足够位数的值(5 不够)以查看该值不是 0.00000。其他都没问题。
我有以下多项式回归模型:
图像版本
LaTeX 版本
$Y_i | \mu_i, \sigma^2 \sim \text{正常}(\mu_i, \sigma^2), i = 1, \dots, n \ \text{独立}$
$\mu_i = \alpha + \beta_1 x_{i1} + \beta_2 x_{i2} + \beta_3 x_{i1}^2 + \beta_4 x_{i2}^2 + \beta_5 x_{i1} x_{i2}$
$\alpha \sim \text{一些合适的先验}$
$\beta_1, \dots, \beta_5 \sim \text{一些合适的先验}$
$\sigma^2 \sim \text{一些合适的先验}$
我想将样本大小和 $y_i$、$x_{i1}$ 和 $x_{i2}$ 上的观察向量作为输入。代码如下:
data{
int<lower=1> n;
vector[n] x1;
vector[n] x2;
vector[n] y;
}
我想对两个输入变量进行标准化(居中和缩放)以获得标准化的回归变量x1_std
和x2_std
。这个的代码在transformed data
块中,如下:
transformed data{
real bar_x1;
real x1_sd;
vector[n] x1_std;
real bar_x2;
real x2_sd;
vector[n] x2_std;
real y_sd;
bar_x1 = mean(x1);
x1_sd = sd(x1);
x1_std = (x1 - bar_x1)/x1_sd; // centered and scaled
bar_x2 = mean(x2);
x2_sd = sd(x2);
x2_std = (x2 - bar_x2)/x2_sd; // centered and scaled
y_sd = sd(y);
}
然后我想使用标准化回归变量和回归参数 $\alpha$、$\beta_1$ 和 $\dots 的 return 估计来拟合上述多项式回归模型,\ beta_5$, 原始和标准化比例。
据此,如果我没记错的话,标准化参数到原始尺度的转换公式如下:
图像版本
LaTeX 版本
$\alpha = \tilde{\alpha} - \dfrac{\gamma_1}{s_1}\bar{x}_1 - \dfrac{\gamma_2} {s_2}\bar{x}_2 + \dfrac{\gamma_3}{s_1^2}\bar{x}_1^2 + \dfrac{\gamma_4}{s_2^2}\bar{x}_2^2 + \dfrac{\gamma_5}{s_1 s_2}\bar{x}_1\bar{x}_2$
$\beta_1 = \left( \dfrac{\gamma_1}{s_1} - 2\dfrac{\gamma_3}{s_1^2}\bar{x}_1 - \dfrac{\gamma_5}{s_1 s_2}\bar{x}_2\right)$
$\beta_2 = \left( \dfrac{\gamma_2}{s_2} - 2\dfrac{\gamma_4}{s_2^2}\bar{x}_2 - \dfrac{\gamma_5}{s_1 s_2}\bar{x}_1\right)$
$\beta_3 = \dfrac{\gamma_3}{s_1^2}$
$\beta_4 = \dfrac{\gamma_4}{s_2^2}$
$\beta_5 = \dfrac{\gamma_5}{s_1 s_2}$
实现它的代码包含在 generated quantities
块中,如下所示:
alpha = alpha_std - beta1_std*bar_x1/x1_sd - beta2_std*bar_x2/x2_sd
+ (beta3_std*bar_x1^2)/x1_sd^2 + (beta4_std*bar_x2^2)/x2_sd^2
+ (beta5_std*bar_x2*bar_x1)/(x1_sd*x2_sd);
beta1 = beta1_std/x1_sd - 2*beta3_std*bar_x1/x1_sd^2
- beta5_std*bar_x2/(x1_sd*x2_sd);
beta2 = beta2_std/x2_sd - 2*beta4_std*bar_x2/x2_sd^2
- beta5_std*bar_x1/(x1_sd*x2_sd);
beta3 = beta3_std/x1_sd^2;
beta4 = beta4_std/x2_sd^2;
beta5 = beta5_std/(x1_sd*x2_sd);
我的整个模型如下:
data{
int<lower=1> n;
vector[n] x1;
vector[n] x2;
vector[n] y;
}
transformed data{
real bar_x1;
real x1_sd;
vector[n] x1_std;
real bar_x2;
real x2_sd;
vector[n] x2_std;
real y_sd;
bar_x1 = mean(x1);
x1_sd = sd(x1);
x1_std = (x1 - bar_x1)/x1_sd; // centered and scaled
bar_x2 = mean(x2);
x2_sd = sd(x2);
x2_std = (x2 - bar_x2)/x2_sd; // centered and scaled
y_sd = sd(y);
}
parameters{
real<lower=0> sigma;
real alpha_std;
real beta1_std;
real beta2_std;
real beta3_std;
real beta4_std;
real beta5_std;
}
transformed parameters {
real mu[n];
for(i in 1:n) {
mu[i] = alpha_std + beta1_std*x1_std[i]
+ beta2_std*x2_std[i] + beta3_std*x1_std[i]^2
+ beta4_std*x2_std[i]^2 + beta5_std*x1_std[i]*x2_std[i];
}
}
model{
alpha_std ~ normal(0, 10);
beta1_std ~ normal(0, 2.5);
beta2_std ~ normal(0, 2.5);
beta3_std ~ normal(0, 2.5);
beta4_std ~ normal(0, 2.5);
beta5_std ~ normal(0, 2.5);
sigma ~ exponential(1 / y_sd);
y ~ normal(mu, sigma);
}
generated quantities {
real alpha;
real beta1;
real beta2;
real beta3;
real beta4;
real beta5;
alpha = alpha_std - beta1_std*bar_x1/x1_sd - beta2_std*bar_x2/x2_sd
+ (beta3_std*bar_x1^2)/x1_sd^2 + (beta4_std*bar_x2^2)/x2_sd^2
+ (beta5_std*bar_x2*bar_x1)/(x1_sd*x2_sd);
beta1 = beta1_std/x1_sd - 2*beta3_std*bar_x1/x1_sd^2
- beta5_std*bar_x2/(x1_sd*x2_sd);
beta2 = beta2_std/x2_sd - 2*beta4_std*bar_x2/x2_sd^2
- beta5_std*bar_x1/(x1_sd*x2_sd);
beta3 = beta3_std/x1_sd^2;
beta4 = beta4_std/x2_sd^2;
beta5 = beta5_std/(x1_sd*x2_sd);
}
我正在使用 R 的 MASS
包中的 hills
数据集:
library(MASS)
hills[18, 3] <- 18.65 # Fixing transcription error
x1 <- hills$dist
x2 <- hills$climb
y <- hills$time
n <- length(x1)
data.in <- list(x1 = x1, x2 = x2, y = y, n = n)
model.fit <- sampling(example, data.in)
现在我输出标准化的(alpha_std
、beta1_std
、beta2_std
、beta3_std
、beta4_std
、beta5_std
)和原始的尺度(alpha
、beta1
、beta2
、beta3
、beta4
、beta5
)回归参数:
print(model.fit, pars = c("alpha_std", "alpha", "beta1_std", "beta2_std", "beta3_std", "beta4_std", "beta5_std", "beta1", "beta2", "beta3", "beta4", "beta5", "sigma"), probs = c(0.05, 0.5, 0.95), digits = 5)
我做对了吗?我还对数学进行了两次和三次检查,所以我 认为 它应该是正确的。尽管如此,让我感到紧张的一件事是 beta4
是 0.00000。这是否表明我犯了错误?正如我所说,我已经检查了我所有的代码和数学,所以,据我所知,一切似乎都很好。
好的,我刚刚发现问题是我没有打印足够位数的值(5 不够)以查看该值不是 0.00000。其他都没问题。