拟合逻辑模型时出现 stan 错误 "undefined values"
stan error "undefined values" while fitting logistic model
这里是新的 stan 用户。这个特定模型(基本上是混合效应逻辑回归)有时会 运行,但经常会出现错误 "The following variables have undefined values: log_lik[182]" 等。"dev" 或 "log_lik" 值总是有问题.它被捕获的索引有时在区域之间的过渡处,但也在某些 运行s 中的随机位置。
斯坦模型:
data{
int nObs;
int S[nObs];
int<lower=0> n[nObs];
real Area2[nObs];
real Area3[nObs];
real Julian_Day[nObs];
int Year[nObs];
int nYears;
}
parameters{
real intercept_raw;
real beta_Area2_raw;
real beta_Area3_raw;
real gamm_raw;
real gamm_raw_Area2;
real gamm_raw_Area3;
real vary_Year[nYears];
real<lower=0> sigma_Year;
}
transformed parameters {
real intercept;
real beta_Area2;
real beta_Area3;
real gamm;
real gamm_Area2;
real gamm_Area3;
intercept <- intercept_raw*20;
beta_Area2 <- beta_Area2_raw*5;
beta_Area3 <- beta_Area3_raw*5;
gamm <- gamm_raw*5;
gamm_Area2 <- gamm_raw_Area2*5;
gamm_Area3 <- gamm_raw_Area3*5;
}
model{
real vary[nObs];
real y[nObs];
// Priors
intercept_raw ~ normal(0,1);
beta_Area2_raw ~ normal( 0 , 1 );
beta_Area3_raw ~ normal( 0 , 1 );
gamm_raw ~ normal( 0 , 1 );
gamm_raw_Area2 ~ normal( 0 , 1 );
gamm_raw_Area3 ~ normal( 0 , 1 );
sigma_Year ~ cauchy( 0 , 5 );
// random effect
for ( j in 1:nYears ) vary_Year[j] ~ normal( 0 , sigma_Year );
// Fixed effects
for ( i in 1:nObs ) {
vary[i] <- vary_Year[Year[i]];
y[i] <- vary[i] + intercept
+ beta_Area2 * Area2[i]
+ beta_Area3 * Area3[i]
+ gamm * Julian_Day[i]
+ gamm_Area2 * Area2[i] * Julian_Day[i]
+ gamm_Area3 * Area3[i] * Julian_Day[i];
}
S ~ binomial_logit( n, y );
}
generated quantities{
real y_pred[nObs];
real dev;
real y[nObs];
real vary[nObs];
vector[nObs] log_lik;
dev <- 0;
for ( i in 1:nObs ) {
vary[i] <- vary_Year[Year[i]];
y[i] <- vary[i] + intercept
+ beta_Area2 * Area2[i]
+ beta_Area3 * Area3[i]
+ gamm * Julian_Day[i]
+ gamm_Area2 * Area2[i] * Julian_Day[i]
+ gamm_Area3 * Area3[i] * Julian_Day[i];
log_lik[i] <- binomial_log( S[i] , n[i] , inv_logit(y[i]));
dev <- dev + (-2) * log_lik[i];
y_pred[i] <- binomial_rng(100, inv_logit(y[i]) );
}
}
数据看起来像这样(数据框 "SDF"):
Year Area.ID DayIndex S n Area1 Area2 Area3
1 1 1 19 1 1 1 0 0
2 1 1 22 0 2 1 0 0
3 1 1 23 2 5 1 0 0
4 1 1 24 1 3 1 0 0
5 1 1 26 3 3 1 0 0
6 1 1 28 1 3 1 0 0
这些调用在 R 中使用:
Dlist <- list ("nObs"=dim(SDF)[1], "S"=SDF$S, "n"=SDF$n,
"Area2"= SDF$Area2,"Area3"= SDF$Area3, "Julian_Day"=SDF$DayIndex,
"Year"=SDF$Year,"nYears"=length(unique(SDF$Year)))
# Fit intercept model using stan
fit_ints <- stan(file='STAN/Logistic_Diff_Slope_SN.stan',data = Dlist, iter=5000, chains=3)
当某些生成数量的计算结果为 NaN
时会出现此错误消息,通常是由于数值下溢或溢出。
在您的情况下,您可以通过使用数值更稳定的 binomial_logit_log
函数而不是 binomial_log
函数来避免它(出于与您在中使用 binomial_logit
相同的原因model
块而不是 binomial
)。换句话说,它应该是
log_lik[i] <- binomial_logit_log( S[i] , n[i] , y[i]);
而不是
log_lik[i] <- binomial_log( S[i] , n[i] , inv_logit(y[i]));
此外,当从后验预测分布中提取时,你可以做类似的事情
p <- inv_logit(y[i]);
if (is_nan(p)) y_pred[i] <- y[i] > 0;
else if (p >= 1) y_pred[i] <- 1;
else if (p <= 0) y_pred[i] <- 0;
else y_pred[i] <- binomial_rng(100, inv_logit(y[i]));
不幸的是,目前 Stan 中没有 binomial_logit_rng
功能。
这里是新的 stan 用户。这个特定模型(基本上是混合效应逻辑回归)有时会 运行,但经常会出现错误 "The following variables have undefined values: log_lik[182]" 等。"dev" 或 "log_lik" 值总是有问题.它被捕获的索引有时在区域之间的过渡处,但也在某些 运行s 中的随机位置。
斯坦模型:
data{
int nObs;
int S[nObs];
int<lower=0> n[nObs];
real Area2[nObs];
real Area3[nObs];
real Julian_Day[nObs];
int Year[nObs];
int nYears;
}
parameters{
real intercept_raw;
real beta_Area2_raw;
real beta_Area3_raw;
real gamm_raw;
real gamm_raw_Area2;
real gamm_raw_Area3;
real vary_Year[nYears];
real<lower=0> sigma_Year;
}
transformed parameters {
real intercept;
real beta_Area2;
real beta_Area3;
real gamm;
real gamm_Area2;
real gamm_Area3;
intercept <- intercept_raw*20;
beta_Area2 <- beta_Area2_raw*5;
beta_Area3 <- beta_Area3_raw*5;
gamm <- gamm_raw*5;
gamm_Area2 <- gamm_raw_Area2*5;
gamm_Area3 <- gamm_raw_Area3*5;
}
model{
real vary[nObs];
real y[nObs];
// Priors
intercept_raw ~ normal(0,1);
beta_Area2_raw ~ normal( 0 , 1 );
beta_Area3_raw ~ normal( 0 , 1 );
gamm_raw ~ normal( 0 , 1 );
gamm_raw_Area2 ~ normal( 0 , 1 );
gamm_raw_Area3 ~ normal( 0 , 1 );
sigma_Year ~ cauchy( 0 , 5 );
// random effect
for ( j in 1:nYears ) vary_Year[j] ~ normal( 0 , sigma_Year );
// Fixed effects
for ( i in 1:nObs ) {
vary[i] <- vary_Year[Year[i]];
y[i] <- vary[i] + intercept
+ beta_Area2 * Area2[i]
+ beta_Area3 * Area3[i]
+ gamm * Julian_Day[i]
+ gamm_Area2 * Area2[i] * Julian_Day[i]
+ gamm_Area3 * Area3[i] * Julian_Day[i];
}
S ~ binomial_logit( n, y );
}
generated quantities{
real y_pred[nObs];
real dev;
real y[nObs];
real vary[nObs];
vector[nObs] log_lik;
dev <- 0;
for ( i in 1:nObs ) {
vary[i] <- vary_Year[Year[i]];
y[i] <- vary[i] + intercept
+ beta_Area2 * Area2[i]
+ beta_Area3 * Area3[i]
+ gamm * Julian_Day[i]
+ gamm_Area2 * Area2[i] * Julian_Day[i]
+ gamm_Area3 * Area3[i] * Julian_Day[i];
log_lik[i] <- binomial_log( S[i] , n[i] , inv_logit(y[i]));
dev <- dev + (-2) * log_lik[i];
y_pred[i] <- binomial_rng(100, inv_logit(y[i]) );
}
}
数据看起来像这样(数据框 "SDF"):
Year Area.ID DayIndex S n Area1 Area2 Area3
1 1 1 19 1 1 1 0 0
2 1 1 22 0 2 1 0 0
3 1 1 23 2 5 1 0 0
4 1 1 24 1 3 1 0 0
5 1 1 26 3 3 1 0 0
6 1 1 28 1 3 1 0 0
这些调用在 R 中使用:
Dlist <- list ("nObs"=dim(SDF)[1], "S"=SDF$S, "n"=SDF$n,
"Area2"= SDF$Area2,"Area3"= SDF$Area3, "Julian_Day"=SDF$DayIndex,
"Year"=SDF$Year,"nYears"=length(unique(SDF$Year)))
# Fit intercept model using stan
fit_ints <- stan(file='STAN/Logistic_Diff_Slope_SN.stan',data = Dlist, iter=5000, chains=3)
当某些生成数量的计算结果为 NaN
时会出现此错误消息,通常是由于数值下溢或溢出。
在您的情况下,您可以通过使用数值更稳定的 binomial_logit_log
函数而不是 binomial_log
函数来避免它(出于与您在中使用 binomial_logit
相同的原因model
块而不是 binomial
)。换句话说,它应该是
log_lik[i] <- binomial_logit_log( S[i] , n[i] , y[i]);
而不是
log_lik[i] <- binomial_log( S[i] , n[i] , inv_logit(y[i]));
此外,当从后验预测分布中提取时,你可以做类似的事情
p <- inv_logit(y[i]);
if (is_nan(p)) y_pred[i] <- y[i] > 0;
else if (p >= 1) y_pred[i] <- 1;
else if (p <= 0) y_pred[i] <- 0;
else y_pred[i] <- binomial_rng(100, inv_logit(y[i]));
不幸的是,目前 Stan 中没有 binomial_logit_rng
功能。