在 PyMC3 中对序数预测变量建模时使用度量预测器
Using a metric predictor when modelling ordinal predicted variable in PyMC3
我正在尝试在 PyMC3 中实现 Doing Bayesian Data Analysis (Kruschke) 第 23.4 章中的有序概率回归模型。抽样后,截距和斜率的后验分布与书上的结果并没有真正的可比性。我认为模型定义存在一些基本问题,但我没有看到。
数据:
X 是指标预测变量(标准化为 zX),Y 是顺序结果 (1-7)。
nYlevels3 = df3.Y.nunique()
# Setting the thresholds for the ordinal outcomes. The outer sides are
# fixed, while the others are estimated.
thresh3 = [k + .5 for k in range(1, nYlevels3)]
thresh_obs3 = np.ma.asarray(thresh3)
thresh_obs3[1:-1] = np.ma.masked
@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
out = np.empty((mu.size, nYlevels3))
n = norm(loc=mu, scale=sigma)
out[:,0] = n.cdf(theta[0])
out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])])
out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])])
out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])])
out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])])
out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])])
out[:,6] = 1 - n.cdf(theta[5])
return out
with pm.Model() as ordinal_model_metric:
theta = pm.Normal('theta', mu=thresh3, tau=np.repeat(1/2**2, len(thresh3)),
shape=len(thresh3), observed=thresh_obs3, testval=thresh3[1:-1])
# Intercept
zbeta0 = pm.Normal('zbeta0', mu=(1+nYlevels3)/2, tau=1/nYlevels3**2)
# Slope
zbeta = pm.Normal('zbeta', mu=0.0, tau=1/nYlevels3**2)
# Mean of the underlying metric distribution
mu = pm.Deterministic('mu', zbeta0 + zbeta*zX)
zsigma = pm.Uniform('zsigma', nYlevels3/1000.0, nYlevels3*10.0)
pr = outcome_probabilities(theta, mu, zsigma)
y = pm.Categorical('y', pr, observed=df3.Y.cat.codes)
http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb
作为参考,这里是 Kruschke 使用的 JAGS 模型,我的模型基于该模型:
Ntotal = length(y)
# Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
# This allows all parameters to be interpretable on the response scale.
nYlevels = max(y)
thresh = rep(NA,nYlevels-1)
thresh[1] = 1 + 0.5
thresh[nYlevels-1] = nYlevels-1 + 0.5
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dcat( pr[i,1:nYlevels] )
pr[i,1] <- pnorm( thresh[1] , mu[x[i]] , 1/sigma[x[i]]^2 )
for ( k in 2:(nYlevels-1) ) {
pr[i,k] <- max( 0 , pnorm( thresh[ k ] , mu[x[i]] , 1/sigma[x[i]]^2 )
- pnorm( thresh[k-1] , mu[x[i]] , 1/sigma[x[i]]^2 ) )
}
pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu[x[i]] , 1/sigma[x[i]]^2 )
}
for ( j in 1:2 ) { # 2 groups
mu[j] ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
sigma[j] ~ dunif( nYlevels/1000 , nYlevels*10 )
}
for ( k in 2:(nYlevels-2) ) { # 1 and nYlevels-1 are fixed, not stochastic
thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
}
}
毕竟这不是基本问题:我忘记在下面的函数中为 np.max()
指明轴。
@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
out = np.empty((mu.size, nYlevels3))
n = norm(loc=mu, scale=sigma)
out[:,0] = n.cdf(theta[0])
out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])], axis=0)
out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])], axis=0)
out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])], axis=0)
out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])], axis=0)
out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])], axis=0)
out[:,6] = 1 - n.cdf(theta[5])
return out
我正在尝试在 PyMC3 中实现 Doing Bayesian Data Analysis (Kruschke) 第 23.4 章中的有序概率回归模型。抽样后,截距和斜率的后验分布与书上的结果并没有真正的可比性。我认为模型定义存在一些基本问题,但我没有看到。
数据: X 是指标预测变量(标准化为 zX),Y 是顺序结果 (1-7)。
nYlevels3 = df3.Y.nunique()
# Setting the thresholds for the ordinal outcomes. The outer sides are
# fixed, while the others are estimated.
thresh3 = [k + .5 for k in range(1, nYlevels3)]
thresh_obs3 = np.ma.asarray(thresh3)
thresh_obs3[1:-1] = np.ma.masked
@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
out = np.empty((mu.size, nYlevels3))
n = norm(loc=mu, scale=sigma)
out[:,0] = n.cdf(theta[0])
out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])])
out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])])
out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])])
out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])])
out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])])
out[:,6] = 1 - n.cdf(theta[5])
return out
with pm.Model() as ordinal_model_metric:
theta = pm.Normal('theta', mu=thresh3, tau=np.repeat(1/2**2, len(thresh3)),
shape=len(thresh3), observed=thresh_obs3, testval=thresh3[1:-1])
# Intercept
zbeta0 = pm.Normal('zbeta0', mu=(1+nYlevels3)/2, tau=1/nYlevels3**2)
# Slope
zbeta = pm.Normal('zbeta', mu=0.0, tau=1/nYlevels3**2)
# Mean of the underlying metric distribution
mu = pm.Deterministic('mu', zbeta0 + zbeta*zX)
zsigma = pm.Uniform('zsigma', nYlevels3/1000.0, nYlevels3*10.0)
pr = outcome_probabilities(theta, mu, zsigma)
y = pm.Categorical('y', pr, observed=df3.Y.cat.codes)
http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb
作为参考,这里是 Kruschke 使用的 JAGS 模型,我的模型基于该模型:
Ntotal = length(y)
# Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
# This allows all parameters to be interpretable on the response scale.
nYlevels = max(y)
thresh = rep(NA,nYlevels-1)
thresh[1] = 1 + 0.5
thresh[nYlevels-1] = nYlevels-1 + 0.5
modelString = "
model {
for ( i in 1:Ntotal ) {
y[i] ~ dcat( pr[i,1:nYlevels] )
pr[i,1] <- pnorm( thresh[1] , mu[x[i]] , 1/sigma[x[i]]^2 )
for ( k in 2:(nYlevels-1) ) {
pr[i,k] <- max( 0 , pnorm( thresh[ k ] , mu[x[i]] , 1/sigma[x[i]]^2 )
- pnorm( thresh[k-1] , mu[x[i]] , 1/sigma[x[i]]^2 ) )
}
pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu[x[i]] , 1/sigma[x[i]]^2 )
}
for ( j in 1:2 ) { # 2 groups
mu[j] ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
sigma[j] ~ dunif( nYlevels/1000 , nYlevels*10 )
}
for ( k in 2:(nYlevels-2) ) { # 1 and nYlevels-1 are fixed, not stochastic
thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
}
}
毕竟这不是基本问题:我忘记在下面的函数中为 np.max()
指明轴。
@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
out = np.empty((mu.size, nYlevels3))
n = norm(loc=mu, scale=sigma)
out[:,0] = n.cdf(theta[0])
out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])], axis=0)
out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])], axis=0)
out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])], axis=0)
out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])], axis=0)
out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])], axis=0)
out[:,6] = 1 - n.cdf(theta[5])
return out