在 PyMC3 中对序数预测变量建模时使用度量预测器

Using a metric predictor when modelling ordinal predicted variable in PyMC3

我正在尝试在 PyMC3 中实现 Doing Bayesian Data Analysis (Kruschke) 第 23.4 章中的有序概率回归模型。抽样后,截距和斜率的后验分布与书上的结果并没有真正的可比性。我认为模型定义存在一些基本问题,但我没有看到。

数据: X 是指标预测变量(标准化为 zX),Y 是顺序结果 (1-7)。

nYlevels3 = df3.Y.nunique()

# Setting the thresholds for the ordinal outcomes. The outer sides are
# fixed, while the others are estimated.

thresh3 = [k + .5 for k in range(1, nYlevels3)]
thresh_obs3 = np.ma.asarray(thresh3)
thresh_obs3[1:-1] = np.ma.masked

@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
    out = np.empty((mu.size, nYlevels3))
    n = norm(loc=mu, scale=sigma)       
    out[:,0] = n.cdf(theta[0])        
    out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])])
    out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])])
    out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])])
    out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])])
    out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])])
    out[:,6] = 1 - n.cdf(theta[5])
    return out

with pm.Model() as ordinal_model_metric:    

    theta = pm.Normal('theta', mu=thresh3, tau=np.repeat(1/2**2, len(thresh3)),
                      shape=len(thresh3), observed=thresh_obs3, testval=thresh3[1:-1])

    # Intercept
    zbeta0 = pm.Normal('zbeta0', mu=(1+nYlevels3)/2, tau=1/nYlevels3**2)

    # Slope
    zbeta = pm.Normal('zbeta', mu=0.0, tau=1/nYlevels3**2)

    # Mean of the underlying metric distribution
    mu = pm.Deterministic('mu', zbeta0 + zbeta*zX)

    zsigma = pm.Uniform('zsigma', nYlevels3/1000.0, nYlevels3*10.0)

    pr = outcome_probabilities(theta, mu, zsigma)

    y = pm.Categorical('y', pr, observed=df3.Y.cat.codes)

http://nbviewer.jupyter.org/github/JWarmenhoven/DBDA-python/blob/master/Notebooks/Chapter%2023.ipynb

作为参考,这里是 Kruschke 使用的 JAGS 模型,我的模型基于该模型:

Ntotal = length(y)
# Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
# This allows all parameters to be interpretable on the response scale.
nYlevels = max(y)  
thresh = rep(NA,nYlevels-1)
thresh[1] = 1 + 0.5
thresh[nYlevels-1] = nYlevels-1 + 0.5

 modelString = "
  model {
    for ( i in 1:Ntotal ) {
      y[i] ~ dcat( pr[i,1:nYlevels] )
      pr[i,1] <- pnorm( thresh[1] , mu[x[i]] , 1/sigma[x[i]]^2 )
      for ( k in 2:(nYlevels-1) ) {
        pr[i,k] <- max( 0 ,  pnorm( thresh[ k ] , mu[x[i]] , 1/sigma[x[i]]^2 )
                           - pnorm( thresh[k-1] , mu[x[i]] , 1/sigma[x[i]]^2 ) )
      }
      pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu[x[i]] , 1/sigma[x[i]]^2 )
    }
    for ( j in 1:2 ) { # 2 groups
      mu[j] ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
      sigma[j] ~ dunif( nYlevels/1000 , nYlevels*10 )
    }
    for ( k in 2:(nYlevels-2) ) {  # 1 and nYlevels-1 are fixed, not stochastic
      thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
    }
  }

毕竟这不是基本问题:我忘记在下面的函数中为 np.max() 指明轴。

@as_op(itypes=[tt.dvector, tt.dvector, tt.dscalar], otypes=[tt.dmatrix])
def outcome_probabilities(theta, mu, sigma):
    out = np.empty((mu.size, nYlevels3))
    n = norm(loc=mu, scale=sigma)       
    out[:,0] = n.cdf(theta[0])        
    out[:,1] = np.max([np.repeat(0,mu.size), n.cdf(theta[1]) - n.cdf(theta[0])], axis=0)
    out[:,2] = np.max([np.repeat(0,mu.size), n.cdf(theta[2]) - n.cdf(theta[1])], axis=0)
    out[:,3] = np.max([np.repeat(0,mu.size), n.cdf(theta[3]) - n.cdf(theta[2])], axis=0)
    out[:,4] = np.max([np.repeat(0,mu.size), n.cdf(theta[4]) - n.cdf(theta[3])], axis=0)
    out[:,5] = np.max([np.repeat(0,mu.size), n.cdf(theta[5]) - n.cdf(theta[4])], axis=0)
    out[:,6] = 1 - n.cdf(theta[5])
    return out