JAGS/rjags 中多个组的单独贝叶斯参数估计

Question

我正在尝试在 JAGS 中执行层次分析，从 Kruschke 的“做贝叶斯数据分析”第 9 章推断。我希望获得四个硬币正面比例的后验参数估计值（theta 的 1、2、3 和4)，来自两个铸币厂，以及来自每个铸币厂的硬币平均偏差的估计值（铸币厂偏差：欧米茄）。我将每个造币厂的偏差 kappa 的可变性保持为常数。问题是我无法从第二个铸币厂获得后验估计，它似乎只是对先验进行抽样。有谁知道如何修复模型字符串文本（见下面的第 3 步）以便为第二个 mint 生成后验估计？

下面分析的完整脚本

library(rjags)
library(runjags)
library(coda)


############### 1. Generate the data 

flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
           sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
           sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
           sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips

coins <- factor(c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23)))

mints <- factor(c(rep(1,17), rep(2,38)))

nFlips <- length(flips) 
nCoins <- length(unique(coins))
nMints <- length(unique(mints))


#################### 2. Pass data into a list 

dataList <- list(
  flips = flips,
  coins = coins,
  mints = mints,
  nFlips = nFlips,
  nCoins = nCoins,
  nMints = nMints)


################### 3. specify and save the model 

modelString <- "
model{

      # start with nested likelihood function
      for (i in 1:nFlips) {

              flips[i] ~ dbern(theta[coins[i]])
      } 

      # next the prior on theta
      for (coins in 1:nCoins) {

              theta[coins] ~ dbeta(omega[mints[coins]]*(kappa - 2) + 1, (1 - omega[mints[coins]])*(kappa - 2) + 1) 
      }

      # next we specify the prior for the higher-level parameters on the mint, omega and kappa
      for (mints in 1:nMints) {

              omega[mints] ~ dbeta(2,2)

      }

      kappa <- 5
}
"


writeLines(modelString, "tempModelHier4CoinTwoMint.txt")

############################### Step 4: Initialise Chains 

initsList <- list(theta1 = mean(flips[coins==1]),
                  theta2 = mean(flips[coins==2]),
                  theta3 = mean(flips[coins==3]),
                  theta4 = mean(flips[coins==4]),
                  omega1 = mean(c(mean(flips[coins==1]),
                                 mean(flips[coins==2]))),
                  omega2 = mean(c(mean(flips[coins==3]),
                                 mean(flips[coins==4]))))

initsList


############################### Step 5: Generate Chains 

runJagsOut <- run.jags(method = "simple",
                       model = "tempModelHier4CoinTwoMint.txt",
                       monitor = c("theta[1]", "theta[2]", "theta[3]", "theta[4]", "omega[1]", "omega[2]"),
                       data = dataList,
                       inits = initsList,
                       n.chains = 1,
                       adapt = 500,
                       burnin = 1000,
                       sample = 50000,
                       thin = 1,
                       summarise = FALSE,
                       plots = FALSE)



############################### Step 6: Convert to Coda Object 

codaSamples <- as.mcmc.list(runJagsOut)

head(codaSamples)


############################### Step 7: Make Graphs 

df <- data.frame(as.matrix(codaSamples))

theta1 <- ggplot(df, aes(x = df$theta.1.)) + geom_density()
theta2 <- ggplot(df, aes(x = df$theta.2.)) + geom_density()
theta3 <- ggplot(df, aes(x = df$theta.3.)) + geom_density()
theta4 <- ggplot(df, aes(x = df$theta.4.)) + geom_density()
omega1 <- ggplot(df, aes(x = df$omega.1.)) + geom_density()
omega2 <- ggplot(df, aes(x = df$omega.2.)) + geom_density()

require(gridExtra)

ggsave("coinsAndMintsHier/hierPropFourCoinsTwoMints.pdf", grid.arrange(theta1, theta2, theta3, theta4, omega1, omega2, ncol = 2), device = "pdf", height = 30, width = 10, units = "cm")

Answer 1

问题是您在 theta 上设置先验时试图索引铸币厂的方式。在这种情况下只有 4 个 theta，而不是 nFlips。您的嵌套索引 mints[coins] 正在访问 mints 数据向量，而不是每个硬币所属的铸币厂向量。我在下面创建了一个更正版本。请注意向量的显式构造，该向量为每个硬币所属的铸币厂编制索引。另请注意，在模型规范中，每个 for 循环索引都有自己的索引名称，与数据名称不同。

graphics.off() # This closes all of R's graphics windows.
rm(list=ls())  # Careful! This clears all of R's memory!

library(runjags)
library(coda)

#library(rjags)

############### 1. Generate the data 

flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
           sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
           sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
           sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips

# NOTE: I got rid of `factor` because it was unneeded and got in the way
coins <- c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23))

# NOTE: I got rid of `factor` because it was unneeded and got in the way
mints <- c(rep(1,17), rep(2,38))

nFlips <- length(flips) 
nCoins <- length(unique(coins))
nMints <- length(unique(mints))

# NEW: Create vector that specifies the mint of each coin. There must be a     more 
# elegant way to do this, but here is a logical brute-force approach. This
# assumes that coins are consecutively numbered from 1 to nCoins.
mintOfCoin = NULL
for ( cIdx in 1:nCoins ) {
  mintOfCoin = c( mintOfCoin , unique(mints[coins==cIdx]) )
}

#################### 2. Pass data into a list 

dataList <- list(
  flips = flips,
  coins = coins,
  mints = mints,
  nFlips = nFlips,
  nCoins = nCoins,
  nMints = nMints,
  mintOfCoin = mintOfCoin # NOTE
  )


################### 3. specify and save the model 

modelString <- "
model{
  # start with nested likelihood function
  for (fIdx in 1:nFlips) {
    flips[fIdx] ~ dbern( theta[coins[fIdx]] )
  } 
  # next the prior on theta
  # NOTE: Here we use the mintOfCoin index.
  for (cIdx in 1:nCoins) {
    theta[cIdx] ~ dbeta( omega[mintOfCoin[cIdx]]*(kappa - 2) + 1 ,
                          ( 1 - omega[mintOfCoin[cIdx]])*(kappa - 2) + 1 ) 
  }
  # next we specify the prior for the higher-level parameters on the mint, 
  # omega and kappa
  # NOTE: I changed the name of the mint index so it doesn't conflict with 
  # mints data vector.
  for (mIdx in 1:nMints) {
    omega[mIdx] ~ dbeta(2,2)
  }
  kappa <- 5
}
"


writeLines(modelString, "tempModelHier4CoinTwoMint.txt")

############################### Step 4: Initialise Chains 

initsList <- list(theta1 = mean(flips[coins==1]),
                  theta2 = mean(flips[coins==2]),
                  theta3 = mean(flips[coins==3]),
                  theta4 = mean(flips[coins==4]),
                  omega1 = mean(c(mean(flips[coins==1]),
                                  mean(flips[coins==2]))),
                  omega2 = mean(c(mean(flips[coins==3]),
                                  mean(flips[coins==4]))))

initsList


############################### Step 5: Generate Chains 

runJagsOut <- run.jags(method = "parallel",
                       model = "tempModelHier4CoinTwoMint.txt",
                       # NOTE: theta and omega are vectors:
                       monitor = c( "theta", "omega" , "kappa" ),
                       data = dataList,
                       #inits = initsList, # NOTE: Let JAGS initialize.
                       n.chains = 4, # NOTE: Not only 1 chain.
                       adapt = 500,
                       burnin = 1000,
                       sample = 10000,
                       thin = 1,
                       summarise = FALSE,
                       plots = FALSE)



############################### Step 6: Convert to Coda Object 

codaSamples <- as.mcmc.list(runJagsOut)

head(codaSamples)

########################################
## NOTE: Important step --- Check MCMC diagnostics 

# Display diagnostics of chain, for specified parameters:
source("DBDA2E-utilities.R") # For function diagMCMC()
parameterNames = varnames(codaSamples) # from coda package
for ( parName in parameterNames ) {
  diagMCMC( codaObject=codaSamples , parName=parName )
}



############################### Step 7: Make Graphs 
# ...

JAGS/rjags 中多个组的单独贝叶斯参数估计

Separate Bayesian parameter estimates for multiple groups in JAGS/rjags

r

bayesian

jags

runjags