JAGS/rjags 中多个组的单独贝叶斯参数估计
Separate Bayesian parameter estimates for multiple groups in JAGS/rjags
我正在尝试在 JAGS 中执行层次分析,从 Kruschke 的“做贝叶斯数据分析”第 9 章推断。我希望获得四个硬币正面比例的后验参数估计值(theta 的 1、2、3 和4),来自两个铸币厂,以及来自每个铸币厂的硬币平均偏差的估计值(铸币厂偏差:欧米茄)。我将每个造币厂的偏差 kappa 的可变性保持为常数。问题是我无法从第二个铸币厂获得后验估计,它似乎只是对先验进行抽样。有谁知道如何修复模型字符串文本(见下面的第 3 步)以便为第二个 mint 生成后验估计?
下面分析的完整脚本
library(rjags)
library(runjags)
library(coda)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips
coins <- factor(c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23)))
mints <- factor(c(rep(1,17), rep(2,38)))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
#################### 2. Pass data into a list
dataList <- list(
flips = flips,
coins = coins,
mints = mints,
nFlips = nFlips,
nCoins = nCoins,
nMints = nMints)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (i in 1:nFlips) {
flips[i] ~ dbern(theta[coins[i]])
}
# next the prior on theta
for (coins in 1:nCoins) {
theta[coins] ~ dbeta(omega[mints[coins]]*(kappa - 2) + 1, (1 - omega[mints[coins]])*(kappa - 2) + 1)
}
# next we specify the prior for the higher-level parameters on the mint, omega and kappa
for (mints in 1:nMints) {
omega[mints] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
theta2 = mean(flips[coins==2]),
theta3 = mean(flips[coins==3]),
theta4 = mean(flips[coins==4]),
omega1 = mean(c(mean(flips[coins==1]),
mean(flips[coins==2]))),
omega2 = mean(c(mean(flips[coins==3]),
mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "simple",
model = "tempModelHier4CoinTwoMint.txt",
monitor = c("theta[1]", "theta[2]", "theta[3]", "theta[4]", "omega[1]", "omega[2]"),
data = dataList,
inits = initsList,
n.chains = 1,
adapt = 500,
burnin = 1000,
sample = 50000,
thin = 1,
summarise = FALSE,
plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
############################### Step 7: Make Graphs
df <- data.frame(as.matrix(codaSamples))
theta1 <- ggplot(df, aes(x = df$theta.1.)) + geom_density()
theta2 <- ggplot(df, aes(x = df$theta.2.)) + geom_density()
theta3 <- ggplot(df, aes(x = df$theta.3.)) + geom_density()
theta4 <- ggplot(df, aes(x = df$theta.4.)) + geom_density()
omega1 <- ggplot(df, aes(x = df$omega.1.)) + geom_density()
omega2 <- ggplot(df, aes(x = df$omega.2.)) + geom_density()
require(gridExtra)
ggsave("coinsAndMintsHier/hierPropFourCoinsTwoMints.pdf", grid.arrange(theta1, theta2, theta3, theta4, omega1, omega2, ncol = 2), device = "pdf", height = 30, width = 10, units = "cm")
问题是您在 theta
上设置先验时试图索引铸币厂的方式。在这种情况下只有 4 个 theta
,而不是 nFlips
。您的嵌套索引 mints[coins]
正在访问 mints
数据向量,而不是每个硬币所属的铸币厂向量。我在下面创建了一个更正版本。请注意向量的显式构造,该向量为每个硬币所属的铸币厂编制索引。另请注意,在模型规范中,每个 for 循环索引都有自己的索引名称,与数据名称不同。
graphics.off() # This closes all of R's graphics windows.
rm(list=ls()) # Careful! This clears all of R's memory!
library(runjags)
library(coda)
#library(rjags)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips
# NOTE: I got rid of `factor` because it was unneeded and got in the way
coins <- c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23))
# NOTE: I got rid of `factor` because it was unneeded and got in the way
mints <- c(rep(1,17), rep(2,38))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
# NEW: Create vector that specifies the mint of each coin. There must be a more
# elegant way to do this, but here is a logical brute-force approach. This
# assumes that coins are consecutively numbered from 1 to nCoins.
mintOfCoin = NULL
for ( cIdx in 1:nCoins ) {
mintOfCoin = c( mintOfCoin , unique(mints[coins==cIdx]) )
}
#################### 2. Pass data into a list
dataList <- list(
flips = flips,
coins = coins,
mints = mints,
nFlips = nFlips,
nCoins = nCoins,
nMints = nMints,
mintOfCoin = mintOfCoin # NOTE
)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (fIdx in 1:nFlips) {
flips[fIdx] ~ dbern( theta[coins[fIdx]] )
}
# next the prior on theta
# NOTE: Here we use the mintOfCoin index.
for (cIdx in 1:nCoins) {
theta[cIdx] ~ dbeta( omega[mintOfCoin[cIdx]]*(kappa - 2) + 1 ,
( 1 - omega[mintOfCoin[cIdx]])*(kappa - 2) + 1 )
}
# next we specify the prior for the higher-level parameters on the mint,
# omega and kappa
# NOTE: I changed the name of the mint index so it doesn't conflict with
# mints data vector.
for (mIdx in 1:nMints) {
omega[mIdx] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
theta2 = mean(flips[coins==2]),
theta3 = mean(flips[coins==3]),
theta4 = mean(flips[coins==4]),
omega1 = mean(c(mean(flips[coins==1]),
mean(flips[coins==2]))),
omega2 = mean(c(mean(flips[coins==3]),
mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "parallel",
model = "tempModelHier4CoinTwoMint.txt",
# NOTE: theta and omega are vectors:
monitor = c( "theta", "omega" , "kappa" ),
data = dataList,
#inits = initsList, # NOTE: Let JAGS initialize.
n.chains = 4, # NOTE: Not only 1 chain.
adapt = 500,
burnin = 1000,
sample = 10000,
thin = 1,
summarise = FALSE,
plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
########################################
## NOTE: Important step --- Check MCMC diagnostics
# Display diagnostics of chain, for specified parameters:
source("DBDA2E-utilities.R") # For function diagMCMC()
parameterNames = varnames(codaSamples) # from coda package
for ( parName in parameterNames ) {
diagMCMC( codaObject=codaSamples , parName=parName )
}
############################### Step 7: Make Graphs
# ...
我正在尝试在 JAGS 中执行层次分析,从 Kruschke 的“做贝叶斯数据分析”第 9 章推断。我希望获得四个硬币正面比例的后验参数估计值(theta 的 1、2、3 和4),来自两个铸币厂,以及来自每个铸币厂的硬币平均偏差的估计值(铸币厂偏差:欧米茄)。我将每个造币厂的偏差 kappa 的可变性保持为常数。问题是我无法从第二个铸币厂获得后验估计,它似乎只是对先验进行抽样。有谁知道如何修复模型字符串文本(见下面的第 3 步)以便为第二个 mint 生成后验估计?
下面分析的完整脚本
library(rjags)
library(runjags)
library(coda)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips
coins <- factor(c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23)))
mints <- factor(c(rep(1,17), rep(2,38)))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
#################### 2. Pass data into a list
dataList <- list(
flips = flips,
coins = coins,
mints = mints,
nFlips = nFlips,
nCoins = nCoins,
nMints = nMints)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (i in 1:nFlips) {
flips[i] ~ dbern(theta[coins[i]])
}
# next the prior on theta
for (coins in 1:nCoins) {
theta[coins] ~ dbeta(omega[mints[coins]]*(kappa - 2) + 1, (1 - omega[mints[coins]])*(kappa - 2) + 1)
}
# next we specify the prior for the higher-level parameters on the mint, omega and kappa
for (mints in 1:nMints) {
omega[mints] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
theta2 = mean(flips[coins==2]),
theta3 = mean(flips[coins==3]),
theta4 = mean(flips[coins==4]),
omega1 = mean(c(mean(flips[coins==1]),
mean(flips[coins==2]))),
omega2 = mean(c(mean(flips[coins==3]),
mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "simple",
model = "tempModelHier4CoinTwoMint.txt",
monitor = c("theta[1]", "theta[2]", "theta[3]", "theta[4]", "omega[1]", "omega[2]"),
data = dataList,
inits = initsList,
n.chains = 1,
adapt = 500,
burnin = 1000,
sample = 50000,
thin = 1,
summarise = FALSE,
plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
############################### Step 7: Make Graphs
df <- data.frame(as.matrix(codaSamples))
theta1 <- ggplot(df, aes(x = df$theta.1.)) + geom_density()
theta2 <- ggplot(df, aes(x = df$theta.2.)) + geom_density()
theta3 <- ggplot(df, aes(x = df$theta.3.)) + geom_density()
theta4 <- ggplot(df, aes(x = df$theta.4.)) + geom_density()
omega1 <- ggplot(df, aes(x = df$omega.1.)) + geom_density()
omega2 <- ggplot(df, aes(x = df$omega.2.)) + geom_density()
require(gridExtra)
ggsave("coinsAndMintsHier/hierPropFourCoinsTwoMints.pdf", grid.arrange(theta1, theta2, theta3, theta4, omega1, omega2, ncol = 2), device = "pdf", height = 30, width = 10, units = "cm")
问题是您在 theta
上设置先验时试图索引铸币厂的方式。在这种情况下只有 4 个 theta
,而不是 nFlips
。您的嵌套索引 mints[coins]
正在访问 mints
数据向量,而不是每个硬币所属的铸币厂向量。我在下面创建了一个更正版本。请注意向量的显式构造,该向量为每个硬币所属的铸币厂编制索引。另请注意,在模型规范中,每个 for 循环索引都有自己的索引名称,与数据名称不同。
graphics.off() # This closes all of R's graphics windows.
rm(list=ls()) # Careful! This clears all of R's memory!
library(runjags)
library(coda)
#library(rjags)
############### 1. Generate the data
flips <- c(sample(c(rep(1,3), rep(0,9))), # coin 1, mint 1, 12 flips total
sample(c(rep(1,1), rep(0,4))), # coin 2, mint 1, 5 flips total
sample(c(rep(1,10), rep(0,5))), # coin 1, mint 2, 15 flips
sample(c(rep(1,17), rep(0,6)))) # coin 2, mint 2, 23 flips
# NOTE: I got rid of `factor` because it was unneeded and got in the way
coins <- c(rep(1,12), rep(2,5), rep(3, 15), rep(4, 23))
# NOTE: I got rid of `factor` because it was unneeded and got in the way
mints <- c(rep(1,17), rep(2,38))
nFlips <- length(flips)
nCoins <- length(unique(coins))
nMints <- length(unique(mints))
# NEW: Create vector that specifies the mint of each coin. There must be a more
# elegant way to do this, but here is a logical brute-force approach. This
# assumes that coins are consecutively numbered from 1 to nCoins.
mintOfCoin = NULL
for ( cIdx in 1:nCoins ) {
mintOfCoin = c( mintOfCoin , unique(mints[coins==cIdx]) )
}
#################### 2. Pass data into a list
dataList <- list(
flips = flips,
coins = coins,
mints = mints,
nFlips = nFlips,
nCoins = nCoins,
nMints = nMints,
mintOfCoin = mintOfCoin # NOTE
)
################### 3. specify and save the model
modelString <- "
model{
# start with nested likelihood function
for (fIdx in 1:nFlips) {
flips[fIdx] ~ dbern( theta[coins[fIdx]] )
}
# next the prior on theta
# NOTE: Here we use the mintOfCoin index.
for (cIdx in 1:nCoins) {
theta[cIdx] ~ dbeta( omega[mintOfCoin[cIdx]]*(kappa - 2) + 1 ,
( 1 - omega[mintOfCoin[cIdx]])*(kappa - 2) + 1 )
}
# next we specify the prior for the higher-level parameters on the mint,
# omega and kappa
# NOTE: I changed the name of the mint index so it doesn't conflict with
# mints data vector.
for (mIdx in 1:nMints) {
omega[mIdx] ~ dbeta(2,2)
}
kappa <- 5
}
"
writeLines(modelString, "tempModelHier4CoinTwoMint.txt")
############################### Step 4: Initialise Chains
initsList <- list(theta1 = mean(flips[coins==1]),
theta2 = mean(flips[coins==2]),
theta3 = mean(flips[coins==3]),
theta4 = mean(flips[coins==4]),
omega1 = mean(c(mean(flips[coins==1]),
mean(flips[coins==2]))),
omega2 = mean(c(mean(flips[coins==3]),
mean(flips[coins==4]))))
initsList
############################### Step 5: Generate Chains
runJagsOut <- run.jags(method = "parallel",
model = "tempModelHier4CoinTwoMint.txt",
# NOTE: theta and omega are vectors:
monitor = c( "theta", "omega" , "kappa" ),
data = dataList,
#inits = initsList, # NOTE: Let JAGS initialize.
n.chains = 4, # NOTE: Not only 1 chain.
adapt = 500,
burnin = 1000,
sample = 10000,
thin = 1,
summarise = FALSE,
plots = FALSE)
############################### Step 6: Convert to Coda Object
codaSamples <- as.mcmc.list(runJagsOut)
head(codaSamples)
########################################
## NOTE: Important step --- Check MCMC diagnostics
# Display diagnostics of chain, for specified parameters:
source("DBDA2E-utilities.R") # For function diagMCMC()
parameterNames = varnames(codaSamples) # from coda package
for ( parName in parameterNames ) {
diagMCMC( codaObject=codaSamples , parName=parName )
}
############################### Step 7: Make Graphs
# ...