用于估计负二项分布的 MCMC
MCMC for estimating negative binomial distribution
我想使用 MCMC Metropolis-Hastings 算法估计负二项分布的参数。换句话说,我有样本:
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
我想编写算法来估计大小参数和概率参数。
我目前的工作
我将大小的先验分布定义为泊松分布:
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
并且概率的先验分布在 [0, 1] 上是均匀的:
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = T))
}
此外,为了简单起见,我定义了对数似然函数和联合概率函数:
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = T))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
data <- y
return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}
最后,整个算法:
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) chain[i + 1, ] <- c(proposal_r, proposal_prob)
else chain[i + 1, ] <- chain[i, ]
}
return(chain)
}
问题
我遇到的问题是,当我 运行 它的起始值甚至非常接近正确值时:
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
我会得到明显错误的后验分布。例如
> colMeans(res)
[1] 11.963018 0.994533
如您所见,size 非常靠近点 12,probability 位于点 1。
你知道这些现象的成因吗?
将 loglikelihood
中的 dnorm
更改为 dnbinom
并修复 prob
的提案,使其不会超出 (0,1):
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = TRUE))
}
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
}
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i,2]), max = min(0.2, 1 - chain[i,2]))
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) {
chain[i + 1, ] <- c(proposal_r, proposal_prob)
} else {
chain[i + 1, ] <- chain[i, ]
}
}
return(chain)
}
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
colMeans(res)
#> [1] 3.1009495 0.1988177
我想使用 MCMC Metropolis-Hastings 算法估计负二项分布的参数。换句话说,我有样本:
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
我想编写算法来估计大小参数和概率参数。
我目前的工作
我将大小的先验分布定义为泊松分布:
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
并且概率的先验分布在 [0, 1] 上是均匀的:
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = T))
}
此外,为了简单起见,我定义了对数似然函数和联合概率函数:
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnorm(data, mean = r, sd = prob, log = T))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
data <- y
return(loglikelihood(data, r, prob) + prior_r(r) + prior_prob(prob))
}
最后,整个算法:
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = -0.2, max = 0.2)
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) chain[i + 1, ] <- c(proposal_r, proposal_prob)
else chain[i + 1, ] <- chain[i, ]
}
return(chain)
}
问题
我遇到的问题是,当我 运行 它的起始值甚至非常接近正确值时:
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
我会得到明显错误的后验分布。例如
> colMeans(res)
[1] 11.963018 0.994533
如您所见,size 非常靠近点 12,probability 位于点 1。
你知道这些现象的成因吗?
将 loglikelihood
中的 dnorm
更改为 dnbinom
并修复 prob
的提案,使其不会超出 (0,1):
set.seed(42)
y <- rnbinom(20, size = 3, prob = 0.2)
prior_r <- function(r) {
return(dpois(r, lambda = 2, log = T))
}
prior_prob <- function(prob) {
return(dunif(prob, min = 0, max = 1, log = TRUE))
}
loglikelihood <- function(data, r, prob) {
loglikelihoodValue <- sum(dnbinom(data, size = r, prob = prob, log = TRUE))
return(loglikelihoodValue)
}
joint <- function(r, prob) {
return(loglikelihood(y, r, prob) + prior_r(r) + prior_prob(prob))
}
run_mcmc <- function(startvalue, iterations) {
chain <- array(dim = c(iterations + 1, 2))
chain[1, ] <- startvalue
for (i in 1:iterations) {
proposal_r <- rpois(1, lambda = chain[i, 1])
proposal_prob <- chain[i, 2] + runif(1, min = max(-0.2, -chain[i,2]), max = min(0.2, 1 - chain[i,2]))
quotient <- joint(proposal_r, proposal_prob) - joint(chain[i, 1], chain[i, 2])
if (runif(1, 0, 1) < min(1, exp(quotient))) {
chain[i + 1, ] <- c(proposal_r, proposal_prob)
} else {
chain[i + 1, ] <- chain[i, ]
}
}
return(chain)
}
iterations <- 2000
startvalue <- c(4, 0.25)
res <- run_mcmc(startvalue, iterations)
colMeans(res)
#> [1] 3.1009495 0.1988177