Population Monte Carlo implementation
I am trying to implement the Population Monte Carlo algorithm described in this paper (see page 78, Figure 3) for a simple one-parameter model (see the function model()), using Python. Unfortunately, the algorithm doesn't work and I can't figure out what's wrong. See my implementation below. The actual algorithm is the function abc(); all the other functions can be seen as helpers, and they seem to work fine.

To check whether the algorithm works, I first generate observed data with the model's only parameter set to param = 8. The posterior produced by the ABC algorithm should therefore be centered around 8. It is not, and I'd like to know why.

Any help or comments would be much appreciated.
# imports
from math import sqrt
import numpy as np
import random
from scipy.stats import norm

# globals
N = 300           # sample size
N_PARTICLE = 300  # number of particles
ITERS = 5         # number of decreasing thresholds
M = 10            # number of words to remember
MEAN = 7          # prior mean of parameter
SD = 2            # prior sd of parameter

# simple recall model: number of items recalled out of M, where each item
# is recalled independently with a probability derived from the parameter
def model(param):
    recall_prob_all = 1 / (1 + np.exp(M - param))
    recall_prob_one_item = np.exp(np.log(recall_prob_all) / float(M))
    return sum([1 if random.random() < recall_prob_one_item else 0 for item in range(M)])

## example
print("Output of model function: \n" + str(model(10)) + "\n")

# generate data from model
def generate(param):
    out = np.empty(N)
    for i in range(N):
        out[i] = model(param)
    return out

## example
print("Output of generate function: \n" + str(generate(10)) + "\n")

# distance function (sum of squared error, pairing observations index by index)
def distance(obsData, simData):
    out = 0.0
    for i in range(len(obsData)):
        out += (obsData[i] - simData[i]) * (obsData[i] - simData[i])
    return out

## example
print("Output of distance function: \n" + str(distance([1, 2, 3], [4, 5, 6])) + "\n")

# sample new particles based on weights
def sample(particles, weights):
    return np.random.choice(particles, 1, p=weights)

## example
print("Output of sample function: \n" + str(sample([1, 2, 3], [0.1, 0.1, 0.8])) + "\n")

# perturbance function
def perturb(variance):
    return np.random.normal(0, sqrt(variance), 1)[0]

## example
print("Output of perturb function: \n" + str(perturb(1)) + "\n")

# compute new (unnormalized) importance weight
def computeWeight(prevWeights, prevParticles, prevVariance, currentParticle):
    denom = 0.0
    proposal = norm(currentParticle, sqrt(prevVariance))
    prior = norm(MEAN, SD)
    for i in range(len(prevParticles)):
        denom += prevWeights[i] * proposal.pdf(prevParticles[i])
    return prior.pdf(currentParticle) / denom

## example
prevWeights = [0.2, 0.3, 0.5]
prevParticles = [1, 2, 3]
prevVariance = 1
currentParticle = 2.5
print("Output of computeWeight function: \n" + str(computeWeight(prevWeights, prevParticles, prevVariance, currentParticle)) + "\n")

# normalize weights
def normalize(weights):
    return weights / np.sum(weights)

## example
print("Output of normalize function: \n" + str(normalize([3., 5., 9.])) + "\n")

# sampling from prior distribution
def rprior():
    return np.random.normal(MEAN, SD, 1)[0]

## example
print("Output of rprior function: \n" + str(rprior()) + "\n")

# ABC using Population Monte Carlo sampling
def abc(obsData, eps):
    draw = 0
    Distance = 1e9
    variance = np.empty(ITERS)
    simData = np.empty(N)
    particles = np.empty([ITERS, N_PARTICLE])
    weights = np.empty([ITERS, N_PARTICLE])
    for t in range(ITERS):
        if t == 0:
            # first population: rejection sampling from the prior
            for i in range(N_PARTICLE):
                while Distance > eps[t]:
                    draw = rprior()
                    simData = generate(draw)
                    Distance = distance(obsData, simData)
                Distance = 1e9
                particles[t][i] = draw
                weights[t][i] = 1. / N_PARTICLE
            variance[t] = 2 * np.var(particles[t])
            continue
        # later populations: resample from the previous one, perturb, reweight
        for i in range(N_PARTICLE):
            while Distance > eps[t]:
                draw = sample(particles[t - 1], weights[t - 1])
                draw += perturb(variance[t - 1])
                simData = generate(draw)
                Distance = distance(obsData, simData)
            Distance = 1e9
            particles[t][i] = draw
            weights[t][i] = computeWeight(weights[t - 1], particles[t - 1], variance[t - 1], particles[t][i])
        weights[t] = normalize(weights[t])
        variance[t] = 2 * np.var(particles[t])
    return particles[ITERS - 1]
true_param = 8  # matches the value described in the text above
obsData = generate(true_param)
eps = [15000, 10000, 8000, 6000, 3000]
posterior = abc(obsData, eps)
# print(posterior)
I stumbled upon this question while searching for a pythonic implementation of a PMC algorithm, since, quite coincidentally, I'm currently applying the techniques from this exact paper to my own research.
Could you post the results you're getting? My guess is that either 1) you're using a poor choice of distance function (and/or similarity thresholds), or 2) you're not using enough particles. I may be wrong here (I'm not very well versed in sample statistics), but your distance function implicitly suggests to me that the ordering of your random draws matters. I'd have to think about it more to determine whether that actually has any effect on the convergence properties (it may not), but why don't you simply use the mean or median as your sample statistic?
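For concreteness, here is a minimal sketch of the kind of order-invariant distance I have in mind, comparing sample means rather than pairing observations index by index (the name distance_mean is mine, not something from the paper):

import numpy as np  # already imported in your code

# order-invariant distance: compare a summary statistic of the two samples
# instead of matching observations index by index, so the ordering of the
# random draws no longer matters
def distance_mean(obsData, simData):
    return abs(np.mean(obsData) - np.mean(simData))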
I ran your code with 1000 particles and a true parameter value of 8, using the absolute difference between sample means as my distance function, for three iterations with epsilons of [0.5, 0.3, 0.1]; the peak of my estimated posterior distribution seems to approach 8 on each iteration, just as it should, along with a reduction in the population variance. Note that there is still a noticeable rightward bias, but this is because of the asymmetry of your model (parameter values of 8 or less can never lead to more than 8 observed successes, while all parameter values greater than 8 can, leading to a rightward skew in the distribution).
Here's a plot of my results: [plot not reproduced here]
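In terms of the code above, the run I describe amounts to something like the following sketch (assuming your helpers are already defined; I simply rebind the globals that abc() reads, and distance_mean is the order-invariant distance sketched earlier):

# sketch of the run described above, not a verified script
distance = distance_mean       # swap the mean-based distance into abc()
N_PARTICLE = 1000              # more particles than the original 300
eps = [0.5, 0.3, 0.1]          # thresholds matched to the new distance scale
ITERS = len(eps)               # three decreasing thresholds

obsData = generate(8)          # observed data with true parameter value 8
posterior = abc(obsData, eps)
print(np.mean(posterior))      # the posterior mean should land near 8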