Metropolis Hastings 线性回归模型

Metropolis Hastings for linear regression model

我正在尝试在 C 中实现用于简单线性回归的 Metropolis-Hastings 算法(不使用其他库(boost、Eigen 等)并且不使用二维数组)*。为了更好地测试跟踪图的 code/evaluation,我通过保留尽可能多的 C 代码重写了 R 的代码(见下文)。

不幸的是,链条没有收敛。我想知道是否

  1. 执行本身有错误吗?
  2. "just" 提案分布的错误选择?

假设是后者,我正在考虑如何找到好的提案分布参数(目前我已经选择了任意值)以便算法工作。即使像本例那样具有三个参数,也很难找到合适的参数。如果说 Gibbs 抽样不是替代方案,通常如何处理这个问题?

*我想将此代码用于 Cuda

#### posterior distribution
logPostDensity <- function(x, y, a, b, s2, N)
{
sumSqError = 0.0
for(i in 1:N)
{
  sumSqError = sumSqError + (y[i] - (a + b*x[i]))^2
}
return(((-(N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))

}

# x = x values
# y = actual datapoints
# N = sample size
# m = length of chain
# sigmaProp = uniform proposal for sigma squared
# paramAProp = uniform proposal for intercept
# paramBProp = uniform proposal for slope

mcmcSampling <- function(x,y,N,m,sigmaProp,paramAProp,paramBProp)
{

  paramsA = vector("numeric",length=m) # intercept
  paramsB = vector("numeric",length=m) # slope
  s2 = vector("numeric",length=m) # sigma squared

  paramsA[1] = 0
  paramsB[1] = 0
  s2[1] = 1

  for(i in 2:m)
  {

    paramsA[i] = paramsA[i-1] + runif(1,-paramAProp,paramAProp)

    if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
        - logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
       < log(runif(1)))
    {
      paramsA[i] = paramsA[i-1]
    }

    paramsB[i] = paramsB[i-1] + runif(1,-paramBProp,paramBProp)

    if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
        - logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
       < log(runif(1)))
    {
      paramsB[i] = paramsB[i-1]
    }

    s2[i] = s2[i-1] + runif(1,-sigmaProp,sigmaProp)

    if((s2[i] < 0) || (logPostDensity(x,y,paramsA[i],paramsB[i],s2[i],N)
                       - logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N))
       < log(runif(1)))
    {
      s2[i] = s2[i-1]
    }


  }

  res = data.frame(paramsA,paramsB,s2)
  return(res)
}


#########################################

set.seed(321)
x <- runif(100)
y <- 2 + 5*x + rnorm(100)

summary(lm(y~x))


df <- mcmcSampling(x,y,10,5000,0.05,0.05,0.05)


par(mfrow=c(3,1))
plot(df$paramsA[3000:5000],type="l",main="intercept")
plot(df$paramsB[3000:5000],type="l",main="slope")
plot(df$s2[3000:5000],type="l",main="sigma")

截取部分 (paramsA) 有一个错误。其他一切都很好。我已经实施了 Alexey 在他的评论中提出的建议。解决方法如下:

pow <- function(x,y)
{
  return(x^y)
}


#### posterior distribution
posteriorDistribution <- function(x, y, a, b,s2,N)
{
sumSqError <- 0.0
for(i in 1:N)
{
  sumSqError <- sumSqError + pow(y[i] - (a + b*x[i]),2)
}
return((-((N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))

}

# x <- x values
# y <- actual datapoints
# N <- sample size
# m <- length of chain
# sigmaProposalWidth <- width of uniform proposal dist for sigma squared
# paramAProposalWidth <- width of uniform proposal dist for intercept
# paramBProposalWidth <- width of uniform proposal dist for slope

mcmcSampling <- function(x,y,N,m,sigmaProposalWidth,paramAProposalWidth,paramBProposalWidth)
{

  desiredAcc <- 0.44

  paramsA <- vector("numeric",length=m) # intercept
  paramsB <- vector("numeric",length=m) # slope
  s2 <- vector("numeric",length=m) # sigma squared

  paramsA[1] <- 0
  paramsB[1] <- 0
  s2[1] <- 1

  accATot <- 0
  accBTot <- 0
  accS2Tot <- 0

  for(i in 2:m)
  {
    paramsA[i] <- paramsA[i-1] + runif(1,-paramAProposalWidth,paramAProposalWidth)
    accA <- 1
    if((posteriorDistribution(x,y,paramsA[i],paramsB[i-1],s2[i-1],N) - 
        posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
    {
      paramsA[i] <- paramsA[i-1]
      accA <- 0
    }


    accATot <- accATot + accA

    paramsB[i] <- paramsB[i-1] + runif(1,-paramBProposalWidth,paramBProposalWidth)
    accB <- 1
    if((posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N) - 
        posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
    {
      paramsB[i] <- paramsB[i-1]
      accB <- 0
    }

    accBTot <- accBTot + accB

    s2[i] <- s2[i-1] + runif(1,-sigmaProposalWidth,sigmaProposalWidth)
    accS2 <- 1

    if((s2[i] < 0) || (posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i],N) - 
                       posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N)) < log(runif(1)))
    {
      s2[i] <- s2[i-1]
      accS2 <- 0
    }

    accS2Tot <- accS2Tot + accS2

    if(i%%100==0)
    {

      paramAProposalWidth <- paramAProposalWidth * ((accATot/100)/desiredAcc)
      paramBProposalWidth <- paramBProposalWidth * ((accBTot/100)/desiredAcc)
      sigmaProposalWidth <- sigmaProposalWidth * ((accS2Tot/100)/desiredAcc)

      accATot <-  0
      accBTot <-  0 
      accS2Tot <-  0

    }


  }
    res <- data.frame(paramsA,paramsB,s2)
    return(res)

}