R 中具有模拟最大似然的有序概率误差

Error in Ordered Probit with Simulated Maximum Likelihood in R

我 'trying' 在 R 中编写具有随机效应和模拟最大似然的有序 probit 模型。

我改编了 Chris Adolph (http://faculty.washington.edu/cadolph/?page=21)

的代码
set.seed(10234)
nobs <- 1000

x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)

#### Generate Halton Sequences
library("randtoolbox")
R <- 200
#a <- matrix(999, nrow=R, ncol=nobs)
a <- halton(n=nobs, dim=R, normal=T, init=T)

# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
    # preliminaries
    x <- as.matrix(x)
    os <- rep(1, nrow(x))
    x <- cbind(os, x)
    b <- param[1:ncol(x)]
    t2 <- param[(ncol(x)+1)]
    t3 <- param[(ncol(x)+2)]
    t4 <- param[(ncol(x)+3)]
    sigma_a <- param[ncol(x)+4]

    # probabilities and penalty function
    xb <- x %*% b %*% rep(1, R)
    asigma <- a * sigma_a

    p1 <- pnorm(- xb - asigma)
    if (t2 <= 0) {
        p2 <- -(abs(t2) * 10000)    # penalty function to keep t2>0
    } else {
        p2 <- pnorm(t2 - xb - asigma) - pnorm(- xb - asigma)
    }
    if (t3 <= t2) {
        p3 <- -((t2-t3)*10000)    # penalty to keep t3>t2
    } else {
        p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
    }
    if (t4 <= t3) {
        p4 <- -((t3 - t4) * 10000)
    } else {
        p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma) 
    }
    p5 <- 1 - pnorm(t4 - xb - asigma)

    p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
    p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
    p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
    p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
    p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)

    # -1 * log likelihood (optim is a minimizer)
    -sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}

# Use optim directly
ls.result <- lm(y~x)                    # use ls estimates as starting   values
stval <- c(ls.result$coefficients,1,2,3,2)  # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian = T)

但是,代码给了我以下错误: apply(p3, MARGIN = 1, FUN = sum) 错误: dim(X) 的长度必须为正 调用自:apply(p3, MARGIN = 1, FUN = sum)

我已经使用了 debug() 函数并且我能够 运行 单独地执行所有函数并且我可以在每个步骤中打印值。

问题是,只有当相应的参数值在允许的范围内时,才需要对您正在进行的 Halton 序列进行平均。请注意,我在每个 if 分支内移动了 log(apply(…)) 行:

set.seed(10234)
nobs <- 1000

x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)

#### Generate Halton Sequences
library("randtoolbox")
R <- 200
a <- halton(n=nobs, dim=R, normal=T, init=T)

# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
    # preliminaries
    x <- as.matrix(x)
    os <- rep(1, nrow(x))
    x <- cbind(os, x)
    b <- param[1:ncol(x)]
    t2 <- param[(ncol(x)+1)]
    t3 <- param[(ncol(x)+2)]
    t4 <- param[(ncol(x)+3)]
    sigma_a <- param[ncol(x)+4]

    # probabilities and penalty function
    xb <- x %*% b %*% rep(1, R)
    asigma <- a*sigma_a

    p1 <- pnorm(-xb-asigma)
    p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)

    if (t2 <= 0) {
        p2 <- -(abs(t2) * 10000)    # penalty function to keep t2>0
    } else {
        p2 <- pnorm(t2-xb-asigma)-pnorm(-xb-asigma)
        p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
    }
    if (t3 <= t2) {
        p3 <- -((t2-t3)*10000)    # penalty to keep t3>t2
    } else {
        p3 <- pnorm(t3-xb-asigma)-pnorm(t2-xb-asigma)   
        p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
    }
    if (t4 <= t3) {
        p4 <- -((t3-t4)*10000)
    } else {
        p4 <- pnorm(t4-xb-asigma)-pnorm(t3-xb-asigma) 
        p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
    }
    p5 <- 1 - pnorm(t4-xb-asigma)
    p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)

    # -1 * log likelihood (optim is a minimizer)
    -sum(cbind(y==1,y==2,y==3,y==4, y==5) * cbind(p1,p2,p3,p4,p5))
}

# Use optim directly
ls.result <- lm(y~x)                    # use ls estimates as starting   values
stval <- c(ls.result$coefficients,1,2,3,2)  # initial guesses

oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian=T, control = list(trace = 10, REPORT = 1))

然后成功运行:

...
iter  20 value 1567.966484
iter  21 value 1567.966434
iter  22 value 1567.966389
iter  23 value 1567.966350
iter  23 value 1567.966349
iter  23 value 1567.966345
final  value 1567.966345 
converged

生成结果:

pe <- oprobit.result$par                # point estimates
vc <- solve(oprobit.result$hessian)     # var-cov matrix
se <- sqrt(diag(vc))                    # standard errors
ll <- -oprobit.result$value             # likelihood at maximum

> pe
(Intercept)         xx1         xx2                                                 
 1.14039048 -0.05864677  0.04268965  0.77838577  1.43517145  2.23191376  0.02237956 
> vc
 (Intercept)           xx1           xx2                                                        
 (Intercept)  2.704159e-03 -7.945238e-05  4.507541e-07  1.520451e-03  1.970613e-03  2.215032e-03  9.274348e-04
 xx1         -7.945238e-05  7.300705e-03  1.165960e-04 -9.066118e-06 -6.078438e-05 -1.046191e-04 -2.009612e-04
 xx2          4.507541e-07  1.165960e-04  2.850668e-03  3.795273e-05  3.951004e-05  3.506606e-05 -2.686577e-04
1.520451e-03 -9.066118e-06  3.795273e-05  2.107875e-03  1.860727e-03  1.728905e-03  9.524469e-05
1.970613e-03 -6.078438e-05  3.951004e-05  1.860727e-03  2.955453e-03  2.576940e-03  1.960465e-04
2.215032e-03 -1.046191e-04  3.506606e-05  1.728905e-03  2.576940e-03  4.262996e-03  2.723117e-04
9.274348e-04 -2.009612e-04 -2.686577e-04  9.524469e-05  1.960465e-04  2.723117e-04  5.636931e-03
> se
(Intercept)         xx1         xx2                                                 
 0.05200153  0.08544417  0.05339165  0.04591160  0.05436408  0.06529162  0.07507950 
> ll
[1] -1567.966