R 中具有模拟最大似然的有序概率误差
Error in Ordered Probit with Simulated Maximum Likelihood in R
我 'trying' 在 R 中编写具有随机效应和模拟最大似然的有序 probit 模型。
我改编了 Chris Adolph (http://faculty.washington.edu/cadolph/?page=21)
的代码
set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
#a <- matrix(999, nrow=R, ncol=nobs)
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
# preliminaries
x <- as.matrix(x)
os <- rep(1, nrow(x))
x <- cbind(os, x)
b <- param[1:ncol(x)]
t2 <- param[(ncol(x)+1)]
t3 <- param[(ncol(x)+2)]
t4 <- param[(ncol(x)+3)]
sigma_a <- param[ncol(x)+4]
# probabilities and penalty function
xb <- x %*% b %*% rep(1, R)
asigma <- a * sigma_a
p1 <- pnorm(- xb - asigma)
if (t2 <= 0) {
p2 <- -(abs(t2) * 10000) # penalty function to keep t2>0
} else {
p2 <- pnorm(t2 - xb - asigma) - pnorm(- xb - asigma)
}
if (t3 <= t2) {
p3 <- -((t2-t3)*10000) # penalty to keep t3>t2
} else {
p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
}
if (t4 <= t3) {
p4 <- -((t3 - t4) * 10000)
} else {
p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma)
}
p5 <- 1 - pnorm(t4 - xb - asigma)
p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)
# -1 * log likelihood (optim is a minimizer)
-sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian = T)
但是,代码给了我以下错误:
apply(p3, MARGIN = 1, FUN = sum) 错误:
dim(X) 的长度必须为正
调用自:apply(p3, MARGIN = 1, FUN = sum)
我已经使用了 debug() 函数并且我能够 运行 单独地执行所有函数并且我可以在每个步骤中打印值。
问题是,只有当相应的参数值在允许的范围内时,才需要对您正在进行的 Halton 序列进行平均。请注意,我在每个 if
分支内移动了 log(apply(…))
行:
set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
# preliminaries
x <- as.matrix(x)
os <- rep(1, nrow(x))
x <- cbind(os, x)
b <- param[1:ncol(x)]
t2 <- param[(ncol(x)+1)]
t3 <- param[(ncol(x)+2)]
t4 <- param[(ncol(x)+3)]
sigma_a <- param[ncol(x)+4]
# probabilities and penalty function
xb <- x %*% b %*% rep(1, R)
asigma <- a*sigma_a
p1 <- pnorm(-xb-asigma)
p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
if (t2 <= 0) {
p2 <- -(abs(t2) * 10000) # penalty function to keep t2>0
} else {
p2 <- pnorm(t2-xb-asigma)-pnorm(-xb-asigma)
p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
}
if (t3 <= t2) {
p3 <- -((t2-t3)*10000) # penalty to keep t3>t2
} else {
p3 <- pnorm(t3-xb-asigma)-pnorm(t2-xb-asigma)
p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
}
if (t4 <= t3) {
p4 <- -((t3-t4)*10000)
} else {
p4 <- pnorm(t4-xb-asigma)-pnorm(t3-xb-asigma)
p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
}
p5 <- 1 - pnorm(t4-xb-asigma)
p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)
# -1 * log likelihood (optim is a minimizer)
-sum(cbind(y==1,y==2,y==3,y==4, y==5) * cbind(p1,p2,p3,p4,p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian=T, control = list(trace = 10, REPORT = 1))
然后成功运行:
...
iter 20 value 1567.966484
iter 21 value 1567.966434
iter 22 value 1567.966389
iter 23 value 1567.966350
iter 23 value 1567.966349
iter 23 value 1567.966345
final value 1567.966345
converged
生成结果:
pe <- oprobit.result$par # point estimates
vc <- solve(oprobit.result$hessian) # var-cov matrix
se <- sqrt(diag(vc)) # standard errors
ll <- -oprobit.result$value # likelihood at maximum
> pe
(Intercept) xx1 xx2
1.14039048 -0.05864677 0.04268965 0.77838577 1.43517145 2.23191376 0.02237956
> vc
(Intercept) xx1 xx2
(Intercept) 2.704159e-03 -7.945238e-05 4.507541e-07 1.520451e-03 1.970613e-03 2.215032e-03 9.274348e-04
xx1 -7.945238e-05 7.300705e-03 1.165960e-04 -9.066118e-06 -6.078438e-05 -1.046191e-04 -2.009612e-04
xx2 4.507541e-07 1.165960e-04 2.850668e-03 3.795273e-05 3.951004e-05 3.506606e-05 -2.686577e-04
1.520451e-03 -9.066118e-06 3.795273e-05 2.107875e-03 1.860727e-03 1.728905e-03 9.524469e-05
1.970613e-03 -6.078438e-05 3.951004e-05 1.860727e-03 2.955453e-03 2.576940e-03 1.960465e-04
2.215032e-03 -1.046191e-04 3.506606e-05 1.728905e-03 2.576940e-03 4.262996e-03 2.723117e-04
9.274348e-04 -2.009612e-04 -2.686577e-04 9.524469e-05 1.960465e-04 2.723117e-04 5.636931e-03
> se
(Intercept) xx1 xx2
0.05200153 0.08544417 0.05339165 0.04591160 0.05436408 0.06529162 0.07507950
> ll
[1] -1567.966
我 'trying' 在 R 中编写具有随机效应和模拟最大似然的有序 probit 模型。
我改编了 Chris Adolph (http://faculty.washington.edu/cadolph/?page=21)
的代码set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
#a <- matrix(999, nrow=R, ncol=nobs)
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
# preliminaries
x <- as.matrix(x)
os <- rep(1, nrow(x))
x <- cbind(os, x)
b <- param[1:ncol(x)]
t2 <- param[(ncol(x)+1)]
t3 <- param[(ncol(x)+2)]
t4 <- param[(ncol(x)+3)]
sigma_a <- param[ncol(x)+4]
# probabilities and penalty function
xb <- x %*% b %*% rep(1, R)
asigma <- a * sigma_a
p1 <- pnorm(- xb - asigma)
if (t2 <= 0) {
p2 <- -(abs(t2) * 10000) # penalty function to keep t2>0
} else {
p2 <- pnorm(t2 - xb - asigma) - pnorm(- xb - asigma)
}
if (t3 <= t2) {
p3 <- -((t2-t3)*10000) # penalty to keep t3>t2
} else {
p3 <- pnorm(t3 - xb - asigma) - pnorm(t2 - xb - asigma)
}
if (t4 <= t3) {
p4 <- -((t3 - t4) * 10000)
} else {
p4 <- pnorm(t4 - xb - asigma) - pnorm(t3 - xb - asigma)
}
p5 <- 1 - pnorm(t4 - xb - asigma)
p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)
# -1 * log likelihood (optim is a minimizer)
-sum(cbind(y==1, y==2, y==3, y==4, y==5) * cbind(p1, p2, p3, p4, p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian = T)
但是,代码给了我以下错误: apply(p3, MARGIN = 1, FUN = sum) 错误: dim(X) 的长度必须为正 调用自:apply(p3, MARGIN = 1, FUN = sum)
我已经使用了 debug() 函数并且我能够 运行 单独地执行所有函数并且我可以在每个步骤中打印值。
问题是,只有当相应的参数值在允许的范围内时,才需要对您正在进行的 Halton 序列进行平均。请注意,我在每个 if
分支内移动了 log(apply(…))
行:
set.seed(10234)
nobs <- 1000
x1 <- rnorm(nobs)*.15^.5
x2 <- rnorm(nobs)*.35^.5
z <- rnorm(nobs)*.25^.5
y <- round(runif(nobs, 1,5), 0)
x <- cbind(x1, x2)
#### Generate Halton Sequences
library("randtoolbox")
R <- 200
a <- halton(n=nobs, dim=R, normal=T, init=T)
# Likelihood for 5 category ordered probit
llk.oprobit5 <- function(param, x, y) {
# preliminaries
x <- as.matrix(x)
os <- rep(1, nrow(x))
x <- cbind(os, x)
b <- param[1:ncol(x)]
t2 <- param[(ncol(x)+1)]
t3 <- param[(ncol(x)+2)]
t4 <- param[(ncol(x)+3)]
sigma_a <- param[ncol(x)+4]
# probabilities and penalty function
xb <- x %*% b %*% rep(1, R)
asigma <- a*sigma_a
p1 <- pnorm(-xb-asigma)
p1 <- log(apply(p1, MARGIN=1, FUN=sum)/R)
if (t2 <= 0) {
p2 <- -(abs(t2) * 10000) # penalty function to keep t2>0
} else {
p2 <- pnorm(t2-xb-asigma)-pnorm(-xb-asigma)
p2 <- log(apply(p2, MARGIN=1, FUN=sum)/R)
}
if (t3 <= t2) {
p3 <- -((t2-t3)*10000) # penalty to keep t3>t2
} else {
p3 <- pnorm(t3-xb-asigma)-pnorm(t2-xb-asigma)
p3 <- log(apply(p3, MARGIN=1, FUN=sum)/R)
}
if (t4 <= t3) {
p4 <- -((t3-t4)*10000)
} else {
p4 <- pnorm(t4-xb-asigma)-pnorm(t3-xb-asigma)
p4 <- log(apply(p4, MARGIN=1, FUN=sum)/R)
}
p5 <- 1 - pnorm(t4-xb-asigma)
p5 <- log(apply(p5, MARGIN=1, FUN=sum)/R)
# -1 * log likelihood (optim is a minimizer)
-sum(cbind(y==1,y==2,y==3,y==4, y==5) * cbind(p1,p2,p3,p4,p5))
}
# Use optim directly
ls.result <- lm(y~x) # use ls estimates as starting values
stval <- c(ls.result$coefficients,1,2,3,2) # initial guesses
oprobit.result <- optim(stval, llk.oprobit5, method="BFGS", x=x, y=y, hessian=T, control = list(trace = 10, REPORT = 1))
然后成功运行:
...
iter 20 value 1567.966484
iter 21 value 1567.966434
iter 22 value 1567.966389
iter 23 value 1567.966350
iter 23 value 1567.966349
iter 23 value 1567.966345
final value 1567.966345
converged
生成结果:
pe <- oprobit.result$par # point estimates
vc <- solve(oprobit.result$hessian) # var-cov matrix
se <- sqrt(diag(vc)) # standard errors
ll <- -oprobit.result$value # likelihood at maximum
> pe
(Intercept) xx1 xx2
1.14039048 -0.05864677 0.04268965 0.77838577 1.43517145 2.23191376 0.02237956
> vc
(Intercept) xx1 xx2
(Intercept) 2.704159e-03 -7.945238e-05 4.507541e-07 1.520451e-03 1.970613e-03 2.215032e-03 9.274348e-04
xx1 -7.945238e-05 7.300705e-03 1.165960e-04 -9.066118e-06 -6.078438e-05 -1.046191e-04 -2.009612e-04
xx2 4.507541e-07 1.165960e-04 2.850668e-03 3.795273e-05 3.951004e-05 3.506606e-05 -2.686577e-04
1.520451e-03 -9.066118e-06 3.795273e-05 2.107875e-03 1.860727e-03 1.728905e-03 9.524469e-05
1.970613e-03 -6.078438e-05 3.951004e-05 1.860727e-03 2.955453e-03 2.576940e-03 1.960465e-04
2.215032e-03 -1.046191e-04 3.506606e-05 1.728905e-03 2.576940e-03 4.262996e-03 2.723117e-04
9.274348e-04 -2.009612e-04 -2.686577e-04 9.524469e-05 1.960465e-04 2.723117e-04 5.636931e-03
> se
(Intercept) xx1 xx2
0.05200153 0.08544417 0.05339165 0.04591160 0.05436408 0.06529162 0.07507950
> ll
[1] -1567.966