MLE 不收敛于 R 中的 "spelled out" dnorm
MLE does not converge with "spelled out" dnorm in R
我正在使用对数似然函数估计模型参数。对于标准正态密度函数,我曾经使用内置函数 "dnorm" 并自己指定了一次该函数。奇怪的是,使用 dnorm 会导致收敛,而另一种方法不会:
### Functions:
u <- function(x,n)
{
ifelse(n!=1, util <- x^(1-n)/(1-n), util <- log(x))
return(util)
}
u.inv <- function(x,n)
{
ifelse(n !=1, inv.util <- ((1-n)*(x))^(1/(1-n)), inv.util <- exp(x))
return(inv.util)
}
v = function(x,n){return(1/(u(maxz,n)-u(minz,n))*(u(x,n)-u(minz,n)))}
v.inv = function(x,n){return(u.inv(x*(u(maxz,n)-u(minz,n))+u(minz,n),n))}
w <- function(p,a,b){return(exp(-b*(-log(p))^(1-a)))}
### Data
z1 <- c(0.1111111, 0.1037037, 0.1222222, 0.1111111, 0.1074074, 0.1666667, 0.1333333, 0.2000000, 0.1333333, 0.1074074,
0.1037037, 0.1111111, 0.1333333, 0.2000000, 0.1222222, 0.1111111, 0.1666667, 0.1333333, 0.1111111, 0.1333333,
0.1111111, 0.1666667, 0.1074074, 0.1333333, 0.1222222, 0.2000000, 0.1037037)
z2 <- c(0.08888889, 0.06666667, 0.07777778, 0.00000000, 0.03333333, 0.09259259, 0.09629630, 0.08888889, 0.06666667,
0.03333333, 0.06666667, 0.08888889, 0.06666667, 0.08888889, 0.07777778, 0.00000000, 0.09259259, 0.09629630,
0.00000000, 0.09629630, 0.08888889, 0.09259259, 0.03333333, 0.06666667, 0.07777778, 0.08888889, 0.06666667)
p <- c(0.5, 0.9, 0.5, 0.9, 0.9, 0.1, 0.1, 0.1, 0.5, 0.9, 0.9, 0.5, 0.5, 0.1, 0.5, 0.9, 0.1, 0.1, 0.9, 0.1, 0.5, 0.1, 0.9, 0.5, 0.5, 0.1, 0.9)
zce <- c(0.11055556, 0.10277778, 0.11000000, 0.10833333, 0.10185185, 0.11666667, 0.13240741, 0.14166667, 0.13166667,
0.07222222, 0.08796296, 0.09944444, 0.09500000,0.10833333, 0.09444444, 0.05277778, 0.10925926, 0.11759259,
0.05833333, 0.10277778, 0.09277778, 0.10925926, 0.06111111, 0.08833333, 0.09222222, 0.12500000, 0.09166667)
maxz = 135
minz = 0
### Using dnorm:
LL <- function(n,a,b,s)
{
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ll = dnorm(res, 0, s,log=T)
return(-sum(ll))
}
### mle()
fit <- mle(LL,
start = list(n = 0.1,a=0.1,b=0.1,s=0.1),
method = "L-BFGS-B",
lower = list(n=-Inf,a = -Inf, b = 0.0001, s=0.0001),
upper = list(n=0.9999,a = 0.9999, b = Inf, s=Inf),
control = list(maxit = 500, ndeps = rep(0.000001,4)),
nobs=length(z1)
)
### Resulting coefficients saved in "fit"
Coefficients:
n a b s
0.16533414 0.65254314 0.78727084 0.01475997
现在使用标准法线的拼写日志而不是 dnorm(..., log=T):
ldens <- function(x,mu,sig){log((1/(sig*sqrt(2*pi)))*exp(-((x-mu)^2/(2*sig^2))))}
LL.ldens <- function(n,a,b,s)
{
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ll = ldens(x= res, mu=0, sig = s)
return(-sum(ll))
}
fit <- mle(LL.ldens,
start = list(n = 0.1,a=0.1,b=0.1,s=0.1),
method = "L-BFGS-B",
lower = list(n=-Inf,a = -Inf, b = 0.0001, s=0.0001),
upper = list(n=0.9999,a = 0.9999, b = Inf, s=Inf),
control = list(maxit = 500, ndeps = rep(0.000001,4),trace =6),
nobs=length(z1)
)
生成 "finite values error" 消息:
Error in optim(start, f, method = method, hessian = TRUE, ...) :
L-BFGS-B needs finite values of 'fn'
问题是,我不明白为什么。如果我采用起始值生成 mle 将使用的 "res" 的第一个向量,我将使用自己的规范获得对数密度向量。更重要的是,这似乎与我在使用 dnorm(... log=T):
时得到的向量相匹配
n = a = b = s = 0.1
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ldens(x= res, mu=0, sig = s)
[1] 1.383596 1.383637 1.379527 1.383579 1.382617 1.320485 1.381703 1.317098 1.383168 1.325277 1.372026 1.378537 1.327294 1.139934 1.353307 1.222810 1.291415 [18] 1.379966 1.252776 1.356281 1.369575 1.291415 1.281141 1.302690 1.347586 1.242405 1.376986
dnorm(res, 0, s, log=T)
[1] 1.383596 1.383637 1.379527 1.383579 1.382617 1.320485 1.381703 1.317098 1.383168 1.325277 1.372026 1.378537 1.327294 1.139934 1.353307 1.222810 1.291415 [18] 1.379966 1.252776 1.356281 1.369575 1.291415 1.281141 1.302690 1.347586 1.242405 1.376986
有趣的是,当使用“==”测试相等性时,这些数字并不相同(一个除外):
ldens(x= res,mu=0,sig = s) == dnorm(res, 0, s,log=T)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [26] TRUE FALSE
检查更高精度的数字发现它们与另一个略有不同:
sprintf("%.54f",ldens(res,0,s))[1]
"1.383596381246589235303190434933640062808990478515625000"
sprintf("%.54f",dnorm(res, 0, s,log=T))[1]
"1.383596381246589013258585509902331978082656860351562500"
但这不可能是使用 dnorm 导致收敛而另一个不收敛的原因吗?
更改函数以捕获错误:
ldens <- function(x,mu,sig){v <- log((1/(sig*sqrt(2*pi)))*exp(-((x-mu)^2/(2*sig^2)))); if(is.infinite(sum(v))) browser(); v}
您将能够看到导致问题的参数值 - 指数部分为零,指数计算为 0
,因此对数 returns -Inf
.内部函数 dnorm
可能使用数学上等效的指数分布版本,它具有更好的浮点运算特性。
我正在使用对数似然函数估计模型参数。对于标准正态密度函数,我曾经使用内置函数 "dnorm" 并自己指定了一次该函数。奇怪的是,使用 dnorm 会导致收敛,而另一种方法不会:
### Functions:
u <- function(x,n)
{
ifelse(n!=1, util <- x^(1-n)/(1-n), util <- log(x))
return(util)
}
u.inv <- function(x,n)
{
ifelse(n !=1, inv.util <- ((1-n)*(x))^(1/(1-n)), inv.util <- exp(x))
return(inv.util)
}
v = function(x,n){return(1/(u(maxz,n)-u(minz,n))*(u(x,n)-u(minz,n)))}
v.inv = function(x,n){return(u.inv(x*(u(maxz,n)-u(minz,n))+u(minz,n),n))}
w <- function(p,a,b){return(exp(-b*(-log(p))^(1-a)))}
### Data
z1 <- c(0.1111111, 0.1037037, 0.1222222, 0.1111111, 0.1074074, 0.1666667, 0.1333333, 0.2000000, 0.1333333, 0.1074074,
0.1037037, 0.1111111, 0.1333333, 0.2000000, 0.1222222, 0.1111111, 0.1666667, 0.1333333, 0.1111111, 0.1333333,
0.1111111, 0.1666667, 0.1074074, 0.1333333, 0.1222222, 0.2000000, 0.1037037)
z2 <- c(0.08888889, 0.06666667, 0.07777778, 0.00000000, 0.03333333, 0.09259259, 0.09629630, 0.08888889, 0.06666667,
0.03333333, 0.06666667, 0.08888889, 0.06666667, 0.08888889, 0.07777778, 0.00000000, 0.09259259, 0.09629630,
0.00000000, 0.09629630, 0.08888889, 0.09259259, 0.03333333, 0.06666667, 0.07777778, 0.08888889, 0.06666667)
p <- c(0.5, 0.9, 0.5, 0.9, 0.9, 0.1, 0.1, 0.1, 0.5, 0.9, 0.9, 0.5, 0.5, 0.1, 0.5, 0.9, 0.1, 0.1, 0.9, 0.1, 0.5, 0.1, 0.9, 0.5, 0.5, 0.1, 0.9)
zce <- c(0.11055556, 0.10277778, 0.11000000, 0.10833333, 0.10185185, 0.11666667, 0.13240741, 0.14166667, 0.13166667,
0.07222222, 0.08796296, 0.09944444, 0.09500000,0.10833333, 0.09444444, 0.05277778, 0.10925926, 0.11759259,
0.05833333, 0.10277778, 0.09277778, 0.10925926, 0.06111111, 0.08833333, 0.09222222, 0.12500000, 0.09166667)
maxz = 135
minz = 0
### Using dnorm:
LL <- function(n,a,b,s)
{
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ll = dnorm(res, 0, s,log=T)
return(-sum(ll))
}
### mle()
fit <- mle(LL,
start = list(n = 0.1,a=0.1,b=0.1,s=0.1),
method = "L-BFGS-B",
lower = list(n=-Inf,a = -Inf, b = 0.0001, s=0.0001),
upper = list(n=0.9999,a = 0.9999, b = Inf, s=Inf),
control = list(maxit = 500, ndeps = rep(0.000001,4)),
nobs=length(z1)
)
### Resulting coefficients saved in "fit"
Coefficients:
n a b s
0.16533414 0.65254314 0.78727084 0.01475997
现在使用标准法线的拼写日志而不是 dnorm(..., log=T):
ldens <- function(x,mu,sig){log((1/(sig*sqrt(2*pi)))*exp(-((x-mu)^2/(2*sig^2))))}
LL.ldens <- function(n,a,b,s)
{
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ll = ldens(x= res, mu=0, sig = s)
return(-sum(ll))
}
fit <- mle(LL.ldens,
start = list(n = 0.1,a=0.1,b=0.1,s=0.1),
method = "L-BFGS-B",
lower = list(n=-Inf,a = -Inf, b = 0.0001, s=0.0001),
upper = list(n=0.9999,a = 0.9999, b = Inf, s=Inf),
control = list(maxit = 500, ndeps = rep(0.000001,4),trace =6),
nobs=length(z1)
)
生成 "finite values error" 消息:
Error in optim(start, f, method = method, hessian = TRUE, ...) :
L-BFGS-B needs finite values of 'fn'
问题是,我不明白为什么。如果我采用起始值生成 mle 将使用的 "res" 的第一个向量,我将使用自己的规范获得对数密度向量。更重要的是,这似乎与我在使用 dnorm(... log=T):
时得到的向量相匹配n = a = b = s = 0.1
V = (v(z1,n)-v(z2,n))*w(p,a,b) + v(z2,n)
res = zce - v.inv(V,n)
ldens(x= res, mu=0, sig = s)
[1] 1.383596 1.383637 1.379527 1.383579 1.382617 1.320485 1.381703 1.317098 1.383168 1.325277 1.372026 1.378537 1.327294 1.139934 1.353307 1.222810 1.291415 [18] 1.379966 1.252776 1.356281 1.369575 1.291415 1.281141 1.302690 1.347586 1.242405 1.376986
dnorm(res, 0, s, log=T)
[1] 1.383596 1.383637 1.379527 1.383579 1.382617 1.320485 1.381703 1.317098 1.383168 1.325277 1.372026 1.378537 1.327294 1.139934 1.353307 1.222810 1.291415 [18] 1.379966 1.252776 1.356281 1.369575 1.291415 1.281141 1.302690 1.347586 1.242405 1.376986
有趣的是,当使用“==”测试相等性时,这些数字并不相同(一个除外):
ldens(x= res,mu=0,sig = s) == dnorm(res, 0, s,log=T)
[1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE [26] TRUE FALSE
检查更高精度的数字发现它们与另一个略有不同:
sprintf("%.54f",ldens(res,0,s))[1]
"1.383596381246589235303190434933640062808990478515625000"
sprintf("%.54f",dnorm(res, 0, s,log=T))[1]
"1.383596381246589013258585509902331978082656860351562500"
但这不可能是使用 dnorm 导致收敛而另一个不收敛的原因吗?
更改函数以捕获错误:
ldens <- function(x,mu,sig){v <- log((1/(sig*sqrt(2*pi)))*exp(-((x-mu)^2/(2*sig^2)))); if(is.infinite(sum(v))) browser(); v}
您将能够看到导致问题的参数值 - 指数部分为零,指数计算为 0
,因此对数 returns -Inf
.内部函数 dnorm
可能使用数学上等效的指数分布版本,它具有更好的浮点运算特性。