R:在混合模型上使用 bootstrap 预测
R: using bootstrap prediction on mixed model
library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
theta.fit = function(x, y){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = x,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, x){
(fit$fitted)[,1]
}
sq.err <- function(y,yhat) { (y-yhat)^2}
results <- bootpred(x,y,20,theta.fit,theta.predict,
err.meas=sq.err)
我正在使用 bootpred
函数来获取预测误差的估计值。但是,当我 运行 最后一行时,出现以下错误:
Error in model.frame.default(formula = ~height + age, data = c(" 4.51", :
'data' must be a data.frame, not a matrix or an array
然后我尝试了 x = data.frame(x)
但这并没有解决我的问题。
出现问题是因为使用的示例数据集是groupedData:
library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
class(x)
[1] "nfnGroupedData" "nfGroupedData" "groupedData" "data.frame"
而在bootpred
函数内部,又转为矩阵。来回转换可能会很麻烦,尤其是当您需要线性混合模型的因子列时。
你可以做什么写 theta.fit 和 theta.predict 来接收 data.frame:
theta.fit = function(df){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = df,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, df){
predict(fit,df)
}
sq.err <- function(y,yhat) { (y-yhat)^2}
现在更改 bootpred 函数并使用 df,我想您可以再次提供 y,或指定要在 data.frame:
中使用的列
bootpred_df = function (df,y,nboot, theta.fit, theta.predict, err.meas, ...)
{
call <- match.call()
n <- length(y)
saveii <- NULL
fit0 <- theta.fit(df, ...)
yhat0 <- theta.predict(fit0, df)
app.err <- mean(err.meas(y, yhat0))
err1 <- matrix(0, nrow = nboot, ncol = n)
err2 <- rep(0, nboot)
for (b in 1:nboot) {
ii <- sample(1:n, replace = TRUE)
saveii <- cbind(saveii, ii)
fit <- theta.fit(df[ii, ], ...)
yhat1 <- theta.predict(fit, df[ii, ])
yhat2 <- theta.predict(fit, df)
err1[b, ] <- err.meas(y, yhat2)
err2[b] <- mean(err.meas(y[ii], yhat1))
}
optim <- mean(apply(err1, 1, mean,na.rm=TRUE) - err2)
junk <- function(x, i) {
sum(x == i)
}
e0 <- 0
for (i in 1:n) {
o <- apply(saveii, 2, junk, i)
if (sum(o == 0) == 0)
cat("increase nboot for computation of the .632 estimator",
fill = TRUE)
e0 <- e0 + (1/n) * sum(err1[o == 0, i])/sum(o == 0)
}
err.632 <- 0.368 * app.err + 0.632 * e0
return(list(app.err, optim, err.632, call = call))
}
我们现在可以运行..但是由于这些数据的性质,在某些情况下,组(种子)的分布不均匀,使得一些变量难以估计..大多数可能通过改进代码可以更好地解决此问题。无论如何,如果你幸运的话,它会像下面这样工作:
bootpred_df(Loblolly,Loblolly$height,20,theta.fit,theta.predict,err.meas=sq.err)
[[1]]
[1] 0.4337236
[[2]]
[1] 0.1777644
[[3]]
[1] 0.6532417
$call
bootpred_df(df = Loblolly, y = Loblolly$height, nboot = 20, theta.fit = theta.fit,
theta.predict = theta.predict, err.meas = sq.err)
library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
theta.fit = function(x, y){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = x,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, x){
(fit$fitted)[,1]
}
sq.err <- function(y,yhat) { (y-yhat)^2}
results <- bootpred(x,y,20,theta.fit,theta.predict,
err.meas=sq.err)
我正在使用 bootpred
函数来获取预测误差的估计值。但是,当我 运行 最后一行时,出现以下错误:
Error in model.frame.default(formula = ~height + age, data = c(" 4.51", :
'data' must be a data.frame, not a matrix or an array
然后我尝试了 x = data.frame(x)
但这并没有解决我的问题。
出现问题是因为使用的示例数据集是groupedData:
library(nlme)
library(bootstrap)
y = Loblolly$height
x = Loblolly
class(x)
[1] "nfnGroupedData" "nfGroupedData" "groupedData" "data.frame"
而在bootpred
函数内部,又转为矩阵。来回转换可能会很麻烦,尤其是当您需要线性混合模型的因子列时。
你可以做什么写 theta.fit 和 theta.predict 来接收 data.frame:
theta.fit = function(df){
nlme(height ~ SSasymp(age, Asym, R0, lrc),
data = df,
fixed = Asym + R0 + lrc ~ 1,
random = Asym ~ 1,
start = c(Asym = 103, R0 = -8.5, lrc = -3.3))
}
theta.predict = function(fit, df){
predict(fit,df)
}
sq.err <- function(y,yhat) { (y-yhat)^2}
现在更改 bootpred 函数并使用 df,我想您可以再次提供 y,或指定要在 data.frame:
中使用的列bootpred_df = function (df,y,nboot, theta.fit, theta.predict, err.meas, ...)
{
call <- match.call()
n <- length(y)
saveii <- NULL
fit0 <- theta.fit(df, ...)
yhat0 <- theta.predict(fit0, df)
app.err <- mean(err.meas(y, yhat0))
err1 <- matrix(0, nrow = nboot, ncol = n)
err2 <- rep(0, nboot)
for (b in 1:nboot) {
ii <- sample(1:n, replace = TRUE)
saveii <- cbind(saveii, ii)
fit <- theta.fit(df[ii, ], ...)
yhat1 <- theta.predict(fit, df[ii, ])
yhat2 <- theta.predict(fit, df)
err1[b, ] <- err.meas(y, yhat2)
err2[b] <- mean(err.meas(y[ii], yhat1))
}
optim <- mean(apply(err1, 1, mean,na.rm=TRUE) - err2)
junk <- function(x, i) {
sum(x == i)
}
e0 <- 0
for (i in 1:n) {
o <- apply(saveii, 2, junk, i)
if (sum(o == 0) == 0)
cat("increase nboot for computation of the .632 estimator",
fill = TRUE)
e0 <- e0 + (1/n) * sum(err1[o == 0, i])/sum(o == 0)
}
err.632 <- 0.368 * app.err + 0.632 * e0
return(list(app.err, optim, err.632, call = call))
}
我们现在可以运行..但是由于这些数据的性质,在某些情况下,组(种子)的分布不均匀,使得一些变量难以估计..大多数可能通过改进代码可以更好地解决此问题。无论如何,如果你幸运的话,它会像下面这样工作:
bootpred_df(Loblolly,Loblolly$height,20,theta.fit,theta.predict,err.meas=sq.err)
[[1]]
[1] 0.4337236
[[2]]
[1] 0.1777644
[[3]]
[1] 0.6532417
$call
bootpred_df(df = Loblolly, y = Loblolly$height, nboot = 20, theta.fit = theta.fit,
theta.predict = theta.predict, err.meas = sq.err)