R:在 caret 模型和 glm 模型上使用 rms::bootcov 计算 bootstrap 估计值
R: compute the bootstrap estimate using rms::bootcov on caret model and glm model
如何使用 rms 包中的函数 bootcov 计算回归系数的 bootstrap 估计值?我尝试了以下示例数据集,但出现错误:
# Load the example data set and the modelling package.
library(mlbench)
data(PimaIndiansDiabetes)
library(caret)

# Resampling setup: 10-fold cross-validation repeated 3 times, keeping
# class probabilities and hold-out predictions, summarised with
# twoClassSummary.
trControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  classProbs = TRUE,
  savePredictions = TRUE,
  summaryFunction = twoClassSummary
)

# Model diabetes on every other column using caret's "glm" method.
caret_model <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "glm",
  trControl = trControl
)

library(rms)
set.seed(1234)

# Request 100 bootstrap resamples of the underlying fit. This is the call
# that raises the error quoted right after this snippet.
reduced_model_bootcov <- bootcov(caret_model$finalModel, B = 100)
错误是:
Error in bootcov(caret_model$finalModel, B = 100) : you did not
specify x=TRUE and y=TRUE in the fit
如果我使用函数 glm 来构建模型,我会这样做:
# Logistic regression of diabetes on every other column with glm(),
# passing x = TRUE and y = TRUE to the fit.
model <- glm(
  diabetes ~ .,
  family = binomial,
  data = PimaIndiansDiabetes,
  x = TRUE,
  y = TRUE
)

# Request 100 bootstrap resamples of that fit.
model_bootcov <- bootcov(model, B = 100)
但是,我又遇到了一个不同的错误:
Error in bootcov(model, B = 100) : fitter not valid
原来 rms 包里有一个名为 Glm 的拟合函数,它是 glm 的封装(wrapper);如果想使用 bootcov,就需要用它来拟合模型。因此,要让 bootcov 正常工作:
library(mlbench)
library(rms)
data(PimaIndiansDiabetes)

# Fit the logistic regression with rms::Glm instead of glm(), passing
# x = TRUE and y = TRUE to the fit.
model <- rms::Glm(
  diabetes ~ .,
  family = binomial,
  data = PimaIndiansDiabetes,
  x = TRUE,
  y = TRUE
)

# Bootstrap the fit with 1000 resamples.
model_bootcov <- bootcov(model, B = 1000)
要使用 boot 包:
library(boot)
# Statistic function for boot(): refits the binomial glm of `diabetes` on
# all other columns, using only the rows of `dat` selected by `inds`, and
# returns the coefficient vector of that fit.
glm.fun <- function(dat, inds) {
  resampled <- dat[inds, ]
  coef(glm(diabetes ~ ., data = resampled, family = binomial))
}
# Run boot() with glm.fun as the statistic and R = 1000 replicates.
model_boot <- boot(
  data = PimaIndiansDiabetes,
  statistic = glm.fun,
  R = 1000
)
我们可以比较这两种方法得到的 bootstrap 结果;当然,两者使用的随机种子不同,你很可能需要先设置相同的种子:
library(tidyr)
library(dplyr)
library(ggplot2)
# Reshape a matrix into a long table suitable for plotting.
#
# mat:   matrix whose columns are to be labelled and stacked.
# NAMES: labels assigned to the columns of `mat` (should be unique).
# X:     value stored in the `type` column of every output row.
#
# Returns a long table with the columns B (row name of the intermediate
# data frame), name (column label), value and type.
melt_matrix <- function(mat, NAMES, X) {
  colnames(mat) <- NAMES
  # check.names = FALSE keeps labels such as "(Intercept)" verbatim; the
  # default would rewrite them through make.names() ("X.Intercept.").
  data.frame(mat, check.names = FALSE) %>%
    tibble::rownames_to_column("B") %>%
    pivot_longer(-B) %>%
    mutate(type = X)
}
# Coefficient labels taken from the fitted model.
VAR <- names(coef(model))

# Stack the coefficient draws from both bootstrap approaches, tagging each
# row with the approach it came from.
plotdf <- rbind(
  melt_matrix(model_boot$t, VAR, "boot"),
  melt_matrix(model_bootcov$boot.Coef, VAR, "bootcov")
)

# One violin per approach and one panel per coefficient, each panel with
# its own y-axis. `scales` is spelled out in full instead of relying on
# partial matching of `scale`.
ggplot(plotdf, aes(x = type, y = value)) +
  geom_violin() +
  facet_wrap(~name, scales = "free_y")
如何使用 rms 包中的函数 bootcov 计算回归系数的 bootstrap 估计值?我尝试了以下示例数据集,但出现错误:
# Load the example data set and the modelling package.
library(mlbench)
data(PimaIndiansDiabetes)
library(caret)

# Resampling setup: 10-fold cross-validation repeated 3 times, keeping
# class probabilities and hold-out predictions, summarised with
# twoClassSummary.
trControl <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3,
  classProbs = TRUE,
  savePredictions = TRUE,
  summaryFunction = twoClassSummary
)

# Model diabetes on every other column using caret's "glm" method.
caret_model <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "glm",
  trControl = trControl
)

library(rms)
set.seed(1234)

# Request 100 bootstrap resamples of the underlying fit. This is the call
# that raises the error quoted right after this snippet.
reduced_model_bootcov <- bootcov(caret_model$finalModel, B = 100)
错误是:
Error in bootcov(caret_model$finalModel, B = 100) : you did not specify x=TRUE and y=TRUE in the fit
如果我使用函数 glm 来构建模型,我会这样做:
# Logistic regression of diabetes on every other column with glm(),
# passing x = TRUE and y = TRUE to the fit.
model <- glm(
  diabetes ~ .,
  family = binomial,
  data = PimaIndiansDiabetes,
  x = TRUE,
  y = TRUE
)

# Request 100 bootstrap resamples of that fit.
model_bootcov <- bootcov(model, B = 100)
但是,我又遇到了一个不同的错误:
Error in bootcov(model, B = 100) : fitter not valid
原来 rms 包里有一个名为 Glm 的拟合函数,它是 glm 的封装(wrapper);如果想使用 bootcov,就需要用它来拟合模型。因此,要让 bootcov 正常工作:
library(mlbench)
library(rms)
data(PimaIndiansDiabetes)

# Fit the logistic regression with rms::Glm instead of glm(), passing
# x = TRUE and y = TRUE to the fit.
model <- rms::Glm(
  diabetes ~ .,
  family = binomial,
  data = PimaIndiansDiabetes,
  x = TRUE,
  y = TRUE
)

# Bootstrap the fit with 1000 resamples.
model_bootcov <- bootcov(model, B = 1000)
要使用 boot 包:
library(boot)
# Statistic function for boot(): refits the binomial glm of `diabetes` on
# all other columns, using only the rows of `dat` selected by `inds`, and
# returns the coefficient vector of that fit.
glm.fun <- function(dat, inds) {
  resampled <- dat[inds, ]
  coef(glm(diabetes ~ ., data = resampled, family = binomial))
}
# Run boot() with glm.fun as the statistic and R = 1000 replicates.
model_boot <- boot(
  data = PimaIndiansDiabetes,
  statistic = glm.fun,
  R = 1000
)
我们可以比较这两种方法得到的 bootstrap 结果;当然,两者使用的随机种子不同,你很可能需要先设置相同的种子:
library(tidyr)
library(dplyr)
library(ggplot2)
# Reshape a matrix into a long table suitable for plotting.
#
# mat:   matrix whose columns are to be labelled and stacked.
# NAMES: labels assigned to the columns of `mat` (should be unique).
# X:     value stored in the `type` column of every output row.
#
# Returns a long table with the columns B (row name of the intermediate
# data frame), name (column label), value and type.
melt_matrix <- function(mat, NAMES, X) {
  colnames(mat) <- NAMES
  # check.names = FALSE keeps labels such as "(Intercept)" verbatim; the
  # default would rewrite them through make.names() ("X.Intercept.").
  data.frame(mat, check.names = FALSE) %>%
    tibble::rownames_to_column("B") %>%
    pivot_longer(-B) %>%
    mutate(type = X)
}
# Coefficient labels taken from the fitted model.
VAR <- names(coef(model))

# Stack the coefficient draws from both bootstrap approaches, tagging each
# row with the approach it came from.
plotdf <- rbind(
  melt_matrix(model_boot$t, VAR, "boot"),
  melt_matrix(model_bootcov$boot.Coef, VAR, "bootcov")
)

# One violin per approach and one panel per coefficient, each panel with
# its own y-axis. `scales` is spelled out in full instead of relying on
# partial matching of `scale`.
ggplot(plotdf, aes(x = type, y = value)) +
  geom_violin() +
  facet_wrap(~name, scales = "free_y")