如何在 R 中计算 randomForest 模型在训练集上的 AUC
How to calculate randomForest training AUC in R
很抱歉再次发布这个问题,但我现在真的需要帮助。
我正在尝试计算 R 中随机森林模型训练集的 AUC,有两种计算方法,但给出不同的结果。以下是我的问题的可重现示例。如果有人能提供帮助,我将不胜感激!!!
library(randomForest)
library(pROC)
library(ROCR)
# Build a two-class training set: keep only the versicolor and virginica rows
# of iris, then drop the factor level that no longer occurs.
train <- droplevels(iris[iris$Species %in% c("versicolor", "virginica"), ])

# Fit a very small forest (2 trees) using every other column as a predictor.
rfmodel <- randomForest(Species ~ ., data = train, importance = TRUE, ntree = 2)

# Method 1: pass the training rows back in as newdata, take the probability
# of the second class, and compute the AUC with ROCR.
rf_p_train <- predict(rfmodel, newdata = train, type = "prob")[, "virginica"]
rf_pr_train <- prediction(rf_p_train, train$Species)
r_auc_train1 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train1  # 0.9888 (value reported by the original author)

# Method 2: use the vote fractions stored on the fitted model instead of
# calling predict(), then compute the AUC the same way.
rf_p_train <- as.vector(rfmodel[["votes"]][, "virginica"])
rf_pr_train <- prediction(rf_p_train, train$Species)
r_auc_train2 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train2  # 0.9175 (value reported by the original author)
要让两种方法得到相同的结果,应在第一种方法中去掉 newdata 参数(predict 函数的包文档中有说明:不提供 newdata 时,返回的是袋外(OOB)预测):
# Call predict() without newdata; per the randomForest documentation this
# returns the out-of-bag predictions rather than re-scoring the training rows.
rf_p_train <- predict(rfmodel, type = "prob")[, "virginica"]
rf_pr_train <- prediction(rf_p_train, train$Species)
# Pull the single AUC value out of the ROCR performance object.
r_auc_train1 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train1
返回结果:
[1] 0.8655172
第二种方法使用的是 OOB(袋外)投票,如 randomForest 函数的包文档中所述:
# Take the second-class vote fractions stored on the fitted model (described
# as out-of-bag votes in the randomForest documentation) as the scores.
rf_p_train <- as.vector(rfmodel[["votes"]][, "virginica"])
rf_pr_train <- prediction(rf_p_train, train$Species)
# Pull the single AUC value out of the ROCR performance object.
r_auc_train2 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train2
返回结果(与上面相同):
[1] 0.8655172
很抱歉再次发布这个问题,但我现在真的需要帮助。 我正在尝试计算 R 中随机森林模型训练集的 AUC,有两种计算方法,但给出不同的结果。以下是我的问题的可重现示例。如果有人能提供帮助,我将不胜感激!!!
library(randomForest)
library(pROC)
library(ROCR)
# Build a two-class training set: keep only the versicolor and virginica rows
# of iris, then drop the factor level that no longer occurs.
train <- droplevels(iris[iris$Species %in% c("versicolor", "virginica"), ])

# Fit a very small forest (2 trees) using every other column as a predictor.
rfmodel <- randomForest(Species ~ ., data = train, importance = TRUE, ntree = 2)

# Method 1: pass the training rows back in as newdata, take the probability
# of the second class, and compute the AUC with ROCR.
rf_p_train <- predict(rfmodel, newdata = train, type = "prob")[, "virginica"]
rf_pr_train <- prediction(rf_p_train, train$Species)
r_auc_train1 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train1  # 0.9888 (value reported by the original author)

# Method 2: use the vote fractions stored on the fitted model instead of
# calling predict(), then compute the AUC the same way.
rf_p_train <- as.vector(rfmodel[["votes"]][, "virginica"])
rf_pr_train <- prediction(rf_p_train, train$Species)
r_auc_train2 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train2  # 0.9175 (value reported by the original author)
要让两种方法得到相同的结果,应在第一种方法中去掉 newdata 参数(predict 函数的包文档中有说明:不提供 newdata 时,返回的是袋外(OOB)预测):
# Call predict() without newdata; per the randomForest documentation this
# returns the out-of-bag predictions rather than re-scoring the training rows.
rf_p_train <- predict(rfmodel, type = "prob")[, "virginica"]
rf_pr_train <- prediction(rf_p_train, train$Species)
# Pull the single AUC value out of the ROCR performance object.
r_auc_train1 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train1
返回结果:
[1] 0.8655172
第二种方法使用的是 OOB(袋外)投票,如 randomForest 函数的包文档中所述:
# Take the second-class vote fractions stored on the fitted model (described
# as out-of-bag votes in the randomForest documentation) as the scores.
rf_p_train <- as.vector(rfmodel[["votes"]][, "virginica"])
rf_pr_train <- prediction(rf_p_train, train$Species)
# Pull the single AUC value out of the ROCR performance object.
r_auc_train2 <- slot(
  performance(rf_pr_train, measure = "auc"),
  "y.values"
)[[1]]
r_auc_train2
返回结果(与上面相同):
[1] 0.8655172