如何在 R 中从头开始创建随机森林(没有 randomforest 包)

How to create Random Forest from scratch in R (without the randomforest package)

这是我想通过 RandomForest 包使用随机森林的方式:

# Reference workflow from the question: fit a forest with the randomForest
# package, predict on a test set, and compute the classification accuracy.
library (randomForest)
# Fit 100 trees predicting CLA from every other column of dat.
rf1 <- randomForest(CLA ~ ., dat, ntree=100, norm.votes=FALSE)
# Predicted class labels for the rows of testing.
p1 <- predict(rf1, testing, type='response')
# Cross-tabulate predictions (rows) against the true labels (columns).
# NOTE(review): predictions come from `testing` but labels are read from
# `testing_CLA` - confirm both hold the same rows in the same order.
confMat_rf1 <- table(p1,testing_CLA$CLA)
# Accuracy = diagonal total / grand total of the confusion matrix.
accuracy_rf1 <- sum(diag(confMat_rf1))/sum(confMat_rf1)

我根本不想使用 randomForest 包。给定一个数据集 (dat),只使用 rpart 并沿用 randomForest 包的默认参数值,我怎样才能得到相同的结果?例如,对于 100 棵决策树,我需要运行以下内容:

# Sketch from the question: fit one rpart tree per resampled dataset.
# NOTE(review): `cart.models` and `random_dataset` are not defined in this
# snippet - both must already exist as lists (100 datasets) before the loop.
# NOTE(review): cp=-1 presumably disables complexity-based pruning so each
# tree is grown out - confirm against the rpart.control documentation.
for(i in 1:100){
cart.models[[i]]<-rpart(CLA~ ., data = random_dataset[[i]],cp=-1)
} 

其中每个 random_dataset[[i]] 都是按 randomForest 的默认数量随机抽取属性(列)和行得到的数据集。另外,randomForest 包内部使用 rpart 吗?

可以通过在训练集的 bootstrap 样本和随机抽取的特征子集上用 rpart 训练多棵树,来模拟随机森林的训练过程。下面的代码片段训练了 10 棵树来对鸢尾花的种类进行分类,并返回一个树的列表,其中每棵树都附带它的袋外(out-of-bag)准确率。

library(rpart)
library(Metrics)
library(doParallel)
library(foreach)
library(ggplot2)


#' Train a random-forest-style ensemble of rpart trees in parallel
#'
#' Each tree is fitted on a bootstrap sample of the rows of `train_data` and
#' on a random subset of the predictors named by `train_formula`. Rows that
#' were not drawn into a tree's bootstrap sample (the out-of-bag rows) are
#' used to score that tree.
#'
#' @param train_data Data frame holding the target and the predictors.
#' @param train_formula Model formula; `.` on the right-hand side is expanded
#'   against `train_data`.
#' @param method Passed to `rpart()` as its `method` argument.
#' @param feature_per Fraction of the predictors sampled (without
#'   replacement) for each tree; at least one predictor is always kept.
#' @param cp,min_split,min_bucket,max_depth Passed to `rpart.control()` as
#'   `cp`, `minsplit`, `minbucket` and `maxdepth`.
#' @param ntrees Number of trees to fit.
#' @return A list with one element per tree; each element is a list with
#'   `tree` (the fitted rpart object) and `oob_perf` (out-of-bag accuracy for
#'   classification trees, out-of-bag RMSE for anova trees, `NA` for any
#'   other rpart method).
random_forest <- function(train_data, train_formula, method="class", feature_per=0.7, cp=0.01, min_split=20, min_bucket=round(min_split/3), max_depth=30, ntrees = 10) {

  target_variable <- as.character(train_formula)[[2]]
  # Expand the formula against the data so that "." becomes the real
  # predictor columns and an explicit right-hand side is respected.
  features <- attr(terms(train_formula, data = train_data), "term.labels")
  n_features <- length(features)
  # Whole number of predictors per tree, clamped to [1, n_features].
  n_bagged <- min(n_features, max(1L, floor(n_features * feature_per)))

  # detectCores() may return NA; never start more workers than trees.
  ncores <- detectCores(logical = FALSE)
  if (is.na(ncores) || ncores < 1) {
    ncores <- 1L
  }
  ncores <- max(1L, min(ncores, ntrees))

  cl <- makeCluster(ncores)
  # Release the workers even when a tree fails, and do not leave a stopped
  # cluster registered as the foreach backend.
  on.exit(
    {
      stopCluster(cl)
      registerDoSEQ()
    },
    add = TRUE
  )
  registerDoParallel(cl)

  rf_model <- foreach(
    icount(ntrees),
    .packages = c("rpart", "Metrics")
  ) %dopar% {
    # Feature bagging: restrict this tree to a random subset of predictors.
    tree_formula <- train_formula
    if (n_features > 0) {
      bagged_features <- sample(features, n_bagged, replace = FALSE)
      tree_formula <- reformulate(bagged_features, response = target_variable)
    }

    # Row bagging: bootstrap sample for training, remaining rows for scoring.
    index_bag <- sample(nrow(train_data), replace = TRUE)
    in_train_bag <- train_data[index_bag, , drop = FALSE]
    out_train_bag <- train_data[-index_bag, , drop = FALSE]

    tree_control <- rpart.control(
      minsplit = min_split,
      minbucket = min_bucket,
      cp = cp,
      maxdepth = max_depth
    )
    tree <- rpart(
      formula = tree_formula,
      data = in_train_bag,
      method = method,
      control = tree_control
    )

    # Score on the out-of-bag rows with a metric that fits the tree type.
    oob_actual <- out_train_bag[[target_variable]]
    oob_perf <- switch(
      tree$method,
      class = accuracy(
        actual = oob_actual,
        predicted = predict(tree, newdata = out_train_bag, type = "class")
      ),
      anova = rmse(
        actual = oob_actual,
        predicted = predict(tree, newdata = out_train_bag, type = "vector")
      ),
      NA_real_
    )

    list(tree = tree, oob_perf = oob_perf)
  }

  rf_model

}

# Example: fit the ensemble on iris, predicting Species from all other columns.
train_formula <- Species ~ .
forest <- random_forest(iris, train_formula)