在 R 中计算 5 种不同算法的精确率、召回率和准确率的函数

Function to calculate Precision, Recall and Accuracy for 5 different algorithms in R

我有 4 个不同的数据集,它们的一般形式如下:

# Example input: two numeric predictors and a 0/1 class label.
df <- data.frame(
  var1 = c(319, 77, 222, 107, 167),
  var2 = c(137, 290, 237, 52, 192),
  class = c(1, 1, 0, 1, 0)
)

每个都包含 var1、var2 和一个 class 变量。我得到了以下指示:

编写一个 R 脚本,以数据表作为输入,返回五种不同算法的性能统计数据(精确率、召回率和准确率):决策树(rpart)、朴素贝叶斯(naiveBayes)、K 最近邻(knn)、支持向量机(svm)和人工神经网络(nnet)。脚本的返回值是一个 5 x 3 的矩阵,包含每种算法的统计数据。knn 使用 k=3,svm 使用线性核,nnet 使用 4 个隐藏节点。计算统计数据时须使用 10 折交叉验证。

本质上,我认为需要编写一个通用函数:传入一个数据框,并按照上述要求返回 5 种算法各自的精确率、召回率和准确率。有没有一种简洁的方法来实现?任何帮助都将不胜感激。

假设 class 变量位于第 3 列且名为 "class"(我所有的示例数据集都满足这一点),以下是我写出的函数:

# Fit five caret classifiers on a random 80/20 split of `dataset` and report
# the precision, recall and accuracy of each one on the held-out 20%.
#
# Args:
#   dataset: data frame holding a two-level outcome column named "class" whose
#            positive label is "1"; every other column is used as a predictor.
#
# Returns:
#   A 5 x 3 numeric matrix with rows "Decision Tree", "Naive Bayes", "KNN",
#   "SVM", "ANN" and columns "Precision", "Recall", "Accuracy".
#
# Notes:
#   * trainControl(), train() and confusionMatrix() must already be attached
#     (presumably via library(caret) elsewhere - not visible in this file).
#   * The 10-fold CV control is handed to train(); the reported metrics are
#     computed from predictions on the held-out rows, not from the CV folds.
#   * The split is random: call set.seed() beforehand for reproducible output.
algStats <- function(dataset) {
  stopifnot(is.data.frame(dataset), "class" %in% names(dataset))

  # Build the outcome factor once, BEFORE splitting. Re-factoring the test
  # labels on their own drops any class absent from the hold-out, and
  # confusionMatrix() then rejects the prediction/reference level mismatch.
  dataset$class <- as.factor(dataset$class)
  if (nlevels(dataset$class) != 2L) {
    stop("`class` must have exactly two levels", call. = FALSE)
  }
  predictors <- setdiff(names(dataset), "class")

  # Split data into train and test (80/20 train to test)
  train_rows <- sample(seq_len(nrow(dataset)), 0.8 * nrow(dataset))
  train_data <- dataset[train_rows, , drop = FALSE]
  test_data <- dataset[-train_rows, , drop = FALSE]

  # Declare 10-fold CV (shared by all five models)
  train_control <- trainControl(method = "cv", number = 10)

  # Train one model, predict the hold-out rows and pull out the three metrics.
  # Extra arguments (tuneGrid, preProcess, ...) are forwarded to train().
  score_model <- function(method, ...) {
    fit <- train(class ~ ., data = train_data, trControl = train_control,
                 method = method, ...)
    predicted <- predict(fit, test_data[, predictors, drop = FALSE])
    cm <- confusionMatrix(predicted, test_data$class,
                          mode = "prec_recall", positive = "1")
    # Look metrics up by name rather than by position in the result vectors.
    c(cm$byClass[["Precision"]], cm$byClass[["Recall"]],
      cm$overall[["Accuracy"]])
  }

  algorithms <- c("Decision Tree", "Naive Bayes", "KNN", "SVM", "ANN")
  metrics <- c("Precision", "Recall", "Accuracy")
  performance <- matrix(NA_real_, nrow = length(algorithms),
                        ncol = length(metrics),
                        dimnames = list(algorithms, metrics))

  scaling <- c("center", "scale")

  # Rows are filled one statement at a time so the models are always trained
  # in the same order (and therefore consume the RNG stream in the same order).
  performance["Decision Tree", ] <- score_model("rpart")
  performance["Naive Bayes", ] <- score_model("nb")
  performance["KNN", ] <- score_model("knn",
                                      tuneGrid = expand.grid(k = 3),
                                      preProcess = scaling)
  performance["SVM", ] <- score_model("svmLinear", preProcess = scaling)
  # nnet has no `linear.output` argument (that belongs to neuralnet); nnet's
  # own `linout` already defaults to FALSE, so nothing extra is passed here.
  performance["ANN", ] <- score_model("nnet",
                                      tuneGrid = expand.grid(size = 4,
                                                             decay = 0.1),
                                      preProcess = scaling)

  performance
}