应用函数,根据列表内嵌套的分类树类别概率列表生成混淆矩阵

Applying a function to generate confusion matrices from nested lists of classification tree class probabilities within a list

对于对我的问题进行如此冗长而详细的解释,我提前表示歉意。我使用三个函数 shuffle100、my_list 和 Final_lists(见下文),根据分类树的类别概率(分组因子:G8 和 V4)在主列表中生成了 10 个嵌套数据框。很抱歉问了这个简单的问题,但我无法弄清楚。如果有人能找到解决方案,非常感谢。

目标 1

(1) 我想将 caret package 中的函数 confusionMatrix() 插入到函数 shuffle100 中,为每个子集生成 10 个混淆矩阵

函数 shuffle100、my_list 和 Final_lists

library(plyr)
library(caret)
library(e1071)
library(rpart)

set.seed(1235)

 # Question code, kept verbatim: for each of 10 iterations it samples rows of
 # normalised_scores, fits a classification tree on a partition of them and then
 # calls confusionMatrix().
 # NOTE(review): this is the call that produces the sort.list error quoted after
 # the block.
 shuffle100 <-lapply(seq(10), function(n){ # one iteration per element of seq(10); n itself is not used in the body
 subset <- normalised_scores[sample(nrow(normalised_scores), 80),] # draw 80 row positions (sample() default: without replacement)
 subset_idx <- sample(1:nrow(subset), replace = FALSE) # a random permutation of all row positions of subset
 subset <- subset[subset_idx, ] # reorder the rows (the author labels this the training subset)
 subset1<-subset[-subset_idx, ] # author labels this the test subset; NOTE(review): subset_idx contains every row position, so the negative index removes all rows; subset1 is not used again in this block
 subset_resampled_idx <- createDataPartition(subset_idx, times = 1, p = 0.7, list = FALSE) # intended 70 % training partition; NOTE(review): partitions on the index vector, not on the class labels - confirm this is intended
 subset_resampled <- subset[subset_resampled_idx, ] # rows selected for fitting the tree
 ct_mod<-rpart(Matriline~., data=subset_resampled, method="class", control=rpart.control(cp=0.005)) # classification tree: Matriline against all other columns, cp = 0.005
 ct_pred<-predict(ct_mod, newdata=subset[, 2:13]) # predicts for every row of subset (columns 2 to 13); no type is given - presumably class probabilities, confirm against predict.rpart
 ct_dataframe=as.data.frame(ct_pred)# convert the prediction output to a data frame
 confusionMatrix(ct_dataframe, normalised_scores$Family) # NOTE(review): a data frame is passed as the predictions; the quoted traceback shows factor(data) failing in sort.list. The reference is the Family column of the full normalised_scores, while the model response is Matriline - confirm
 } # closes the anonymous function; NOTE(review): the closing ")" of lapply( is missing in the post

  Error in sort.list(y) : 'x' must be atomic for 'sort.list'
  Have you called 'sort' on a list?

 1: lapply(seq(10), function(n) {
subset <- normalised_scores[sample(nrow(normalised_scores
 2: FUN(X[[i]], ...)
 3: confusionMatrix(ct_dataframe, normalised_scores$Family)
 4: confusionMatrix.default(ct_dataframe, normalised_scores$Family)
 5: factor(data)
 6: sort.list(y)

 # Append three placeholder columns (Predicted, Actual, Binary) to every data
 # frame in shuffle100. Each placeholder is an empty string repeated once per
 # row, so a data frame with zero rows receives zero-length columns. The earlier
 # else branch called the undefined bind() and mixed a zero-length column with
 # length-1 columns, which cannot be combined with a zero-row data frame.
 my_list <- lapply(shuffle100, function(df) {
   blank <- rep("", nrow(df))
   cbind(df, Predicted = blank, Actual = blank, Binary = blank)
 })

# Reset the three placeholder columns of every data frame in my_list to NA.
Final_lists <- lapply(my_list, function(scores_df) {
  mutate(scores_df, Predicted = NA, Actual = NA, Binary = NA)
})

# Keep the Family labels both as a plain vector (Actual_scores) and as a
# one-column data frame (Final_scores); they are meant to fill the Actual column.
# NOTE(review): the labels are read from Final_normalised3, while the rest of
# the post refers to normalised_scores - confirm these hold the same data.

Actual_scores <- Final_normalised3$Family
Final_scores <- data.frame(Actual_scores = Actual_scores)

# Fill in the Predicted, Actual and Binary columns.
# NOTE(review): Final_lists is the list returned by lapply(), not a single data
# frame, so mutate() is applied to the list itself; this matches the
# "object 'G8' not found" error quoted after this block. Presumably each
# element has to be processed separately (e.g. inside lapply()) - confirm.
# NOTE(review): the pipe operator is presumably provided by magrittr or dplyr;
# neither is attached in the library() calls visible in this post - confirm.

 Predicted_Lists <- Final_lists %>%
 mutate(Predicted=ifelse(G8 > V4, G8, V4)) %>% # picks the larger of the two columns; NOTE(review): this yields the numeric value of G8 or V4, not the label "G8"/"V4"
 mutate(Actual=Final_scores) %>% # Final_scores is the one-column data frame built from the Family labels; the answerer noted the definition of Actual was unclear
 mutate(Binary=ifelse(Predicted==Actual, 1, 0)) # 1 where Predicted equals Actual, otherwise 0

#Error messages

Error in ifelse(G8 > V4, G8, V4) : object 'G8' not found

目标 2

编写一个函数或 for 循环来填充每个子集的 Predicted、Actual 和 Binary 列,依据是每一行中 G8 列与 V4 列的概率孰大孰小。但是,我对函数和循环的正确语法感到困惑。

for 循环不起作用

  # Question code, kept verbatim: an attempt to fill the Predicted column of
  # every data frame in Final_lists row by row.
  # NOTE(review): this does not parse as R - see the notes on the two
  # assignment lines.
  for(i in 1:length(Final_lists)){ # i indexes the data frames in the list
   for(j in 2:nrow(Final_lists[[i]])){ # j indexes the rows of data frame i; NOTE(review): starts at 2, so row 1 is never visited
   if(Final_lists[[i]][j, "G8"] > Final_lists[[i]][j, "V4"]) { # compare the G8 and V4 probabilities of row j
      Final_lists[[i]][j, [j["Predicted" == "NA"]] ="G8" # NOTE(review): "[j, [" is not valid R, and "Predicted" == "NA" compares two string literals (always FALSE); presumably [j, "Predicted"] was intended - confirm
      }
    else {
   Final_lists[[i]][j, [Predicted == "NA"]] ="V4" # NOTE(review): same invalid indexing; presumably [j, "Predicted"] was intended - confirm
    }
print(i) # prints the data-frame index once per row (inside the inner loop)
    }
    }

填充列时每个子集都应具有此格式:

               G8        V4 Predicted Actual Binary
        0.1764706 0.8235294        V4     V4      1
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.1764706 0.8235294        V4     V4      1

填写 Predicted

如果某一行中 G8 的概率大于 V4,则该行的 Predicted 填入 G8;反之,如果 V4 的概率大于 G8,则该行的 Predicted 填入 V4。

填写 Actual

这些是每个子集对应的实际类别(用来与分类树模型的预测结果对照),包含在数据框 `normalised_scores` 的 Family 列中。

正在填写 Binary

如果某一行的 Predicted 与 Actual 结果相同(例如 G8 和 G8),则该行的 Binary 赋值为 1;如果 Predicted 与 Actual 不同(例如 G8 和 V4),则该行的 Binary 赋值为 0。

我使用此工作代码实现了这些目标,但是,我不确定如何将此代码应用于主列表中的子集。

单个子集的工作代码

      # Fix the RNG seed so the permutation and the partition below are reproducible.
      set.seed(1235)

    # Randomly permute the data before subsetting
      # NOTE(review): the row count comes from Final_normalised_scores but the rows
      # are taken from Final_normalised3 - confirm both have the same number of rows.
      mydat_idx <- sample(1:nrow(Final_normalised_scores), replace = FALSE) # random permutation of the row positions
      mydat <- Final_normalised3[mydat_idx, ] # data in permuted row order

      # Partition drawn on the permuted index vector with p = 0.7.
      mydat_resampled_idx <- createDataPartition(mydat_idx, times = 1, p = 0.7, list = FALSE)
      mydat_resampled <- mydat[mydat_resampled_idx, ] # Training portion of the data
      mydat_resampled1 <- mydat[-mydat_resampled_idx, ] # remaining rows; not used again in the code shown in this post

      # Classification tree: fit an rpart model through caret::train on the
      # training rows. Columns 2 to 13 are passed as predictors and column 1
      # (converted to a factor) as the class label; resampling is repeated
      # 10-fold cross-validation with 100 repeats and class probabilities enabled.
      ct_mod <- train(
        x = mydat_resampled[, 2:13],
        y = as.factor(mydat_resampled[, 1]),
        method = "rpart",
        trControl = trainControl(
          method = "repeatedcv",
          number = 10,
          repeats = 100,
          classProbs = TRUE
        )
      )

      # Model predictions: class probabilities for every row of mydat, stored
      # as a data frame.
      ct_pred <- predict(ct_mod, newdata = mydat[, 2:13], type = "prob")
      Final_Predicted <- as.data.frame(ct_pred)

       # Add three empty (NA) columns in one step: Predicted, Actual and Binary.
       Final_Predicted[c("Predicted", "Actual", "Binary")] <- NA

       # Fill in the Predicted column: label each row with the class whose
       # probability is larger. Ties (G8 equal to V4) go to "V4", exactly as the
       # else branch of the former row-by-row loop did. The comparison is
       # vectorised, so there is no 1:length() index loop (which misbehaves on
       # empty input) and no per-row print() of the index.
       Final_Predicted$Predicted <- ifelse(
         Final_Predicted$G8 > Final_Predicted$V4, "G8", "V4"
       )

        # Fill in the Actual column with the true class label of each predicted row.
        # The predictions were made on mydat, whose rows are in a random permuted
        # order, so the labels must be taken from mydat as well. Column 1 is the
        # label column that was passed as y when the tree was trained. Reading
        # normalised_scores$Family here would pair each prediction with the label
        # of a different row, because that column is not in the permuted order.
        Final_Predicted$Actual <- as.character(mydat[[1]])

        # Fill in the Binary column: 1 where Predicted equals Actual, 0 otherwise.
        # The comparison is vectorised, so there is no 1:length() index loop and no
        # per-row print() of the index. A row with a missing Predicted or Actual
        # value yields NA instead of stopping with an error inside if().
        Final_Predicted$Binary <- as.numeric(
          Final_Predicted$Predicted == Final_Predicted$Actual
        )

来自主列表的子集

                  G8        V4 Predicted Actual Binary
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.7692308 0.2307692        NA     NA     NA
           0.1764706 0.8235294        NA     NA     NA

可重现的虚拟数据

您对问题的描述有点长,但可能的 dplyr 解决方案如下所示:

Final_Predicted$Actual <- ... # placeholder: fill in the true class labels here
Final_Predicted <- Final_Predicted %>%
  # Store the class label ("G8" or "V4"), not the winning probability, so that
  # Predicted can be compared with Actual. Ties (G8 == V4) are assigned to V4.
  mutate(Predicted = ifelse(G8 > V4, "G8", "V4")) %>%
  # 1 where the predicted label matches the actual label, otherwise 0.
  mutate(Binary = ifelse(Predicted == Actual, 1, 0))

我实际上并没有运行这个解决方案,但按照这个思路,代码应该很简短。希望这有帮助。