Applying a function to generate confusion matrices from nested lists of classification tree class probabilities within a list

函数 shuffle100my_listFinal_lists



 shuffle100 <-lapply(seq(10), function(n){ #Select the production of 10 dataframes
 subset <- normalised_scores[sample(nrow(normalised_scores), 80),] #Shuffle rows
 subset_idx <- sample(1:nrow(subset), replace = FALSE)
 subset <- subset[subset_idx, ] #training subset
 subset1<-subset[-subset_idx, ] #test subset
 subset_resampled_idx <- createDataPartition(subset_idx, times = 1, p = 0.7, list = FALSE) #70 % training set    
 subset_resampled <- subset[subset_resampled_idx, ]
 ct_mod<-rpart(Matriline~., data=subset_resampled, method="class", control=rpart.control(cp=0.005)) #10 ct
 ct_pred<-predict(ct_mod, newdata=subset[, 2:13]) 
 ct_dataframe=as.data.frame(ct_pred)#create new data frame
 confusionMatrix(ct_dataframe, normalised_scores$Family)

  Error in sort.list(y) : 'x' must be atomic for 'sort.list'
  Have you called 'sort' on a list?

 1: lapply(seq(10), function(n) {
subset <- normalised_scores[sample(nrow(normalised_scores
 2: FUN(X[[i]], ...)
 3: confusionMatrix(ct_dataframe, normalised_scores$Family)
 4: confusionMatrix.default(ct_dataframe, normalised_scores$Family)
 5: factor(data)
 6: sort.list(y)

 #Produce three columns: Predicted, Actual and Binary
 my_list <- lapply(shuffle100, function(df){#Create two new columns Predicted and Actual
                  if (nrow(df) > 0)
                cbind(df, Predicted = c(""), Actual = c(""), Binary = c(""))
                 bind(df, Predicted = character(), Actual = c(""), Binary = c (""))

#Fill the empty columns with NA's
Final_lists <- lapply(my_list, function(x) mutate(x, Predicted = NA, Actual = NA, Binary = NA)) 

#Create a dataframe from the column normalised_scores$Family to fill the Actual column


#Fill in the Predicted, Actual and Binary columns

 Predicted_Lists <- Final_lists %>%
 mutate(Predicted=ifelse(G8 > V4, G8, V4)) %>% # assuming if G8 > V4 then Predicted=G8
 mutate(Actual=Final_scores) %>% # your definition of Actual is not clear
 mutate(Binary=ifelse(Predicted==Actual, 1, 0))

#Error messages

Error in ifelse(G8 > V4, G8, V4) : object 'G8' not found

  for(i in 1:length(Final_lists)){ #i loops through each dataframe in the list 
   for(j in 2:nrow(Final_lists[[i]])){ #j loops through each row of each dataframe in the list
   if(Final_lists[[i]][j, "G8"] > Final_lists[[i]][j, "V4"]) { #if the probability of G8 > V4 in each row of each dataframe in each list
      Final_lists[[i]][j, [j["Predicted" == "NA"]] ="G8" #G8 will be filled into the same row in the `Predicted' column
    else {
   Final_lists[[i]][j, [Predicted == "NA"]] ="V4" #V4 will be filled into the same row in the `Predicted' column


               G8        V4 Predicted Actual Binary
        0.1764706 0.8235294        V4     V4      1
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.7692308 0.2307692        G8     V4      0
        0.1764706 0.8235294        V4     V4      1

    # Randomly permute the data before subsetting
      mydat_idx <- sample(1:nrow(Final_normalised_scores), replace = FALSE)
      mydat <- Final_normalised3[mydat_idx, ]

      mydat_resampled_idx <- createDataPartition(mydat_idx, times = 1, p = 0.7, list = FALSE)
      mydat_resampled <- mydat[mydat_resampled_idx, ] # Training portion of the data
      mydat_resampled1 <- mydat[-mydat_resampled_idx, ]

      #Classification tree

      ct_mod <- train(x = mydat_resampled[, 2:13], y = as.factor(mydat_resampled[, 1]), 
            method = "rpart", trControl = trainControl(method = "repeatedcv", number=10, repeats=100, classProbs = TRUE))

       #Model predictions
       ct_pred <- predict(ct_mod, newdata = mydat[ , 2:13], type = "prob")

       #Produce three empty columns: Predicted, Actual and Binary


       #Fill in the Predicted column

      for (i in 1:length(Final_Predicted$G8)){
        if(Final_Predicted$G8[i]>Final_Predicted$V4[i]) {
      else {

        #Fill in the Actual column using the actual predictions from the dataframe normalised_scores


        #Fill in the Binary column

        for (i in 1:length(Final_Predicted$Binary)){
           if(Final_Predicted$Predicted[i]==Final_Predicted$Actual[i]) {
         else {


                  G8        V4 Predicted Actual Binary
您对问题的描述有点长,但可能的 dplyr 解决方案如下所示:

Final_Predicted$Actual <- ... # fill actual values
Final_Predicted <- Final_Predicted %>%
              mutate(Predicted=ifelse(G8 > V4, G8, V4)) %>% # assuming if G8==V4 then Predicted=V4
              mutate(Binary=ifelse(Predicted==Actual, 1, 0))

我实际上并没有 运行 这个解决方案,但按照这些思路,它应该是简短的。希望这有帮助。