R 中的错误:预测结果的格式无效,无法强制转换为列表
Error in R: Format of predictions is invalid. It couldn't be coerced to a list
我正在使用 ranger 拟合随机森林,并使用 cvAUC 包计算的 ROC AUC 分数作为评估指标。做出预测后,当我尝试计算 AUC 分数时,出现错误:Format of predictions is invalid. It couldn't be coerced to a list。我认为这是因为预测结果中包含一个 Levels 部分,它列出了预测值的所有不同水平,但我无法去掉这一部分。下面是能够触发该错误的最小可重现示例:
# Minimal reproducible example from the question: fit a ranger random forest
# on a toy data set and score its test-set predictions with cvAUC::AUC().
# The final AUC() call is intentionally left as reported: according to the
# question it is the call that fails with
# "Format of predictions is invalid. It couldn't be coerced to a list".
library(caret)
# Install cvAUC only when it is missing instead of on every run
if (!requireNamespace("cvAUC", quietly = TRUE)) {
  install.packages("cvAUC")
}
library(cvAUC)
# ranger() is called below, so its package has to be attached as well
library(ranger)

# Columns for training set
cat.column <- c("cat", "dog", "monkey", "shark", "seal")
num.column <- c(1, 2, 5, 7, 9)
class <- c(0, 1, 0, 0, 1)
train.set <- data.frame(num.column, cat.column, class)

# Columns for test set ("elephant-shrew" does not occur in the training set)
cat.column <- c("cat", "elephant-shrew", "monkey", "monkey", "seal")
num.column <- c(1, 11, 5, 6, 8)
class <- c(1, 0, 1, 0, 1)
test.set <- data.frame(num.column, cat.column, class)

# Drop the target variable from the test set
# (target.test stays a one-column data frame, hence the unlist() below)
target.test <- test.set["class"]
test.set <- test.set[, !names(test.set) %in% "class"]

# Fit random forest
rf <- ranger(formula = as.factor(class) ~ ., data = train.set, verbose = FALSE)

# Get predictions
pred <- predict(rf, test.set)
predictions <- pred$predictions

# Get AUC score
# NOTE: the predictions are passed to AUC() as a factor here; this is the
# call the question reports as raising the error.
auc <- AUC(as.factor(predictions), as.factor(unlist(target.test)), label.ordering = NULL)
cat(auc)
出现这个错误是因为 AUC 需要的是数值向量,而不是因子。此外,在这个例子中,测试集的 cat.column 列中出现了一个训练集中没有的新水平(elephant-shrew)。最好事先指定该变量在训练集和测试集中可能取到的所有值。
# Corrected example: train a ranger classification forest on a toy data set
# and compute the test-set AUC with cvAUC::AUC().
library(caret)
library(cvAUC)
library(ranger)

# Columns for training set
cat.column <- c("cat", "dog", "monkey", "shark", "seal")
num.column <- c(1, 2, 5, 7, 9)
# Encode the outcome as a factor right away, declaring both levels explicitly
class <- factor(c(0, 1, 0, 0, 1), levels = c(0, 1))
# Keep cat.column as character so that the train and test sets do not end up
# holding factors with different level sets
train.set <- data.frame(num.column, cat.column, class,
                        stringsAsFactors = FALSE)

# Columns for test set ("elephant-shrew" does not occur in the training set)
cat.column <- c("cat", "elephant-shrew", "monkey", "monkey", "seal")
num.column <- c(1, 11, 5, 6, 8)
class <- factor(c(1, 0, 1, 0, 1), levels = c(0, 1))
test.set <- data.frame(num.column, cat.column, class,
                       stringsAsFactors = FALSE)

# Drop the target variable from the test set
target.test <- test.set["class"]
# drop = FALSE guarantees a data frame even if only one predictor remained
test.set <- test.set[, !names(test.set) %in% "class", drop = FALSE]

# Fit random forest
rf <- ranger(formula = class ~ ., data = train.set, verbose = FALSE)

# Get predictions
pred <- predict(rf, test.set)
predictions <- pred$predictions

# Get AUC score
# AUC() needs numeric predictions. The predictions are expected to be a factor
# of predicted classes here; as.numeric() turns a factor into its integer
# level codes, which keeps the class ordering.
# NOTE(review): scoring hard class labels gives a coarse AUC; predicted
# probabilities would usually be preferable - confirm against the ranger docs.
auc <- AUC(as.numeric(predictions), target.test[["class"]], label.ordering = NULL)
cat(auc)
如您所见,我稍微修改了数据准备步骤。首先,如果 class 列是分类任务的目标变量,最好尽早将其转换为因子(factor)。其次,如果某个字符变量在测试集中出现了训练集中没有的取值(例如您的示例中,cat.column 列包含训练集中没有的 elephant-shrew),那么最好将该变量作为字符型处理(此时可以使用 stringsAsFactors = FALSE 将字符变量保留为字符型)。
我正在使用 ranger 拟合随机森林,并使用 cvAUC 包计算的 ROC AUC 分数作为评估指标。做出预测后,当我尝试计算 AUC 分数时,出现错误:Format of predictions is invalid. It couldn't be coerced to a list。我认为这是因为预测结果中包含一个 Levels 部分,它列出了预测值的所有不同水平,但我无法去掉这一部分。下面是能够触发该错误的最小可重现示例:
# Minimal reproducible example from the question: fit a ranger random forest
# on a toy data set and score its test-set predictions with cvAUC::AUC().
# The final AUC() call is intentionally left as reported: according to the
# question it is the call that fails with
# "Format of predictions is invalid. It couldn't be coerced to a list".
library(caret)
# Install cvAUC only when it is missing instead of on every run
if (!requireNamespace("cvAUC", quietly = TRUE)) {
  install.packages("cvAUC")
}
library(cvAUC)
# ranger() is called below, so its package has to be attached as well
library(ranger)

# Columns for training set
cat.column <- c("cat", "dog", "monkey", "shark", "seal")
num.column <- c(1, 2, 5, 7, 9)
class <- c(0, 1, 0, 0, 1)
train.set <- data.frame(num.column, cat.column, class)

# Columns for test set ("elephant-shrew" does not occur in the training set)
cat.column <- c("cat", "elephant-shrew", "monkey", "monkey", "seal")
num.column <- c(1, 11, 5, 6, 8)
class <- c(1, 0, 1, 0, 1)
test.set <- data.frame(num.column, cat.column, class)

# Drop the target variable from the test set
# (target.test stays a one-column data frame, hence the unlist() below)
target.test <- test.set["class"]
test.set <- test.set[, !names(test.set) %in% "class"]

# Fit random forest
rf <- ranger(formula = as.factor(class) ~ ., data = train.set, verbose = FALSE)

# Get predictions
pred <- predict(rf, test.set)
predictions <- pred$predictions

# Get AUC score
# NOTE: the predictions are passed to AUC() as a factor here; this is the
# call the question reports as raising the error.
auc <- AUC(as.factor(predictions), as.factor(unlist(target.test)), label.ordering = NULL)
cat(auc)
出现这个错误是因为 AUC 需要的是数值向量,而不是因子。此外,在这个例子中,测试集的 cat.column 列中出现了一个训练集中没有的新水平(elephant-shrew)。最好事先指定该变量在训练集和测试集中可能取到的所有值。
# Corrected example: train a ranger classification forest on a toy data set
# and compute the test-set AUC with cvAUC::AUC().
library(caret)
library(cvAUC)
library(ranger)

# Columns for training set
cat.column <- c("cat", "dog", "monkey", "shark", "seal")
num.column <- c(1, 2, 5, 7, 9)
# Encode the outcome as a factor right away, declaring both levels explicitly
class <- factor(c(0, 1, 0, 0, 1), levels = c(0, 1))
# Keep cat.column as character so that the train and test sets do not end up
# holding factors with different level sets
train.set <- data.frame(num.column, cat.column, class,
                        stringsAsFactors = FALSE)

# Columns for test set ("elephant-shrew" does not occur in the training set)
cat.column <- c("cat", "elephant-shrew", "monkey", "monkey", "seal")
num.column <- c(1, 11, 5, 6, 8)
class <- factor(c(1, 0, 1, 0, 1), levels = c(0, 1))
test.set <- data.frame(num.column, cat.column, class,
                       stringsAsFactors = FALSE)

# Drop the target variable from the test set
target.test <- test.set["class"]
# drop = FALSE guarantees a data frame even if only one predictor remained
test.set <- test.set[, !names(test.set) %in% "class", drop = FALSE]

# Fit random forest
rf <- ranger(formula = class ~ ., data = train.set, verbose = FALSE)

# Get predictions
pred <- predict(rf, test.set)
predictions <- pred$predictions

# Get AUC score
# AUC() needs numeric predictions. The predictions are expected to be a factor
# of predicted classes here; as.numeric() turns a factor into its integer
# level codes, which keeps the class ordering.
# NOTE(review): scoring hard class labels gives a coarse AUC; predicted
# probabilities would usually be preferable - confirm against the ranger docs.
auc <- AUC(as.numeric(predictions), target.test[["class"]], label.ordering = NULL)
cat(auc)
如您所见,我稍微修改了数据准备步骤。首先,如果 class 列是分类任务的目标变量,最好尽早将其转换为因子(factor)。其次,如果某个字符变量在测试集中出现了训练集中没有的取值(例如您的示例中,cat.column 列包含训练集中没有的 elephant-shrew),那么最好将该变量作为字符型处理(此时可以使用 stringsAsFactors = FALSE 将字符变量保留为字符型)。