从多个 confusionMatrix 中提取指标并填充数据框
Extract metrics and populate data frame from multiple confusionMatrix's
我有许多针对不同分类模型的 confusionMatrix 输出。我想从每个混淆矩阵中提取模型指标(即从灵敏度 Sensitivity 到平衡准确率 Balanced Accuracy,也就是 cf_1$byClass 中的全部指标)来填充一个数据框。我希望该数据框包含所有这些指标,并带有一列标明每一行来自哪个混淆矩阵。谁能帮我弄清楚该怎么做?请参阅下面的可重现示例:
library(caret)
library(tidyverse)
# Simulated class labels for two models: `x` is used as the reference and
# `y` as the prediction below.
# The columns are built as factors with a fixed level set because
# confusionMatrix() requires factor inputs that share the same levels, and
# since R 4.0 data.frame() no longer converts strings to factors.
df_1 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
df_2 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
# First argument is the predicted classes, second is the reference classes.
cf_1 <- confusionMatrix(df_1$y, df_1$x)
cf_2 <- confusionMatrix(df_2$y, df_2$x)
# I would like a dataframe with each of these, as well as column for the associated confusion matrix name
cf_1$byClass
cf_2$byClass
我期望的输出大致如下所示(此外还应包含其余所有相关指标):
# Hand-written illustration of the desired result: one row per model, one
# column per metric, plus a `Model` column naming the confusion matrix the
# row came from. The numbers are placeholders, not real metrics.
x <- c(0.2, 0.4)
y <- c(0.5, 0.6)
z <- c("cf_1", "cf_2")
output <- data.frame(Sensitivity = x, Specificity = y, Model = z)
更新
这是我想出的解决方案,虽然不太优雅,但确实可行。尽管如此,我仍然希望看到有人给出更简洁、更高效的做法!
# Names of the confusionMatrix objects (looked up in the calling environment)
# whose per-class metrics should be collected.
model_names <- c("cf_1", "cf_2")
# Build one single-row data frame per model, then stack them.
# Each row holds the model name followed by its `byClass` metrics rounded to
# three decimals. Keeping the name in its own data-frame column (instead of
# concatenating it onto the numeric vector) leaves the metrics numeric.
model_metrics <- lapply(model_names, function(name) {
  metrics <- round(get(name)$byClass, digits = 3)
  data.frame(model = name, as.list(metrics))
})
model_metrics <- do.call(rbind, model_metrics)
model_metrics
这样如何:
library(caret)
library(tidyverse)
# Simulated class labels for two models: `x` is used as the reference and
# `y` as the prediction below.
# The columns are built as factors with a fixed level set because
# confusionMatrix() requires factor inputs that share the same levels, and
# since R 4.0 data.frame() no longer converts strings to factors.
df_1 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
df_2 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
cf_1 <- confusionMatrix(df_1$y, df_1$x, mode = "sens_spec")
cf_2 <- confusionMatrix(df_2$y, df_2$x, mode = "sens_spec")
# stack() turns each named metric vector into a long data frame with columns
# `values` and `ind`; bind_rows() tags the rows of each input with "1" / "2"
# in `id`. The result is then reshaped to one row per metric and one column
# per model.
bind_rows(stack(cf_1$byClass), stack(cf_2$byClass), .id = "id") %>%
  filter(ind %in% c("Sensitivity", "Specificity")) %>%
  pivot_wider(names_from = id, values_from = values) %>%
  rename(metric = ind, mod1 = "1", mod2 = "2") %>%
  as.data.frame()
#> metric mod1 mod2
#> 1 Sensitivity 0.4285714 0.5
#> 2 Specificity 0.6153846 0.4
编辑
要得到数据框,只需对 cf_1$byClass 调用 stack():
# Reshape the named metric vector into a long data frame with one row per
# metric: `values` holds the number and `ind` the metric name.
cf_1$byClass %>%
  stack() %>%
  data.frame()
两种方法(顺便说一下,我认为您的示例中有一个拼写错误:df_1 被使用了两次)
# Base R
# rbind() stacks the two named metric vectors into a two-row matrix (one row
# per model); data.frame() converts it, and the model labels are added as an
# extra column.
output <- data.frame(rbind(cf_1$byClass, cf_2$byClass))
output$model <- c("cf_1", "cf_2")
# dplyr version
# Naming the inputs makes `.id` label each row with the model name; unnamed
# inputs would be labelled with the sequence numbers "1" and "2" instead.
output <- bind_rows(cf_1 = cf_1$byClass, cf_2 = cf_2$byClass, .id = "model")
我有许多针对不同分类模型的 confusionMatrix 输出。我想从每个混淆矩阵中提取模型指标(即从灵敏度 Sensitivity 到平衡准确率 Balanced Accuracy,也就是 cf_1$byClass 中的全部指标)来填充一个数据框。我希望该数据框包含所有这些指标,并带有一列标明每一行来自哪个混淆矩阵。谁能帮我弄清楚该怎么做?请参阅下面的可重现示例:
library(caret)
library(tidyverse)
# Simulated class labels for two models: `x` is used as the reference and
# `y` as the prediction below.
# The columns are built as factors with a fixed level set because
# confusionMatrix() requires factor inputs that share the same levels, and
# since R 4.0 data.frame() no longer converts strings to factors.
df_1 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
df_2 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
# First argument is the predicted classes, second is the reference classes.
cf_1 <- confusionMatrix(df_1$y, df_1$x)
cf_2 <- confusionMatrix(df_2$y, df_2$x)
# I would like a dataframe with each of these, as well as column for the associated confusion matrix name
cf_1$byClass
cf_2$byClass
我期望的输出大致如下所示(此外还应包含其余所有相关指标):
# Hand-written illustration of the desired result: one row per model, one
# column per metric, plus a `Model` column naming the confusion matrix the
# row came from. The numbers are placeholders, not real metrics.
x <- c(0.2, 0.4)
y <- c(0.5, 0.6)
z <- c("cf_1", "cf_2")
output <- data.frame(Sensitivity = x, Specificity = y, Model = z)
更新
这是我想出的解决方案,虽然不太优雅,但确实可行。尽管如此,我仍然希望看到有人给出更简洁、更高效的做法!
# Names of the confusionMatrix objects (looked up in the calling environment)
# whose per-class metrics should be collected.
model_names <- c("cf_1", "cf_2")
# Build one single-row data frame per model, then stack them.
# Each row holds the model name followed by its `byClass` metrics rounded to
# three decimals. Keeping the name in its own data-frame column (instead of
# concatenating it onto the numeric vector) leaves the metrics numeric.
model_metrics <- lapply(model_names, function(name) {
  metrics <- round(get(name)$byClass, digits = 3)
  data.frame(model = name, as.list(metrics))
})
model_metrics <- do.call(rbind, model_metrics)
model_metrics
这样如何:
library(caret)
library(tidyverse)
# Simulated class labels for two models: `x` is used as the reference and
# `y` as the prediction below.
# The columns are built as factors with a fixed level set because
# confusionMatrix() requires factor inputs that share the same levels, and
# since R 4.0 data.frame() no longer converts strings to factors.
df_1 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
df_2 <- data.frame(
  x = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2]),
  y = factor(sample(LETTERS[1:2], 20, replace = TRUE), levels = LETTERS[1:2])
)
cf_1 <- confusionMatrix(df_1$y, df_1$x, mode = "sens_spec")
cf_2 <- confusionMatrix(df_2$y, df_2$x, mode = "sens_spec")
# stack() turns each named metric vector into a long data frame with columns
# `values` and `ind`; bind_rows() tags the rows of each input with "1" / "2"
# in `id`. The result is then reshaped to one row per metric and one column
# per model.
bind_rows(stack(cf_1$byClass), stack(cf_2$byClass), .id = "id") %>%
  filter(ind %in% c("Sensitivity", "Specificity")) %>%
  pivot_wider(names_from = id, values_from = values) %>%
  rename(metric = ind, mod1 = "1", mod2 = "2") %>%
  as.data.frame()
#> metric mod1 mod2
#> 1 Sensitivity 0.4285714 0.5
#> 2 Specificity 0.6153846 0.4
编辑
要得到数据框,只需对 cf_1$byClass 调用 stack():
# Reshape the named metric vector into a long data frame with one row per
# metric: `values` holds the number and `ind` the metric name.
cf_1$byClass %>%
  stack() %>%
  data.frame()
两种方法(顺便说一下,我认为您的示例中有一个拼写错误:df_1 被使用了两次)
# Base R
# rbind() stacks the two named metric vectors into a two-row matrix (one row
# per model); data.frame() converts it, and the model labels are added as an
# extra column.
output <- data.frame(rbind(cf_1$byClass, cf_2$byClass))
output$model <- c("cf_1", "cf_2")
# dplyr version
# Naming the inputs makes `.id` label each row with the model name; unnamed
# inputs would be labelled with the sequence numbers "1" and "2" instead.
output <- bind_rows(cf_1 = cf_1$byClass, cf_2 = cf_2$byClass, .id = "model")