按组计算准确度
Calculate accuracy by groups
我有一个如下所示的数据框:
# Toy example: one iteration, two models, three observations per model.
df <- data.frame(
  iteration = rep(1, 6),
  model = rep(c("RF", "SVM"), each = 3),
  label = c(0, 0, 1, 0, 0, 1),
  prediction = c(0, 1, 1, 0, 1, 1)
)
iteration model label prediction
1 1 RF 0 0
2 1 RF 0 1
3 1 RF 1 1
4 1 SVM 0 0
5 1 SVM 0 1
6 1 SVM 1 1
实际上,数据中有 10 次迭代(iteration)、更多的模型,而且每个模型有更多的数据。
我想做的基本上是得到每个模型的准确率。
所以我想对每个模型分组(RF、SVM)分别执行下面的计算:
# Confusion table: rows are true labels, columns are predictions.
table(df$label, df$prediction)
0 1
0 2 2
1 0 2
然后将对角线元素求和,再除以总数:
# Accuracy = correctly classified cases (the diagonal) / all cases.
confusion <- table(df$label, df$prediction)
sum(diag(confusion)) / sum(confusion)
[1] 0.6666667
这种情况下我可以使用 tapply 吗?还是 dplyr 能派上用场?
我在这里完全没有头绪。
尝试:
library(dplyr)
# Per (iteration, model) group: number of correct predictions / group size.
df %>%
  group_by(iteration, model) %>%
  summarise(accuracy = sum(label == prediction) / n())
给出:
#Source: local data frame [2 x 3]
#Groups: iteration [?]
#
# iteration model accuracy
# (dbl) (fctr) (dbl)
#1 1 RF 0.6666667
#2 1 SVM 0.6666667
思路是统计 label == prediction 返回 TRUE 的次数,再除以分组大小 n()。
# Mark each row as correct (1) or incorrect (0), then average within each
# (iteration, model) group.
df2 <- df %>%
  mutate(acc = as.numeric(label == prediction)) %>%
  group_by(iteration, model) %>%
  summarise(accuracy = sum(acc) / n())
df2
iteration model accuracy
(dbl) (fctr) (dbl)
1 1 RF 0.6666667
2 1 SVM 0.6666667
使用data.table
library(data.table)
# setDT() converts df to a data.table by reference; the mean of the logical
# vector label == prediction is the share of correct predictions per group.
setDT(df)[, .(accuracy = mean(label == prediction)), by = .(iteration, model)]
# iteration model accuracy
#1: 1 RF 0.6666667
#2: 1 SVM 0.6666667
或者可以用 base R
# cbind() names the computed column "accuracy"; the mean of the logical
# comparison is the share of correct predictions in each group.
aggregate(
  cbind(accuracy = label == prediction) ~ iteration + model,
  data = df,
  FUN = mean
)
# iteration model accuracy
#1 1 RF 0.6666667
#2 1 SVM 0.6666667
我有一个如下所示的数据框:
# Toy example: one iteration, two models, three observations per model.
df <- data.frame(
  iteration = rep(1, 6),
  model = rep(c("RF", "SVM"), each = 3),
  label = c(0, 0, 1, 0, 0, 1),
  prediction = c(0, 1, 1, 0, 1, 1)
)
iteration model label prediction
1 1 RF 0 0
2 1 RF 0 1
3 1 RF 1 1
4 1 SVM 0 0
5 1 SVM 0 1
6 1 SVM 1 1
实际上,数据中有 10 次迭代(iteration)、更多的模型,而且每个模型有更多的数据。
我想做的基本上是得到每个模型的准确率。
所以我想对每个模型分组(RF、SVM)分别执行下面的计算:
# Confusion table: rows are true labels, columns are predictions.
table(df$label, df$prediction)
0 1
0 2 2
1 0 2
然后将对角线元素求和,再除以总数:
# Accuracy = correctly classified cases (the diagonal) / all cases.
confusion <- table(df$label, df$prediction)
sum(diag(confusion)) / sum(confusion)
[1] 0.6666667
这种情况下我可以使用 tapply 吗?还是 dplyr 能派上用场?
我在这里完全没有头绪。
尝试:
library(dplyr)
# Per (iteration, model) group: number of correct predictions / group size.
df %>%
  group_by(iteration, model) %>%
  summarise(accuracy = sum(label == prediction) / n())
给出:
#Source: local data frame [2 x 3]
#Groups: iteration [?]
#
# iteration model accuracy
# (dbl) (fctr) (dbl)
#1 1 RF 0.6666667
#2 1 SVM 0.6666667
思路是统计 label == prediction 返回 TRUE 的次数,再除以分组大小 n()。
# Mark each row as correct (1) or incorrect (0), then average within each
# (iteration, model) group.
df2 <- df %>%
  mutate(acc = as.numeric(label == prediction)) %>%
  group_by(iteration, model) %>%
  summarise(accuracy = sum(acc) / n())
df2
iteration model accuracy
(dbl) (fctr) (dbl)
1 1 RF 0.6666667
2 1 SVM 0.6666667
使用data.table
library(data.table)
# setDT() converts df to a data.table by reference; the mean of the logical
# vector label == prediction is the share of correct predictions per group.
setDT(df)[, .(accuracy = mean(label == prediction)), by = .(iteration, model)]
# iteration model accuracy
#1: 1 RF 0.6666667
#2: 1 SVM 0.6666667
或者可以用 base R
# cbind() names the computed column "accuracy"; the mean of the logical
# comparison is the share of correct predictions in each group.
aggregate(
  cbind(accuracy = label == prediction) ~ iteration + model,
  data = df,
  FUN = mean
)
# iteration model accuracy
#1 1 RF 0.6666667
#2 1 SVM 0.6666667