尝试使用 R 中 k 的最佳值从交叉验证结果创建混淆矩阵
Trying to create confusion matrix from cross-validated results using the best value of k in R
我已经使用 R 中的鸢尾花数据集编写了下面的 knn 交叉验证方法。我如何从中获得最佳的 k 值并基于此创建一个混淆矩阵?任何帮助都会很棒。
library(class)
data("iris")
kfolds = 5
iris$folds = cut(seq(1,nrow(iris)),breaks=kfolds,labels=FALSE)
iris$folds
# Sets the columns to use as predicators
pred = c("Petal.Width", "Petal.Length")
accuracies = c()
ks = c(1,3,5,7,9,11,13,15)
for (k in ks) {
k.accuracies = c()
for(i in 1:kfolds) {
# Builds the training set and test set for this fold.
train.items.this.fold = iris[iris$folds != i,]
validation.items.this.fold = iris[iris$folds == i,]
# Fit knn model on this fold.
predictions = knn(train.items.this.fold[,pred],
validation.items.this.fold[,pred],
train.items.this.fold$Species, k=k)
predictions.table <- table(predictions, validation.items.this.fold$Species)
# Work out the amount of correct and incorrect predictions.
correct.list <- predictions == validation.items.this.fold$Species
nr.correct = nrow(validation.items.this.fold[correct.list,])
# Get accuracy rate of cv.
accuracy.rate = nr.correct/nrow(validation.items.this.fold)
# Adds the accuracy list.
k.accuracies <- cbind(k.accuracies, accuracy.rate)
}
# Adds the mean accuracy to the total accuracy list.
accuracies <- cbind(accuracies, mean(k.accuracies))
}
# Accuracy for each value of k: visualisation.
accuracies
更新:
predictions.table <- table(predictions == ks[which.max(accuracies)], validation.items.this.fold$Species)
你的代码有一些问题,这个运行:
library(class)
data("iris")
kfolds = 5
iris$folds = cut(seq(1,nrow(iris)),breaks=kfolds,labels=FALSE)
iris$folds
# Sets the columns to use as predicators
pred = c("Petal.Width", "Petal.Length")
accuracies = c()
ks = c(1,3,5,7,9,11,13,15)
k.accuracies = c()
predictions.list = list()
for (k in ks) {
k.accuracies = c()
for(i in 1:kfolds) {
# Builds the training set and test set for this fold.
train.items.this.fold = iris[iris$folds != i,]
validation.items.this.fold = iris[iris$folds == i,]
# Fit knn model on this fold.
predictions = knn(train.items.this.fold[,pred],
validation.items.this.fold[,pred],
train.items.this.fold$Species, k=k)
predictions.list[[i]] = predictions
predictions.table <- table(predictions, validation.items.this.fold$Species)
# Work out the amount of correct and incorrect predictions.
correct.list <- predictions == validation.items.this.fold$Species
nr.correct = nrow(validation.items.this.fold[correct.list,])
# Get accuracy rate of cv.
accuracy.rate = nr.correct/nrow(validation.items.this.fold)
# Adds the accuracy list.
k.accuracies <- cbind(k.accuracies, accuracy.rate)
}
# Adds the mean accuracy to the total accuracy list.
accuracies <- cbind(accuracies, mean(k.accuracies))
}
accuracies
predictions.table <- table(predictions.list[[which.max(accuracies)]], validation.items.this.fold$Species)
当你调用predictions.table <- table(predictions, validation.items.this.fold$Species)
时,这是混淆矩阵,你是用准确率作为评估指标,所以最好的K就是最好的准确率。你可以这样得到最好的K值:
ks[which.max(accuracies)]
更新
创建一个列表来存储每个预测,然后使用最佳准确度创建混淆矩阵。
我已经使用 R 中的鸢尾花数据集编写了下面的 knn 交叉验证方法。我如何从中获得最佳的 k 值并基于此创建一个混淆矩阵?任何帮助都会很棒。
library(class)
data("iris")
kfolds = 5
iris$folds = cut(seq(1,nrow(iris)),breaks=kfolds,labels=FALSE)
iris$folds
# Sets the columns to use as predicators
pred = c("Petal.Width", "Petal.Length")
accuracies = c()
ks = c(1,3,5,7,9,11,13,15)
for (k in ks) {
k.accuracies = c()
for(i in 1:kfolds) {
# Builds the training set and test set for this fold.
train.items.this.fold = iris[iris$folds != i,]
validation.items.this.fold = iris[iris$folds == i,]
# Fit knn model on this fold.
predictions = knn(train.items.this.fold[,pred],
validation.items.this.fold[,pred],
train.items.this.fold$Species, k=k)
predictions.table <- table(predictions, validation.items.this.fold$Species)
# Work out the amount of correct and incorrect predictions.
correct.list <- predictions == validation.items.this.fold$Species
nr.correct = nrow(validation.items.this.fold[correct.list,])
# Get accuracy rate of cv.
accuracy.rate = nr.correct/nrow(validation.items.this.fold)
# Adds the accuracy list.
k.accuracies <- cbind(k.accuracies, accuracy.rate)
}
# Adds the mean accuracy to the total accuracy list.
accuracies <- cbind(accuracies, mean(k.accuracies))
}
# Accuracy for each value of k: visualisation.
accuracies
更新:
predictions.table <- table(predictions == ks[which.max(accuracies)], validation.items.this.fold$Species)
你的代码有一些问题,这个运行:
library(class)
data("iris")
kfolds = 5
iris$folds = cut(seq(1,nrow(iris)),breaks=kfolds,labels=FALSE)
iris$folds
# Sets the columns to use as predicators
pred = c("Petal.Width", "Petal.Length")
accuracies = c()
ks = c(1,3,5,7,9,11,13,15)
k.accuracies = c()
predictions.list = list()
for (k in ks) {
k.accuracies = c()
for(i in 1:kfolds) {
# Builds the training set and test set for this fold.
train.items.this.fold = iris[iris$folds != i,]
validation.items.this.fold = iris[iris$folds == i,]
# Fit knn model on this fold.
predictions = knn(train.items.this.fold[,pred],
validation.items.this.fold[,pred],
train.items.this.fold$Species, k=k)
predictions.list[[i]] = predictions
predictions.table <- table(predictions, validation.items.this.fold$Species)
# Work out the amount of correct and incorrect predictions.
correct.list <- predictions == validation.items.this.fold$Species
nr.correct = nrow(validation.items.this.fold[correct.list,])
# Get accuracy rate of cv.
accuracy.rate = nr.correct/nrow(validation.items.this.fold)
# Adds the accuracy list.
k.accuracies <- cbind(k.accuracies, accuracy.rate)
}
# Adds the mean accuracy to the total accuracy list.
accuracies <- cbind(accuracies, mean(k.accuracies))
}
accuracies
predictions.table <- table(predictions.list[[which.max(accuracies)]], validation.items.this.fold$Species)
当你调用predictions.table <- table(predictions, validation.items.this.fold$Species)
时,这是混淆矩阵,你是用准确率作为评估指标,所以最好的K就是最好的准确率。你可以这样得到最好的K值:
ks[which.max(accuracies)]
更新
创建一个列表来存储每个预测,然后使用最佳准确度创建混淆矩阵。