如何创建混淆矩阵来评估模型?
How do I create a confusion matrix to evaluate the model?
我正在按照 This Tutorial 教程构建一个音乐流派分类器。现在我想用混淆矩阵来可视化模型在测试集上的预测结果。我该如何着手创建它?是否只需用 model.predict() 得到预测结果,再把它传给混淆矩阵?如果是这样,其他参数应该是什么?请解释一下,因为我不清楚如何正确地可视化结果。下面是代码:
import json
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
# Path to the JSON file that stores the MFCCs and genre labels for each
# processed segment.
DATA_PATH = "data_10.json"
def load_data(data_path):
    """Load the MFCC features and genre labels stored in a JSON file.

    :param data_path: Path to a JSON file containing "mfcc" and "labels" entries
    :return: Tuple ``(X, y)`` of numpy arrays built from the "mfcc" and
        "labels" entries respectively
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default locale encoding.
    with open(data_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    print("Data succesfully loaded!")
    return X, y
def plot_history(history):
    """Show accuracy and loss curves per epoch in two stacked panels.

    The top panel plots the "accuracy" and "val_accuracy" series, the bottom
    panel plots the "loss" and "val_loss" series. The ``val_*`` series are
    labelled "test" in the legends.

    :param history: Training history of model; its ``history`` attribute is
        indexed with the four metric names listed above
    :return: None; the figure is displayed with ``plt.show()``
    """
    # One entry per panel, top to bottom.
    panel_specs = [
        {
            "curves": [("accuracy", "train accuracy"), ("val_accuracy", "test accuracy")],
            "ylabel": "Accuracy",
            "legend_loc": "lower right",
            "title": "Accuracy eval",
        },
        {
            "curves": [("loss", "train error"), ("val_loss", "test error")],
            "ylabel": "Error",
            "legend_loc": "upper right",
            "title": "Error eval",
        },
    ]

    _, axes = plt.subplots(2)

    for axis, spec in zip(axes, panel_specs):
        for series_key, series_label in spec["curves"]:
            axis.plot(history.history[series_key], label=series_label)
        axis.set_ylabel(spec["ylabel"])
        axis.legend(loc=spec["legend_loc"])
        axis.set_title(spec["title"])

    # Only the bottom panel carries the x-axis label.
    axes[1].set_xlabel("Epoch")

    plt.show()
if __name__ == "__main__":
    # Read the features and labels, then hold out 30% of the samples.
    X, y = load_data(DATA_PATH)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Assemble the network layer by layer.
    model = keras.Sequential()

    # input layer: flatten each sample's 2-D feature matrix into a vector
    model.add(keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])))

    # three dense hidden layers, each with its own L2 weight penalty
    for units in (512, 256, 64):
        model.add(
            keras.layers.Dense(
                units,
                activation='relu',
                kernel_regularizer=keras.regularizers.l2(0.001),
            )
        )

    # output layer: softmax over 10 classes
    model.add(keras.layers.Dense(10, activation='softmax'))

    # Labels are fed as integer class ids, hence the sparse loss.
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimiser,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.summary()

    # Train; the held-out split is also passed as validation data, so the
    # "val_*" entries of the history are computed on X_test / y_test.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        batch_size=32,
        epochs=100,
    )

    plot_history(history)
鉴于模型已正确训练,您需要执行以下操作:
from sklearn.metrics import confusion_matrix

# The softmax output layer yields one probability vector per test sample.
class_probabilities = model.predict(X_test)

# Keep only the index of the most probable class for each sample.
y_pred = np.argmax(class_probabilities, axis=1)

# Compare the true labels with the predicted ones.
conf_mat = confusion_matrix(y_test, y_pred)
首先得到预测结果(由于输出层使用了 softmax,model.predict() 返回的是每个样本的类别概率向量),然后用 np.argmax 取出概率最大的类别。得到预测类别后,只需将它与真实标签 y_test 一起传给 confusion_matrix 即可。返回的矩阵中,行对应真实类别,列对应预测类别。
我正在按照 This Tutorial 教程构建一个音乐流派分类器。现在我想用混淆矩阵来可视化模型在测试集上的预测结果。我该如何着手创建它?是否只需用 model.predict() 得到预测结果,再把它传给混淆矩阵?如果是这样,其他参数应该是什么?请解释一下,因为我不清楚如何正确地可视化结果。下面是代码:
import json
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
# Path to the JSON file that stores the MFCCs and genre labels for each
# processed segment.
DATA_PATH = "data_10.json"
def load_data(data_path):
    """Load the MFCC features and genre labels stored in a JSON file.

    :param data_path: Path to a JSON file containing "mfcc" and "labels" entries
    :return: Tuple ``(X, y)`` of numpy arrays built from the "mfcc" and
        "labels" entries respectively
    """
    # JSON text is UTF-8; decode it explicitly instead of relying on the
    # platform's default locale encoding.
    with open(data_path, "r", encoding="utf-8") as fp:
        data = json.load(fp)

    # convert lists to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])

    print("Data succesfully loaded!")
    return X, y
def plot_history(history):
    """Show accuracy and loss curves per epoch in two stacked panels.

    The top panel plots the "accuracy" and "val_accuracy" series, the bottom
    panel plots the "loss" and "val_loss" series. The ``val_*`` series are
    labelled "test" in the legends.

    :param history: Training history of model; its ``history`` attribute is
        indexed with the four metric names listed above
    :return: None; the figure is displayed with ``plt.show()``
    """
    # One entry per panel, top to bottom.
    panel_specs = [
        {
            "curves": [("accuracy", "train accuracy"), ("val_accuracy", "test accuracy")],
            "ylabel": "Accuracy",
            "legend_loc": "lower right",
            "title": "Accuracy eval",
        },
        {
            "curves": [("loss", "train error"), ("val_loss", "test error")],
            "ylabel": "Error",
            "legend_loc": "upper right",
            "title": "Error eval",
        },
    ]

    _, axes = plt.subplots(2)

    for axis, spec in zip(axes, panel_specs):
        for series_key, series_label in spec["curves"]:
            axis.plot(history.history[series_key], label=series_label)
        axis.set_ylabel(spec["ylabel"])
        axis.legend(loc=spec["legend_loc"])
        axis.set_title(spec["title"])

    # Only the bottom panel carries the x-axis label.
    axes[1].set_xlabel("Epoch")

    plt.show()
if __name__ == "__main__":
    # Read the features and labels, then hold out 30% of the samples.
    X, y = load_data(DATA_PATH)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Assemble the network layer by layer.
    model = keras.Sequential()

    # input layer: flatten each sample's 2-D feature matrix into a vector
    model.add(keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])))

    # three dense hidden layers, each with its own L2 weight penalty
    for units in (512, 256, 64):
        model.add(
            keras.layers.Dense(
                units,
                activation='relu',
                kernel_regularizer=keras.regularizers.l2(0.001),
            )
        )

    # output layer: softmax over 10 classes
    model.add(keras.layers.Dense(10, activation='softmax'))

    # Labels are fed as integer class ids, hence the sparse loss.
    optimiser = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(
        optimizer=optimiser,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    model.summary()

    # Train; the held-out split is also passed as validation data, so the
    # "val_*" entries of the history are computed on X_test / y_test.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        batch_size=32,
        epochs=100,
    )

    plot_history(history)
鉴于模型已正确训练,您需要执行以下操作:
from sklearn.metrics import confusion_matrix

# The softmax output layer yields one probability vector per test sample.
class_probabilities = model.predict(X_test)

# Keep only the index of the most probable class for each sample.
y_pred = np.argmax(class_probabilities, axis=1)

# Compare the true labels with the predicted ones.
conf_mat = confusion_matrix(y_test, y_pred)
首先得到预测结果(由于输出层使用了 softmax,model.predict() 返回的是每个样本的类别概率向量),然后用 np.argmax 取出概率最大的类别。得到预测类别后,只需将它与真实标签 y_test 一起传给 confusion_matrix 即可。返回的矩阵中,行对应真实类别,列对应预测类别。