sklearn confusion_matrix 在错误的位置显示错误的尺寸/刻度线

sklearn confusion_matrix displaying with wrong dimensions / tick marks at wrong spots

我正在尝试显示一个混淆矩阵,但我终其一生都无法弄清楚为什么它拒绝以适当的方式显示。这是我的代码:

import numpy as np
import itertools
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=30)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, fontsize=20)
    plt.yticks(tick_marks, classes, fontsize=20)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.

    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", 
                 color="white" if cm[i, j] < thresh else "black", fontsize=40)

    plt.tight_layout()
    plt.ylabel('True label', fontsize=30)
    plt.xlabel('Predicted label', fontsize=30)

    return plt

cm = confusion_matrix(y_test, y_predicted_counts)
fig = plt.figure(figsize=(10, 10))
plot = plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')
plt.show()
print(cm)

这是显示的内容:

如有任何帮助,我们将不胜感激。提前致谢。

用于调用 imshow you need to specify origin='lower' (the default is 'upper'; they probably changed this at some time and the scikit-learn docs didn't update their example)。所以以下应该可以解决问题:

plt.imshow(cm, interpolation='nearest', cmap=cmap, origin='lower')
#                                                    ^
#                                                    |
# added origin='lower'  ------------------------------

使用 Matplotlib

如果您想保留您的 matplotlib 实现,只需在 plot_confusion_matrix 函数的末尾添加 plt.ylim(-0.5,2.5)

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title, fontsize=30)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, fontsize=20)
    plt.yticks(tick_marks, classes, fontsize=20)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.

    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt), horizontalalignment="center", 
                 color="white" if cm[i, j] < thresh else "black", fontsize=40)

    plt.tight_layout()
    plt.ylabel('True label', fontsize=30)
    plt.xlabel('Predicted label', fontsize=30)
    plt.ylim(-0.5, 2.5)  # <-- SOLUTION 

    return plt

使用 Seaborn

您可以尝试使用 seaborn 包绘制热图:

from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt   

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.winter):
  cm_df = pd.DataFrame(cm, columns=classes, index = classes)
  cm_df.index.name = 'Actual'
  cm_df.columns.name = 'Predicted'
  plt.figure(figsize = (10,7))
  sn.set(font_scale=1.4)#for label size
  ax =sn.heatmap(cm_df, cmap=cmap, annot=True,annot_kws={"size": 16},fmt="d")# font size
  plt.title(title)
  bottom, top = ax.get_ylim()
  ax.set_ylim(bottom + 0.5, top - 0.5)
  plt.show()

plot_confusion_matrix(cm, classes=['Unsure','No','Yes'], normalize=False, title='Confusion matrix')

Confusion Matrix Result

希望这对你有用!

很可能您使用的是 matplotlib 3.1.1,它破坏了刻度线的默认行为。升级到 3.1.2 或降级到 3.1.0 以解决问题。