至少指定一个标签必须在y_true内,目标向量为数值型
At least one label specified must be in y_true, target vector is numerical
我正在用这个 data
实现一个 SVM 项目
以下是我提取特征的方法:
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
df = pd.read_csv('loan_train.csv')
df['due_date'] = pd.to_datetime(df['due_date'])
df['effective_date'] = pd.to_datetime(df['effective_date'])
df['dayofweek'] = df['effective_date'].dt.dayofweek
df['weekend'] = df['dayofweek'].apply(lambda x: 1 if (x>3) else 0)
Feature = df[['Principal','terms','age','Gender','weekend']]
Feature = pd.concat([Feature,pd.get_dummies(df['education'])], axis=1)
Feature.drop(['Master or Above'], axis = 1,inplace=True)
X = Feature
y = df['loan_status'].replace(to_replace=['PAIDOFF','COLLECTION'], value=[0,1],inplace=False)
创建模型和预测:
clf = svm.SVC(kernel='rbf')
clf.fit(X_train_svm, y_train_svm)
yhat_svm = clf.predict(X_test_svm)
评估阶段:
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
np.set_printoptions(precision=2)
print (classification_report(y_test_svm, yhat_svm))
# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Benign(2)','Malignant(4)'],normalize= False, title='Confusion matrix')
这里是错误:
Traceback (most recent call last):
File "E:/python/classification_project/classification.py", line 229,in
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
File "C:\Program Files(x86)\Python38-32\lib\site-packages\sklearn\metrics_classification.py", line 277, in confusion_matrix
raise ValueError("At least one label specified must be in y_true")
ValueError: At least one label specified must be in y_true
我检查了这个 ,它和我的一样,我将 y
从 categorical
更改为 numerical
,但错误仍然存在!
y
中的值是 0
和 1
但在 confusion_matrix
中调用:
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
标签是 2
和 4
。
confusion_matrix
中的标签应等于 y
向量中的标记,即:
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[0,1])
在计算矩阵步骤中,我使用 signs labels=['PAIDOFF','COLLECTION']
定义了标签,而不是使用 labels=[2,4]
所以这是计算代码:
cnf_matrix = confusion_matrix(y_test, yhat, labels=['PAIDOFF','COLLECTION'])
np.set_printoptions(precision=2)
print (classification_report(y_test, yhat))
# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['PAIDOFF','COLLECTION'],normalize= False, title='Confusion matrix')
我正在用这个 data
实现一个 SVM 项目以下是我提取特征的方法:
import itertools
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report, confusion_matrix
df = pd.read_csv('loan_train.csv')
df['due_date'] = pd.to_datetime(df['due_date'])
df['effective_date'] = pd.to_datetime(df['effective_date'])
df['dayofweek'] = df['effective_date'].dt.dayofweek
df['weekend'] = df['dayofweek'].apply(lambda x: 1 if (x>3) else 0)
Feature = df[['Principal','terms','age','Gender','weekend']]
Feature = pd.concat([Feature,pd.get_dummies(df['education'])], axis=1)
Feature.drop(['Master or Above'], axis = 1,inplace=True)
X = Feature
y = df['loan_status'].replace(to_replace=['PAIDOFF','COLLECTION'], value=[0,1],inplace=False)
创建模型和预测:
clf = svm.SVC(kernel='rbf')
clf.fit(X_train_svm, y_train_svm)
yhat_svm = clf.predict(X_test_svm)
评估阶段:
def plot_confusion_matrix(cm, classes,
normalize=False,
title='Confusion matrix',
cmap=plt.cm.Blues):
"""
This function prints and plots the confusion matrix.
Normalization can be applied by setting `normalize=True`.
"""
if normalize:
cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
print("Normalized confusion matrix")
else:
print('Confusion matrix, without normalization')
print(cm)
plt.imshow(cm, interpolation='nearest', cmap=cmap)
plt.title(title)
plt.colorbar()
tick_marks = np.arange(len(classes))
plt.xticks(tick_marks, classes, rotation=45)
plt.yticks(tick_marks, classes)
fmt = '.2f' if normalize else 'd'
thresh = cm.max() / 2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
plt.text(j, i, format(cm[i, j], fmt),
horizontalalignment="center",
color="white" if cm[i, j] > thresh else "black")
plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.show()
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
np.set_printoptions(precision=2)
print (classification_report(y_test_svm, yhat_svm))
# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['Benign(2)','Malignant(4)'],normalize= False, title='Confusion matrix')
这里是错误:
Traceback (most recent call last):
File "E:/python/classification_project/classification.py", line 229,in
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
File "C:\Program Files(x86)\Python38-32\lib\site-packages\sklearn\metrics_classification.py", line 277, in confusion_matrix
raise ValueError("At least one label specified must be in y_true")
ValueError: At least one label specified must be in y_true
我检查了这个 y
从 categorical
更改为 numerical
,但错误仍然存在!
y
中的值是 0
和 1
但在 confusion_matrix
中调用:
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[2,4])
标签是 2
和 4
。
confusion_matrix
中的标签应等于 y
向量中的标记,即:
cnf_matrix = confusion_matrix(y_test_svm, yhat_svm, labels=[0,1])
在计算矩阵步骤中,我使用 signs labels=['PAIDOFF','COLLECTION']
定义了标签,而不是使用 labels=[2,4]
所以这是计算代码:
cnf_matrix = confusion_matrix(y_test, yhat, labels=['PAIDOFF','COLLECTION'])
np.set_printoptions(precision=2)
print (classification_report(y_test, yhat))
# Plot non-normalized confusion matrix
plt.figure()
plot_confusion_matrix(cnf_matrix, classes=['PAIDOFF','COLLECTION'],normalize= False, title='Confusion matrix')