如何使用 Keras 神经网络分类器为 KFold 交叉验证中的每个折叠绘制 ROC_AUC 曲线
How to plot ROC_AUC curve for each folds in KFold Cross Validation using Keras Neural Network Classifier
我真的需要在使用 Keras ANN 的 5 折交叉验证中为每一折绘制 ROC 曲线。我尝试了以下链接中的代码:[https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html#sphx-glr-auto-examples-model-selection-plot-roc-crossval-py][1]。当我使用其中所示的 svm 分类器时,它工作得非常好。但是当我想通过包装器使用 Keras ANN 模型时,它会报错。我已经被这个问题困扰了几个月。有人可以帮我吗?这是我的代码:
# Load libraries
import numpy as np
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
X, y = load_breast_cancer(return_X_y=True)
# Create function returning a compiled network
def create_network():
    """Build and compile the feed-forward network used for classification.

    The model is a ``Sequential`` stack of two 16-unit ReLU layers followed
    by a single sigmoid output unit. The first layer declares
    ``input_shape=(30,)``.

    Returns:
        The compiled Keras model (binary cross-entropy loss, RMSprop
        optimizer, accuracy metric).
    """
    # Start neural network
    network = models.Sequential()
    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu', input_shape=(30,)))
    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu'))
    # Add fully connected layer with a sigmoid activation function
    network.add(layers.Dense(units=1, activation='sigmoid'))
    # Compile neural network
    network.compile(loss='binary_crossentropy',  # Cross-entropy
                    optimizer='rmsprop',  # Root Mean Square Propagation
                    metrics=['accuracy'])  # Accuracy performance metric
    # Return compiled network
    return network
# 5-fold stratified splitter used to generate the train/test indices
cv = StratifiedKFold(n_splits=5)
# Wrap Keras model so it can be used by scikit-learn
classifier = KerasClassifier(build_fn=create_network,
                             epochs=10,
                             batch_size=100,
                             verbose=2)

# Plotting the ROC curve
# NOTE(review): plt, plot_roc_curve and auc are used below but are not
# imported in the visible snippet -- presumably imported elsewhere; confirm.
tprs = []  # per-fold TPR values interpolated onto mean_fpr
aucs = []  # per-fold AUC values
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid shared by all folds

fig, ax = plt.subplots()
for i, (train, test) in enumerate(cv.split(X, y)):
    # Fit on the training indices of this fold, then draw its ROC curve
    # from the held-out indices.
    classifier.fit(X[train], y[train])
    viz = plot_roc_curve(classifier, X[test], y[test],
                         name='ROC fold {}'.format(i),
                         alpha=0.3, lw=1, ax=ax)
    # Resample the fold's curve onto the shared grid so folds can be averaged
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0  # force the curve to start at (0, 0)
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)

# Diagonal reference line
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
        label='Chance', alpha=.8)

# Mean ROC curve across folds
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

# Shaded band of +/- one standard deviation, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       title="Receiver operating characteristic example")
ax.legend(loc="lower right")
plt.show()
它显示以下错误:
Epoch 1/10
5/5 - 0s - loss: 24.0817 - accuracy: 0.3714
Epoch 2/10
5/5 - 0s - loss: 2.7967 - accuracy: 0.5648
Epoch 3/10
5/5 - 0s - loss: 2.0594 - accuracy: 0.5363
Epoch 4/10
5/5 - 0s - loss: 2.4763 - accuracy: 0.5604
Epoch 5/10
5/5 - 0s - loss: 2.5489 - accuracy: 0.5121
Epoch 6/10
5/5 - 0s - loss: 2.0528 - accuracy: 0.6132
Epoch 7/10
5/5 - 0s - loss: 1.5593 - accuracy: 0.6088
Epoch 8/10
5/5 - 0s - loss: 2.0422 - accuracy: 0.5626
Epoch 9/10
5/5 - 0s - loss: 1.9191 - accuracy: 0.6242
Epoch 10/10
5/5 - 0s - loss: 1.9914 - accuracy: 0.5582
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-104-2d127e528fbe> in <module>()
8 viz = plot_roc_curve(classifier, X[test], y[test],
9 name='ROC fold {}'.format(i),
---> 10 alpha=0.3, lw=1, ax=ax)
11 interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
12 interp_tpr[0] = 0.0
/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_plot/roc_curve.py in plot_roc_curve(estimator, X, y, sample_weight, drop_intermediate, response_method, name, ax, **kwargs)
170 )
171 if not is_classifier(estimator):
--> 172 raise ValueError(classification_error)
173
174 prediction_method = _check_classifer_response_method(estimator,
ValueError: KerasClassifier should be a binary classifier
这(可能)是这个包装器库中缺失的一个实现细节。
Sklearn 只是简单地检查估计器上是否存在名为 `_estimator_type` 的属性,并且其值是否为字符串 `"classifier"`。您可以在 GitHub 上查看 sklearn 的源代码来了解这一点:
def is_classifier(estimator):
    """Return True if the given estimator is (probably) a classifier.

    The check only inspects the ``_estimator_type`` attribute; an object
    without that attribute is treated as not being a classifier.

    Parameters
    ----------
    estimator : object
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a classifier and False otherwise.
    """
    return getattr(estimator, "_estimator_type", None) == "classifier"
您需要做的就是手动将此属性添加到您的分类器对象中。
# Wrap the Keras model so scikit-learn can drive it
classifier = KerasClassifier(
    build_fn=create_network,
    epochs=10,
    batch_size=100,
    verbose=2,
)
# Flag the wrapper as a classifier so sklearn's is_classifier() check passes
classifier._estimator_type = "classifier"
我已经测试过它并且有效。
我真的需要在使用 Keras ANN 的 5 折交叉验证中为每一折绘制 ROC 曲线。我尝试了以下链接中的代码:[https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html#sphx-glr-auto-examples-model-selection-plot-roc-crossval-py][1]。当我使用其中所示的 svm 分类器时,它工作得非常好。但是当我想通过包装器使用 Keras ANN 模型时,它会报错。我已经被这个问题困扰了几个月。有人可以帮我吗?这是我的代码:
# Load libraries
import numpy as np
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import StratifiedKFold
X, y = load_breast_cancer(return_X_y=True)
# Create function returning a compiled network
def create_network():
    """Build and compile the feed-forward network used for classification.

    The model is a ``Sequential`` stack of two 16-unit ReLU layers followed
    by a single sigmoid output unit. The first layer declares
    ``input_shape=(30,)``.

    Returns:
        The compiled Keras model (binary cross-entropy loss, RMSprop
        optimizer, accuracy metric).
    """
    # Start neural network
    network = models.Sequential()
    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu', input_shape=(30,)))
    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu'))
    # Add fully connected layer with a sigmoid activation function
    network.add(layers.Dense(units=1, activation='sigmoid'))
    # Compile neural network
    network.compile(loss='binary_crossentropy',  # Cross-entropy
                    optimizer='rmsprop',  # Root Mean Square Propagation
                    metrics=['accuracy'])  # Accuracy performance metric
    # Return compiled network
    return network
# 5-fold stratified splitter used to generate the train/test indices
cv = StratifiedKFold(n_splits=5)
# Wrap Keras model so it can be used by scikit-learn
classifier = KerasClassifier(build_fn=create_network,
                             epochs=10,
                             batch_size=100,
                             verbose=2)

# Plotting the ROC curve
# NOTE(review): plt, plot_roc_curve and auc are used below but are not
# imported in the visible snippet -- presumably imported elsewhere; confirm.
tprs = []  # per-fold TPR values interpolated onto mean_fpr
aucs = []  # per-fold AUC values
mean_fpr = np.linspace(0, 1, 100)  # common FPR grid shared by all folds

fig, ax = plt.subplots()
for i, (train, test) in enumerate(cv.split(X, y)):
    # Fit on the training indices of this fold, then draw its ROC curve
    # from the held-out indices.
    classifier.fit(X[train], y[train])
    viz = plot_roc_curve(classifier, X[test], y[test],
                         name='ROC fold {}'.format(i),
                         alpha=0.3, lw=1, ax=ax)
    # Resample the fold's curve onto the shared grid so folds can be averaged
    interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
    interp_tpr[0] = 0.0  # force the curve to start at (0, 0)
    tprs.append(interp_tpr)
    aucs.append(viz.roc_auc)

# Diagonal reference line
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
        label='Chance', alpha=.8)

# Mean ROC curve across folds
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0  # force the curve to end at (1, 1)
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b',
        label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc),
        lw=2, alpha=.8)

# Shaded band of +/- one standard deviation, clipped to [0, 1]
std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
       title="Receiver operating characteristic example")
ax.legend(loc="lower right")
plt.show()
它显示以下错误:
Epoch 1/10
5/5 - 0s - loss: 24.0817 - accuracy: 0.3714
Epoch 2/10
5/5 - 0s - loss: 2.7967 - accuracy: 0.5648
Epoch 3/10
5/5 - 0s - loss: 2.0594 - accuracy: 0.5363
Epoch 4/10
5/5 - 0s - loss: 2.4763 - accuracy: 0.5604
Epoch 5/10
5/5 - 0s - loss: 2.5489 - accuracy: 0.5121
Epoch 6/10
5/5 - 0s - loss: 2.0528 - accuracy: 0.6132
Epoch 7/10
5/5 - 0s - loss: 1.5593 - accuracy: 0.6088
Epoch 8/10
5/5 - 0s - loss: 2.0422 - accuracy: 0.5626
Epoch 9/10
5/5 - 0s - loss: 1.9191 - accuracy: 0.6242
Epoch 10/10
5/5 - 0s - loss: 1.9914 - accuracy: 0.5582
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-104-2d127e528fbe> in <module>()
8 viz = plot_roc_curve(classifier, X[test], y[test],
9 name='ROC fold {}'.format(i),
---> 10 alpha=0.3, lw=1, ax=ax)
11 interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
12 interp_tpr[0] = 0.0
/usr/local/lib/python3.6/dist-packages/sklearn/metrics/_plot/roc_curve.py in plot_roc_curve(estimator, X, y, sample_weight, drop_intermediate, response_method, name, ax, **kwargs)
170 )
171 if not is_classifier(estimator):
--> 172 raise ValueError(classification_error)
173
174 prediction_method = _check_classifer_response_method(estimator,
ValueError: KerasClassifier should be a binary classifier
这(可能)是这个包装器库中缺失的一个实现细节。
Sklearn 只是简单地检查估计器上是否存在名为 `_estimator_type` 的属性,并且其值是否为字符串 `"classifier"`。您可以在 GitHub 上查看 sklearn 的源代码来了解这一点:
def is_classifier(estimator):
    """Return True if the given estimator is (probably) a classifier.

    The check only inspects the ``_estimator_type`` attribute; an object
    without that attribute is treated as not being a classifier.

    Parameters
    ----------
    estimator : object
        Estimator object to test.

    Returns
    -------
    out : bool
        True if estimator is a classifier and False otherwise.
    """
    return getattr(estimator, "_estimator_type", None) == "classifier"
您需要做的就是手动将此属性添加到您的分类器对象中。
# Wrap the Keras model so scikit-learn can drive it
classifier = KerasClassifier(
    build_fn=create_network,
    epochs=10,
    batch_size=100,
    verbose=2,
)
# Flag the wrapper as a classifier so sklearn's is_classifier() check passes
classifier._estimator_type = "classifier"
我已经测试过它并且有效。