用 LIME 解释 CNN (Keras) 输出
Explaining CNN (Keras) outputs with LIME
我正在尝试用 LIME 解释我在 Keras 中构建(built)的卷积神经网络的输出。
我的神经网络是一个多类别文本分类器(multi-class text classifier),其中每个类别(class)都是相互独立的。因此,一段文本可以同时包含类别 1 和 2,也可以只包含类别 1,等等。对于文本中不含任何类别的情况,还有第五个"类别"(None)。
然而,虽然我已经成功用 Keras 和 LIME 解释了二分类(binary classification)的情况,但对于类别相互独立的多类别情况,我却无法得到解释结果。我在这里(here)找到了一些初步的帮助:
但是,我的代码不起作用,我从 Lime 收到内部错误,例如:"ValueError: Found input variables with inconsistent numbers of samples: [5000, 100000]"
from lime.lime_text import LimeTextExplainer, TextDomainMapper
# LIME text explainer labelled with the class names held by `encoder`
# (`encoder` is defined outside this snippet).
explainer = LimeTextExplainer(class_names=encoder.classes_)
# NOTE(review): `chosen_text` is not referenced anywhere in the visible snippet.
chosen_text = 2
def flatten_predict(i):
    """Predict class scores for one text or a sequence of texts.

    Each text is passed through ``preprocess`` and ``make_predictable``
    (both defined outside this snippet) and then to ``model.predict``.
    The complement ``1 - score`` of every score is appended as additional
    columns, so every returned row reads ``[scores..., 1 - scores...]``.

    Args:
        i: A single text, or a list/tuple of texts. Anything that is not a
            list or tuple is treated as one text and wrapped in a list.

    Returns:
        A NumPy array with the stacked prediction rows; an empty array when
        ``i`` is an empty sequence.
    """
    # Catch single (non-sequence) inputs and convert them to a list.
    # isinstance() also accepts tuples and list subclasses, which the former
    # ``i.__class__ != list`` comparison wrapped as if they were one text.
    if not isinstance(i, (list, tuple)):
        i = [i]
        print("## Caught and transformed single string.")

    # Prediction rows collected over all inputs.
    predStorage = []
    for textInput in i:
        features = make_predictable(preprocess(textInput))
        pred = model.predict(features)
        # assumes pred is 2-D (rows x classes) so axis=1 appends columns — TODO confirm
        pred = np.append(pred, 1 - pred, axis=1)
        predStorage.extend(pred)
    return np.asarray(predStorage)
def get_predict_proba_fn_of_class(label):
    """Build a two-column prediction function for ``label``.

    The returned callable runs ``flatten_predict`` on the texts it receives,
    indexes the columns of that result with the positions where the
    prediction equals ``label``, reshapes the selection into one column and
    returns it stacked next to its complement as ``[1 - p, p]``.

    NOTE(review): ``np.where`` called with a single argument yields a tuple
    of index arrays; using that tuple as the column index may select many
    values per row, which would make the output longer than the input —
    TODO confirm against the "inconsistent numbers of samples" traceback
    reported with this snippet.
    """
    def rewrapped_predict(strings):
        scores = flatten_predict(strings)
        # The prediction is deliberately computed a second time, as in the
        # original expression.
        hits = np.where(flatten_predict(strings) == label)
        preds = scores[:, hits].reshape(-1, 1)
        complement = 1 - preds
        ret = np.asarray(np.hstack([complement, preds]))
        return ret

    return rewrapped_predict
# Sample sentence to explain.
# NOTE(review): the name `str` shadows the builtin type of the same name.
str = 'Ein sehr freundlicher Arzt.'
preds = labels_to_explain = flatten_predict(str)
print(labels_to_explain)

# One explanation per entry yielded by iterating over the prediction array.
explanation_for_label = dict()
for label in labels_to_explain:
    wrapped = get_predict_proba_fn_of_class(label)
    explanation_for_label[label] = explainer.explain_instance(str, wrapped)
    explanation_for_label[label].show_in_notebook()
错误信息:
ValueError Traceback (most recent call last)
<ipython-input-26-8df61aaa23f4> in <module>()
53 for label in labels_to_explain:
54 wrapped = get_predict_proba_fn_of_class(label)
---> 55 explanation_for_label[label] = explainer.explain_instance(str, wrapped)
56 explanation_for_label[label].show_in_notebook()
57
/usr/local/lib/python3.6/dist-packages/lime/lime_text.py in explain_instance(self, text_instance, classifier_fn, labels, top_labels, num_features, num_samples, distance_metric, model_regressor)
405 data, yss, distances, label, num_features,
406 model_regressor=model_regressor,
--> 407 feature_selection=self.feature_selection)
408 return ret_exp
409
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in explain_instance_with_data(self, neighborhood_data, neighborhood_labels, distances, label, num_features, feature_selection, model_regressor)
155 weights,
156 num_features,
--> 157 feature_selection)
158
159 if model_regressor is None:
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in feature_selection(self, data, labels, weights, num_features, method)
104 n_method = 'highest_weights'
105 return self.feature_selection(data, labels, weights,
--> 106 num_features, n_method)
107
108 def explain_instance_with_data(self,
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in feature_selection(self, data, labels, weights, num_features, method)
78 clf = Ridge(alpha=0, fit_intercept=True,
79 random_state=self.random_state)
---> 80 clf.fit(data, labels, sample_weight=weights)
81 feature_weights = sorted(zip(range(data.shape[0]),
82 clf.coef_ * data[0]),
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/ridge.py in fit(self, X, y, sample_weight)
678 self : returns an instance of self.
679 """
--> 680 return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
681
682
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/ridge.py in fit(self, X, y, sample_weight)
489
490 X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
--> 491 multi_output=True, y_numeric=True)
492
493 if ((sample_weight is not None) and
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
764 y = y.astype(np.float64)
765
--> 766 check_consistent_length(X, y)
767
768 return X, y
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
233 if len(uniques) > 1:
234 raise ValueError("Found input variables with inconsistent numbers of"
--> 235 " samples: %r" % [int(l) for l in lengths])
236
237
ValueError: Found input variables with inconsistent numbers of samples: [5000, 100000]
有谁知道我做错了什么?我很确定它与输入格式有关。
不太确定,但从您的错误日志来看,check_consistent_length(X, y) 似乎失败了,因此您的输入 X 与 y 的长度不同。请尝试检查第 80 行的 clf.fit(data, labels, sample_weight=weights) 调用。
我现在设法解决了这个问题。
给感兴趣的小伙伴们提供解决方案。
简而言之,诀窍是在 get_predict_proba_fn_of_class 中从 numpy 数组里选择正确的列。由于我的五个相互独立的分类分数加起来不等于 1,我必须为每个标签的分类分数在新列中添加其补值(例如,对于 0.67,我添加了 1-0.67),然后同时选择原始列和新列。
from lime.lime_text import LimeTextExplainer, TextDomainMapper
# Show the class names held by `encoder` (defined outside this snippet).
print(encoder.classes_)
# Reference for the lime.lime_text module:
##https://lime-ml.readthedocs.io/en/latest/lime.html#module-lime.lime_text
def flatten_predict(i):
    """Predict class scores for one text or a sequence of texts.

    Each text is passed through ``preprocess`` and ``make_predictable``
    (both defined outside this snippet) and then to ``model.predict``.
    The complement ``1 - score`` of every score is appended as additional
    columns, so every returned row reads ``[scores..., 1 - scores...]``.

    Args:
        i: A single text, or a list/tuple of texts. Anything that is not a
            list or tuple is treated as one text and wrapped in a list.

    Returns:
        A NumPy array with the stacked prediction rows; an empty array when
        ``i`` is an empty sequence.
    """
    # Catch single (non-sequence) input and convert it to a list.
    # isinstance() also accepts tuples and list subclasses, which the former
    # ``i.__class__ != list`` comparison wrapped as if they were one text.
    if not isinstance(i, (list, tuple)):
        i = [i]

    # Prediction rows collected over all inputs.
    predStorage = []
    for textInput in i:
        features = make_predictable(preprocess(textInput))
        pred = model.predict(features)
        # assumes pred is 2-D (rows x classes) so axis=1 appends columns — TODO confirm
        pred = np.append(pred, 1 - pred, axis=1)
        predStorage.extend(pred)
    return np.asarray(predStorage)
def get_predict_proba_fn_of_class(strings, label_index=None):
    """Build a two-column prediction function for a single class.

    ``flatten_predict`` returns, per text, the class scores followed by
    their complements. The returned callable keeps only the score column of
    one class and the matching complement column.

    Args:
        strings: Unused; kept so existing positional calls keep working.
            (The inner function's own ``strings`` parameter shadows it.)
        label_index: Column index of the class to select. When given, the
            column is chosen directly. When omitted, the legacy behaviour
            applies: the column is located by searching the predictions for
            the value of the module-level variable ``label`` at call time.

    Returns:
        A function mapping a text or list of texts to an array with the
        columns ``[score, 1 - score]`` of the selected class.
    """
    def rewrapped_predict(strings):
        pred = flatten_predict(strings)
        # flatten_predict doubles the column count, so the complement of
        # column k sits n_classes columns further right (formerly a
        # hard-coded stride of 5).
        n_classes = pred.shape[1] // 2
        if label_index is None:
            # Legacy lookup by exact float equality against the global
            # `label`; takes the column of the first match and raises
            # IndexError when nothing matches.
            index = np.where(pred == label)[1][0]
        else:
            index = label_index
        return pred[:, index::n_classes]

    return rewrapped_predict
# Sample sentence to explain.
string = "Der Arzt weiß, was er tut und hat mir alles genau erklärt."
print("Simple model prediction:", model.predict(make_predictable(preprocess(string))))
labels_to_explain = flatten_predict(string)
print("labels_to_explain:", labels_to_explain)

explanation_for_label = dict()
# NOTE: the loop variable must keep the name `label` — the function returned
# by get_predict_proba_fn_of_class reads `label` as a global when it is called.
for index, label in enumerate(labels_to_explain[0]):
    # Only the first half of the row is explained; flatten_predict appends
    # the complement columns after the class scores.
    if not index < (len(labels_to_explain[0]) / 2):
        continue
    actual_classes = [encoder.classes_[index], 'None']
    explainer = LimeTextExplainer(class_names=actual_classes)
    wrapped = get_predict_proba_fn_of_class(string)  # function returns function!
    explanation_for_label[label] = explainer.explain_instance(string, wrapped)
    explanation_for_label[label].show_in_notebook()
我正在尝试用 LIME 解释我在 Keras 中构建(built)的卷积神经网络的输出。
我的神经网络是一个多类别文本分类器(multi-class text classifier),其中每个类别(class)都是相互独立的。因此,一段文本可以同时包含类别 1 和 2,也可以只包含类别 1,等等。对于文本中不含任何类别的情况,还有第五个"类别"(None)。
然而,虽然我已经成功用 Keras 和 LIME 解释了二分类(binary classification)的情况,但对于类别相互独立的多类别情况,我却无法得到解释结果。我在这里(here)找到了一些初步的帮助:
但是,我的代码不起作用,我从 Lime 收到内部错误,例如:"ValueError: Found input variables with inconsistent numbers of samples: [5000, 100000]"
from lime.lime_text import LimeTextExplainer, TextDomainMapper
# LIME text explainer labelled with the class names held by `encoder`
# (`encoder` is defined outside this snippet).
explainer = LimeTextExplainer(class_names=encoder.classes_)
# NOTE(review): `chosen_text` is not referenced anywhere in the visible snippet.
chosen_text = 2
def flatten_predict(i):
    """Predict class scores for one text or a sequence of texts.

    Each text is passed through ``preprocess`` and ``make_predictable``
    (both defined outside this snippet) and then to ``model.predict``.
    The complement ``1 - score`` of every score is appended as additional
    columns, so every returned row reads ``[scores..., 1 - scores...]``.

    Args:
        i: A single text, or a list/tuple of texts. Anything that is not a
            list or tuple is treated as one text and wrapped in a list.

    Returns:
        A NumPy array with the stacked prediction rows; an empty array when
        ``i`` is an empty sequence.
    """
    # Catch single (non-sequence) inputs and convert them to a list.
    # isinstance() also accepts tuples and list subclasses, which the former
    # ``i.__class__ != list`` comparison wrapped as if they were one text.
    if not isinstance(i, (list, tuple)):
        i = [i]
        print("## Caught and transformed single string.")

    # Prediction rows collected over all inputs.
    predStorage = []
    for textInput in i:
        features = make_predictable(preprocess(textInput))
        pred = model.predict(features)
        # assumes pred is 2-D (rows x classes) so axis=1 appends columns — TODO confirm
        pred = np.append(pred, 1 - pred, axis=1)
        predStorage.extend(pred)
    return np.asarray(predStorage)
def get_predict_proba_fn_of_class(label):
    """Build a two-column prediction function for ``label``.

    The returned callable runs ``flatten_predict`` on the texts it receives,
    indexes the columns of that result with the positions where the
    prediction equals ``label``, reshapes the selection into one column and
    returns it stacked next to its complement as ``[1 - p, p]``.

    NOTE(review): ``np.where`` called with a single argument yields a tuple
    of index arrays; using that tuple as the column index may select many
    values per row, which would make the output longer than the input —
    TODO confirm against the "inconsistent numbers of samples" traceback
    reported with this snippet.
    """
    def rewrapped_predict(strings):
        scores = flatten_predict(strings)
        # The prediction is deliberately computed a second time, as in the
        # original expression.
        hits = np.where(flatten_predict(strings) == label)
        preds = scores[:, hits].reshape(-1, 1)
        complement = 1 - preds
        ret = np.asarray(np.hstack([complement, preds]))
        return ret

    return rewrapped_predict
# Sample sentence to explain.
# NOTE(review): the name `str` shadows the builtin type of the same name.
str = 'Ein sehr freundlicher Arzt.'
preds = labels_to_explain = flatten_predict(str)
print(labels_to_explain)

# One explanation per entry yielded by iterating over the prediction array.
explanation_for_label = dict()
for label in labels_to_explain:
    wrapped = get_predict_proba_fn_of_class(label)
    explanation_for_label[label] = explainer.explain_instance(str, wrapped)
    explanation_for_label[label].show_in_notebook()
错误信息:
ValueError Traceback (most recent call last)
<ipython-input-26-8df61aaa23f4> in <module>()
53 for label in labels_to_explain:
54 wrapped = get_predict_proba_fn_of_class(label)
---> 55 explanation_for_label[label] = explainer.explain_instance(str, wrapped)
56 explanation_for_label[label].show_in_notebook()
57
/usr/local/lib/python3.6/dist-packages/lime/lime_text.py in explain_instance(self, text_instance, classifier_fn, labels, top_labels, num_features, num_samples, distance_metric, model_regressor)
405 data, yss, distances, label, num_features,
406 model_regressor=model_regressor,
--> 407 feature_selection=self.feature_selection)
408 return ret_exp
409
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in explain_instance_with_data(self, neighborhood_data, neighborhood_labels, distances, label, num_features, feature_selection, model_regressor)
155 weights,
156 num_features,
--> 157 feature_selection)
158
159 if model_regressor is None:
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in feature_selection(self, data, labels, weights, num_features, method)
104 n_method = 'highest_weights'
105 return self.feature_selection(data, labels, weights,
--> 106 num_features, n_method)
107
108 def explain_instance_with_data(self,
/usr/local/lib/python3.6/dist-packages/lime/lime_base.py in feature_selection(self, data, labels, weights, num_features, method)
78 clf = Ridge(alpha=0, fit_intercept=True,
79 random_state=self.random_state)
---> 80 clf.fit(data, labels, sample_weight=weights)
81 feature_weights = sorted(zip(range(data.shape[0]),
82 clf.coef_ * data[0]),
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/ridge.py in fit(self, X, y, sample_weight)
678 self : returns an instance of self.
679 """
--> 680 return super(Ridge, self).fit(X, y, sample_weight=sample_weight)
681
682
/usr/local/lib/python3.6/dist-packages/sklearn/linear_model/ridge.py in fit(self, X, y, sample_weight)
489
490 X, y = check_X_y(X, y, ['csr', 'csc', 'coo'], dtype=_dtype,
--> 491 multi_output=True, y_numeric=True)
492
493 if ((sample_weight is not None) and
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, warn_on_dtype, estimator)
764 y = y.astype(np.float64)
765
--> 766 check_consistent_length(X, y)
767
768 return X, y
/usr/local/lib/python3.6/dist-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
233 if len(uniques) > 1:
234 raise ValueError("Found input variables with inconsistent numbers of"
--> 235 " samples: %r" % [int(l) for l in lengths])
236
237
ValueError: Found input variables with inconsistent numbers of samples: [5000, 100000]
有谁知道我做错了什么?我很确定它与输入格式有关。
不太确定,但从您的错误日志来看,check_consistent_length(X, y) 似乎失败了,因此您的输入 X 与 y 的长度不同。请尝试检查第 80 行的 clf.fit(data, labels, sample_weight=weights) 调用。
我现在设法解决了这个问题。 给感兴趣的小伙伴们提供解决方案。
简而言之,诀窍是在 get_predict_proba_fn_of_class 中从 numpy 数组里选择正确的列。由于我的五个相互独立的分类分数加起来不等于 1,我必须为每个标签的分类分数在新列中添加其补值(例如,对于 0.67,我添加了 1-0.67),然后同时选择原始列和新列。
from lime.lime_text import LimeTextExplainer, TextDomainMapper
# Show the class names held by `encoder` (defined outside this snippet).
print(encoder.classes_)
# Reference for the lime.lime_text module:
##https://lime-ml.readthedocs.io/en/latest/lime.html#module-lime.lime_text
def flatten_predict(i):
    """Predict class scores for one text or a sequence of texts.

    Each text is passed through ``preprocess`` and ``make_predictable``
    (both defined outside this snippet) and then to ``model.predict``.
    The complement ``1 - score`` of every score is appended as additional
    columns, so every returned row reads ``[scores..., 1 - scores...]``.

    Args:
        i: A single text, or a list/tuple of texts. Anything that is not a
            list or tuple is treated as one text and wrapped in a list.

    Returns:
        A NumPy array with the stacked prediction rows; an empty array when
        ``i`` is an empty sequence.
    """
    # Catch single (non-sequence) input and convert it to a list.
    # isinstance() also accepts tuples and list subclasses, which the former
    # ``i.__class__ != list`` comparison wrapped as if they were one text.
    if not isinstance(i, (list, tuple)):
        i = [i]

    # Prediction rows collected over all inputs.
    predStorage = []
    for textInput in i:
        features = make_predictable(preprocess(textInput))
        pred = model.predict(features)
        # assumes pred is 2-D (rows x classes) so axis=1 appends columns — TODO confirm
        pred = np.append(pred, 1 - pred, axis=1)
        predStorage.extend(pred)
    return np.asarray(predStorage)
def get_predict_proba_fn_of_class(strings, label_index=None):
    """Build a two-column prediction function for a single class.

    ``flatten_predict`` returns, per text, the class scores followed by
    their complements. The returned callable keeps only the score column of
    one class and the matching complement column.

    Args:
        strings: Unused; kept so existing positional calls keep working.
            (The inner function's own ``strings`` parameter shadows it.)
        label_index: Column index of the class to select. When given, the
            column is chosen directly. When omitted, the legacy behaviour
            applies: the column is located by searching the predictions for
            the value of the module-level variable ``label`` at call time.

    Returns:
        A function mapping a text or list of texts to an array with the
        columns ``[score, 1 - score]`` of the selected class.
    """
    def rewrapped_predict(strings):
        pred = flatten_predict(strings)
        # flatten_predict doubles the column count, so the complement of
        # column k sits n_classes columns further right (formerly a
        # hard-coded stride of 5).
        n_classes = pred.shape[1] // 2
        if label_index is None:
            # Legacy lookup by exact float equality against the global
            # `label`; takes the column of the first match and raises
            # IndexError when nothing matches.
            index = np.where(pred == label)[1][0]
        else:
            index = label_index
        return pred[:, index::n_classes]

    return rewrapped_predict
# Sample sentence to explain.
string = "Der Arzt weiß, was er tut und hat mir alles genau erklärt."
print("Simple model prediction:", model.predict(make_predictable(preprocess(string))))
labels_to_explain = flatten_predict(string)
print("labels_to_explain:", labels_to_explain)

explanation_for_label = dict()
# NOTE: the loop variable must keep the name `label` — the function returned
# by get_predict_proba_fn_of_class reads `label` as a global when it is called.
for index, label in enumerate(labels_to_explain[0]):
    # Only the first half of the row is explained; flatten_predict appends
    # the complement columns after the class scores.
    if not index < (len(labels_to_explain[0]) / 2):
        continue
    actual_classes = [encoder.classes_[index], 'None']
    explainer = LimeTextExplainer(class_names=actual_classes)
    wrapped = get_predict_proba_fn_of_class(string)  # function returns function!
    explanation_for_label[label] = explainer.explain_instance(string, wrapped)
    explanation_for_label[label].show_in_notebook()