SHAP DeepExplainer 在 TensorFlow 2.4+ 下报错
SHAP DeepExplainer with TensorFlow 2.4+ error
我正在尝试使用 DeepExplainer 计算 shap 值,但出现以下错误:
keras is no longer supported, please use tf.keras instead
尽管我正在使用 tf.keras?
KeyError Traceback (most recent call last)
in
6 # ...or pass tensors directly
7 explainer = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
8 shap_values = explainer.shap_values(X_test[1:5])
C:\ProgramData\Anaconda3\lib\site-packages\shap\explainers\_deep\__init__.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
122 were chosen as "top".
124 return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity)
C:\ProgramData\Anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
310 # assign the attributions to the right part of the output arrays
311 for l in range(len(X)):
312 phis[l][j] = (sample_phis[l][bg_data[l].shape[0]:] * (X[l][j] - bg_data[l])).mean(0)
313
314 output_phis.append(phis[0] if not self.multi_input else phis)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
# Reproduction script for the KeyError in the traceback above: trains a small
# softmax classifier on the iris data and asks DeepExplainer for SHAP values.
import shap
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
# NOTE(review): this imports from the standalone `keras` package rather than
# `tf.keras`; possibly related to the "keras is no longer supported" message
# quoted in the question -- confirm.
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
from tensorflow.keras import optimizers
# print the JS visualization code to the notebook
shap.initjs()
# Hold out 20% of the iris data for testing; random_state=0 fixes the split.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# One-hot encode the labels into 3 classes to match the 3-unit softmax output.
Y_train = to_categorical(Y_train, num_classes=3)
Y_test = to_categorical(Y_test, num_classes=3)
# Define baseline model
model = tf.keras.models.Sequential()
# Hidden layer: 8 ReLU units, one input per feature column of X_train.
model.add(tf.keras.layers.Dense(8, input_dim=len(X_train.columns), activation="relu"))
# Output layer: 3 softmax units, one per class.
model.add(tf.keras.layers.Dense(3, activation="softmax"))
model.summary()
# compile the model
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=['accuracy'])
# Train silently (verbose=0); the returned history object is not used below.
hist = model.fit(X_train, Y_train, batch_size=5,epochs=200, verbose=0)
# select a set of background examples to take an expectation over
# (100 distinct rows of X_train, chosen at random without a fixed seed)
background = X_train.iloc[np.random.choice(X_train.shape[0], 100, replace=False)]
# Explain predictions of the model
#explainer = shap.DeepExplainer(model, background)
# ...or pass tensors directly
explainer = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
# This is the call that fails in the traceback above: X_test[1:5] is still a
# pandas DataFrame, and the integer lookup inside shap_values ends up in
# DataFrame.__getitem__, which raises KeyError: 0.
shap_values = explainer.shap_values(X_test[1:5])
TL;DR
- Add `tf.compat.v1.disable_v2_behavior()` at the top of the script for TF 2.4+.
- Calculate SHAP values on a NumPy array, not on a pandas DataFrame.
完整的可重现示例:
# Working example: SHAP DeepExplainer on a tf.keras iris classifier (TF 2.4+).
import shap
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
# Switch TensorFlow to v1 behaviour before any model is built; this is the
# first of the two changes that make DeepExplainer work here.
tf.compat.v1.disable_v2_behavior() # <-- HERE !
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
# NOTE: K, Dense, Sequential and optimizers are not used below; the imports are
# kept only because they were part of the original script.
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
from tensorflow.keras import optimizers
print("SHAP version is:", shap.__version__)
print("Tensorflow version is:", tf.__version__)
# Hold out 20% of the iris data for testing; random_state=0 fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    *shap.datasets.iris(), test_size=0.2, random_state=0
)
# One-hot encode the labels into 3 classes to match the 3-unit softmax output.
Y_train = to_categorical(Y_train, num_classes=3)
Y_test = to_categorical(Y_test, num_classes=3)
# Define baseline model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(8, input_dim=len(X_train.columns), activation="relu"))
model.add(tf.keras.layers.Dense(3, activation="softmax"))
# model.summary()
# compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
hist = model.fit(X_train, Y_train, batch_size=5, epochs=200, verbose=0)
# select a set of background examples to take an expectation over
# Converted to a NumPy array so the explainer never indexes a DataFrame with
# integer positions (the cause of the KeyError: 0 in the question).
background = X_train.iloc[
    np.random.choice(X_train.shape[0], 100, replace=False)
].values
explainer = shap.DeepExplainer(
    (model.layers[0].input, model.layers[-1].output), background
)
# Second change: explain a NumPy array, not a DataFrame. `.iloc[:3]` makes the
# positional row slice explicit.
shap_values = explainer.shap_values(X_test.iloc[:3].values) # <-- HERE !
# print the JS visualization code to the notebook
shap.initjs()
# Force plot for output index 0 and the first explained sample.
shap.force_plot(
    explainer.expected_value[0], shap_values[0][0], feature_names=X_train.columns
)
SHAP version is: 0.39.0
Tensorflow version is: 2.5.0
我正在尝试使用 DeepExplainer 计算 shap 值,但出现以下错误:
keras is no longer supported, please use tf.keras instead
尽管我正在使用 tf.keras?
KeyError Traceback (most recent call last)
in
6 # ...or pass tensors directly
7 explainer = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
8 shap_values = explainer.shap_values(X_test[1:5])
C:\ProgramData\Anaconda3\lib\site-packages\shap\explainers\_deep\__init__.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
122 were chosen as "top".
124 return self.explainer.shap_values(X, ranked_outputs, output_rank_order, check_additivity=check_additivity)
C:\ProgramData\Anaconda3\lib\site-packages\shap\explainers\_deep\deep_tf.py in shap_values(self, X, ranked_outputs, output_rank_order, check_additivity)
310 # assign the attributions to the right part of the output arrays
311 for l in range(len(X)):
312 phis[l][j] = (sample_phis[l][bg_data[l].shape[0]:] * (X[l][j] - bg_data[l])).mean(0)
313
314 output_phis.append(phis[0] if not self.multi_input else phis)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
2798 if self.columns.nlevels > 1:
2799 return self._getitem_multilevel(key)
2800 indexer = self.columns.get_loc(key)
2801 if is_integer(indexer):
2802 indexer = [indexer]
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in get_loc(self, key, method, tolerance)
2646 return self._engine.get_loc(key)
2647 except KeyError:
2648 return self._engine.get_loc(self._maybe_cast_indexer(key))
2649 indexer = self.get_indexer([key], method=method, tolerance=tolerance)
2650 if indexer.ndim > 1 or indexer.size > 1:
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item()
KeyError: 0
# Reproduction script for the KeyError in the traceback above: trains a small
# softmax classifier on the iris data and asks DeepExplainer for SHAP values.
import shap
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
# NOTE(review): this imports from the standalone `keras` package rather than
# `tf.keras`; possibly related to the "keras is no longer supported" message
# quoted in the question -- confirm.
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
from tensorflow.keras import optimizers
# print the JS visualization code to the notebook
shap.initjs()
# Hold out 20% of the iris data for testing; random_state=0 fixes the split.
X_train,X_test,Y_train,Y_test = train_test_split(*shap.datasets.iris(), test_size=0.2, random_state=0)
# One-hot encode the labels into 3 classes to match the 3-unit softmax output.
Y_train = to_categorical(Y_train, num_classes=3)
Y_test = to_categorical(Y_test, num_classes=3)
# Define baseline model
model = tf.keras.models.Sequential()
# Hidden layer: 8 ReLU units, one input per feature column of X_train.
model.add(tf.keras.layers.Dense(8, input_dim=len(X_train.columns), activation="relu"))
# Output layer: 3 softmax units, one per class.
model.add(tf.keras.layers.Dense(3, activation="softmax"))
model.summary()
# compile the model
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=['accuracy'])
# Train silently (verbose=0); the returned history object is not used below.
hist = model.fit(X_train, Y_train, batch_size=5,epochs=200, verbose=0)
# select a set of background examples to take an expectation over
# (100 distinct rows of X_train, chosen at random without a fixed seed)
background = X_train.iloc[np.random.choice(X_train.shape[0], 100, replace=False)]
# Explain predictions of the model
#explainer = shap.DeepExplainer(model, background)
# ...or pass tensors directly
explainer = shap.DeepExplainer((model.layers[0].input, model.layers[-1].output), background)
# This is the call that fails in the traceback above: X_test[1:5] is still a
# pandas DataFrame, and the integer lookup inside shap_values ends up in
# DataFrame.__getitem__, which raises KeyError: 0.
shap_values = explainer.shap_values(X_test[1:5])
TL;DR
- Add `tf.compat.v1.disable_v2_behavior()` at the top of the script for TF 2.4+.
- Calculate SHAP values on a NumPy array, not on a pandas DataFrame.
完整的可重现示例:
# Working example: SHAP DeepExplainer on a tf.keras iris classifier (TF 2.4+).
import shap
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
# Switch TensorFlow to v1 behaviour before any model is built; this is the
# first of the two changes that make DeepExplainer work here.
tf.compat.v1.disable_v2_behavior() # <-- HERE !
import tensorflow.keras.backend as K
from tensorflow.keras.utils import to_categorical
# NOTE: K, Dense, Sequential and optimizers are not used below; the imports are
# kept only because they were part of the original script.
from tensorflow.python.keras.layers import Dense
from tensorflow.python.keras import Sequential
from tensorflow.keras import optimizers
print("SHAP version is:", shap.__version__)
print("Tensorflow version is:", tf.__version__)
# Hold out 20% of the iris data for testing; random_state=0 fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(
    *shap.datasets.iris(), test_size=0.2, random_state=0
)
# One-hot encode the labels into 3 classes to match the 3-unit softmax output.
Y_train = to_categorical(Y_train, num_classes=3)
Y_test = to_categorical(Y_test, num_classes=3)
# Define baseline model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(8, input_dim=len(X_train.columns), activation="relu"))
model.add(tf.keras.layers.Dense(3, activation="softmax"))
# model.summary()
# compile the model
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
hist = model.fit(X_train, Y_train, batch_size=5, epochs=200, verbose=0)
# select a set of background examples to take an expectation over
# Converted to a NumPy array so the explainer never indexes a DataFrame with
# integer positions (the cause of the KeyError: 0 in the question).
background = X_train.iloc[
    np.random.choice(X_train.shape[0], 100, replace=False)
].values
explainer = shap.DeepExplainer(
    (model.layers[0].input, model.layers[-1].output), background
)
# Second change: explain a NumPy array, not a DataFrame. `.iloc[:3]` makes the
# positional row slice explicit.
shap_values = explainer.shap_values(X_test.iloc[:3].values) # <-- HERE !
# print the JS visualization code to the notebook
shap.initjs()
# Force plot for output index 0 and the first explained sample.
shap.force_plot(
    explainer.expected_value[0], shap_values[0][0], feature_names=X_train.columns
)
SHAP version is: 0.39.0
Tensorflow version is: 2.5.0