KeyError: "None of [Index([('A','B','C')] , dtype='object')] are in the [columns]
KeyError: "None of [Index([('A','B','C')] , dtype='object')] are in the [columns]
我将 X 和 y 定义如下:
X=df[text_columns + categorical_columns + textual_columns + numeric_columns]
y=df[['Label']]
哪里
- text_columns='Tweet'
- categorical_columns=['A','B','C']
- numeric_columns =['N1','N2']
列名仅供参考。
然后我拆分成 train/test:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=(1/5), random_state=38, stratify=y)
我正在尝试构建一个定制的变压器,如下所示:
分类
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
class CategoricalTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
super().__init__()
# Return self nothing else to do here
def fit(self, X, y=None):
return self
# Helper function that converts values to Binary depending on input
def create_binary(self, obj):
if obj == 0:
return 'No'
else:
return 'Yes'
# Transformer method for this transformer
def transform(self, X, y=None):
# Categorical features to pass down the categorical pipeline
return X[[categorical_columns]].values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the categorical pipeline
categorical_pipeline = Pipeline(steps=[
('categorical_transformer', CategoricalTransformer()),
('one_hot_encoder', OneHotEncoder(handle_unknown='ignore'))])
正文
class TextTransformer(BaseEstimator,TransformerMixin):
def init(self):
super().初始化()
# Return self nothing else to do here
def fit(self, X, y=None):
return self
# Helper function that converts values to Binary depending on input
def create_binary(self, obj):
if obj == 0:
return 'No'
else:
return 'Yes'
# Transformer method for this transformer
def transform(self, X, y=None):
# Text features to pass down the text pipeline
return X[['Tweet']].values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the text pipeline
text_pipeline = Pipeline(steps=[
('text_transformer', TextTransformer()),
('cv', CountVectorizer())])
数值
class NumericalTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
super().__init__()
def fit(self, X, y=None):
return self
def transform(self, X, y=None):
# Numerical features to pass down the numerical pipeline
X = X[[numeric_columns]]
X = X.replace([np.inf, -np.inf], np.nan)
return X.values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the numerical pipeline
numerical_pipeline = Pipeline(steps=[
('numerical_transformer', NumericalTransformer()),
('imputer', KNNImputer(n_neighbors=2)),
('minmax', MinMaxScaler())])
然后我使用特征联合:
from sklearn.pipeline import FeatureUnion
union_pipeline = FeatureUnion(transformer_list=[
('categorical_pipeline', categorical_pipeline),
('numerical_pipeline', numerical_pipeline),
('text_pipeline', text_pipeline)])
# Combining the custom imputer with the categorical, text and numerical pipeline
preprocess_pipeline = Pipeline(steps=[('full_pipeline', union_pipeline)])
但是当我运行模型
# MODEL
from sklearn import tree
# Decision Tree
decision_tree = tree.DecisionTreeClassifier()
full_pipeline = Pipeline(steps=[
('preprocess_pipeline', preprocess_pipeline),
('model', decision_tree)])
# fit on the complete pipeline
training = full_pipeline.fit(X, y)
print(full_pipeline.get_params())
# metrics
score_test = \
round(training.score(X, y) * 100, 2)
print(f"\nTraining Accuracy: {score_test}")
我收到这个错误:
---> 12 training = full_pipeline.fit(X, y)
<ipython-input-69-051568c7b272> in transform(self, X, y)
21 def transform(self, X, y=None):
22 # Categorical features to pass down the categorical pipeline
---> 23 return X[[('A','B','C')]].values
24
25 def get_feature_names(self):
....
KeyError: "None of [Index([('A','B','C')], dtype='object')] are in the [columns]"
我在数字列中也遇到了类似的错误。 TextTransformer 似乎是唯一一个没有错误的工作。
我想我正在考虑的 dataset/columns 有问题。
如果 numeric_columns
(和其他任何一个)是元组,那么你就是
X[numeric_columns]
而不是
X[[numeric_columns]]
到 select 来自 pandas DataFrame
的列子集
我将 X 和 y 定义如下:
X=df[text_columns + categorical_columns + textual_columns + numeric_columns]
y=df[['Label']]
哪里
- text_columns='Tweet'
- categorical_columns=['A','B','C']
- numeric_columns =['N1','N2']
列名仅供参考。 然后我拆分成 train/test:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=(1/5), random_state=38, stratify=y)
我正在尝试构建一个定制的变压器,如下所示:
分类
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
class CategoricalTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
super().__init__()
# Return self nothing else to do here
def fit(self, X, y=None):
return self
# Helper function that converts values to Binary depending on input
def create_binary(self, obj):
if obj == 0:
return 'No'
else:
return 'Yes'
# Transformer method for this transformer
def transform(self, X, y=None):
# Categorical features to pass down the categorical pipeline
return X[[categorical_columns]].values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the categorical pipeline
categorical_pipeline = Pipeline(steps=[
('categorical_transformer', CategoricalTransformer()),
('one_hot_encoder', OneHotEncoder(handle_unknown='ignore'))])
正文 class TextTransformer(BaseEstimator,TransformerMixin): def init(self): super().初始化()
# Return self nothing else to do here
def fit(self, X, y=None):
return self
# Helper function that converts values to Binary depending on input
def create_binary(self, obj):
if obj == 0:
return 'No'
else:
return 'Yes'
# Transformer method for this transformer
def transform(self, X, y=None):
# Text features to pass down the text pipeline
return X[['Tweet']].values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the text pipeline
text_pipeline = Pipeline(steps=[
('text_transformer', TextTransformer()),
('cv', CountVectorizer())])
数值
class NumericalTransformer(BaseEstimator, TransformerMixin):
def __init__(self):
super().__init__()
def fit(self, X, y=None):
return self
def transform(self, X, y=None):
# Numerical features to pass down the numerical pipeline
X = X[[numeric_columns]]
X = X.replace([np.inf, -np.inf], np.nan)
return X.values
def get_feature_names(self):
return X.columns.tolist()
# Defining the steps in the numerical pipeline
numerical_pipeline = Pipeline(steps=[
('numerical_transformer', NumericalTransformer()),
('imputer', KNNImputer(n_neighbors=2)),
('minmax', MinMaxScaler())])
然后我使用特征联合:
from sklearn.pipeline import FeatureUnion
union_pipeline = FeatureUnion(transformer_list=[
('categorical_pipeline', categorical_pipeline),
('numerical_pipeline', numerical_pipeline),
('text_pipeline', text_pipeline)])
# Combining the custom imputer with the categorical, text and numerical pipeline
preprocess_pipeline = Pipeline(steps=[('full_pipeline', union_pipeline)])
但是当我运行模型
# MODEL
from sklearn import tree
# Decision Tree
decision_tree = tree.DecisionTreeClassifier()
full_pipeline = Pipeline(steps=[
('preprocess_pipeline', preprocess_pipeline),
('model', decision_tree)])
# fit on the complete pipeline
training = full_pipeline.fit(X, y)
print(full_pipeline.get_params())
# metrics
score_test = \
round(training.score(X, y) * 100, 2)
print(f"\nTraining Accuracy: {score_test}")
我收到这个错误:
---> 12 training = full_pipeline.fit(X, y)
<ipython-input-69-051568c7b272> in transform(self, X, y)
21 def transform(self, X, y=None):
22 # Categorical features to pass down the categorical pipeline
---> 23 return X[[('A','B','C')]].values
24
25 def get_feature_names(self):
....
KeyError: "None of [Index([('A','B','C')], dtype='object')] are in the [columns]"
我在数字列中也遇到了类似的错误。 TextTransformer 似乎是唯一一个没有错误的工作。
我想我正在考虑的 dataset/columns 有问题。
如果 numeric_columns
(和其他任何一个)是元组,那么你就是
X[numeric_columns]
而不是
X[[numeric_columns]]
到 select 来自 pandas DataFrame
的列子集