ValueError: The truth value of a Series is ambiguous in one hot encoding error
ValueError: The truth value of a Series is ambiguous in one hot encoding error
我在下面这段代码中尝试使用独热编码器(OneHotEncoder),但是得到了如下错误:ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().(Series 的真值不明确。)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
target=train_features_df['y']
train_features_df=train_features_df.drop(['y'], axis=1)
# Boolean mask over the columns: True where the column dtype is object (categorical feature)
categorical_feature_mask = train_features_df.dtypes==object
# filter categorical columns using mask and turn it into a list
categorical_cols = train_features_df.columns[categorical_feature_mask].tolist()
# instantiate labelencoder object
le = LabelEncoder()
# apply le on categorical feature columns
train_features_df[categorical_cols] = train_features_df[categorical_cols].apply(lambda col:
le.fit_transform(col))
train_features_df[categorical_cols].head(10)
# instantiate OneHotEncoder
ohe = OneHotEncoder(categories = categorical_feature_mask, sparse=False )
# categorical_features = boolean mask for categorical columns
# sparse = False output an array not sparse matrix
# apply OneHotEncoder on categorical feature columns
ohe.fit_transform(train_features_df)
我在最后一行 ohe.fit_transform(train_features_df) 处收到此错误:“ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().”
按要求附上完整的回溯(traceback)信息如下:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-72e45bd93f15> in <module>
23
24 # apply OneHotEncoder on categorical feature columns
---> 25 ohe.fit_transform(train_features_df)
26 #train_encoded_df=pd.DataFrame(data = ohe.fit_transform(train_features_df)) # It returns an numpy array
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
408 """
409 self._validate_keywords()
--> 410 return super().fit_transform(X, y)
411
412 def transform(self, X):
~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
688 if y is None:
689 # fit method of arity 1 (unsupervised transformation)
--> 690 return self.fit(X, **fit_params).transform(X)
691 else:
692 # fit method of arity 2 (supervised transformation)
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
383 """
384 self._validate_keywords()
--> 385 self._fit(X, handle_unknown=self.handle_unknown)
386 self.drop_idx_ = self._compute_drop_idx()
387 return self
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
74 X_list, n_samples, n_features = self._check_X(X)
75
---> 76 if self.categories != 'auto':
77 if len(self.categories) != n_features:
78 raise ValueError("Shape mismatch: if categories is an array,"
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1477 def __nonzero__(self):
1478 raise ValueError(
-> 1479 f"The truth value of a {type(self).__name__} is ambiguous. "
1480 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1481 )
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
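原因很简单:
该错误是由您传给 categories 参数的值引起的,它不是编码器所期望的类型。
从回溯中可以看到,编码器内部会执行 if self.categories != 'auto':当 categories 是一个布尔掩码 Series 时,这个比较的结果仍是一个 Series,无法被当作单个布尔值使用,于是抛出上述 ValueError。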
如果你只想对分类列进行编码,可以先把这些列选取到 selection 中,然后执行以下操作:
ohe = OneHotEncoder(categories = 'auto', sparse=False )
selection = train_features_df[train_features_df.columns[categorical_feature_mask]]
encoded = ohe.fit_transform(selection)
然后将编码结果与非分类列合并
如果您想通过 categories 参数显式传递各特征的类别值,请参考 scikit-learn 官方文档中 OneHotEncoder 的示例(原文此处为链接)。
更优雅的方法是使用 pandas 的 get_dummies 函数进行独热编码:
pd.get_dummies(data=train_features_df, columns=train_features_df.columns[categorical_feature_mask])
我在下面这段代码中尝试使用独热编码器(OneHotEncoder),但是得到了如下错误:ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().(Series 的真值不明确。)
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import pandas as pd
target=train_features_df['y']
train_features_df=train_features_df.drop(['y'], axis=1)
# Boolean mask over the columns: True where the column dtype is object (categorical feature)
categorical_feature_mask = train_features_df.dtypes==object
# filter categorical columns using mask and turn it into a list
categorical_cols = train_features_df.columns[categorical_feature_mask].tolist()
# instantiate labelencoder object
le = LabelEncoder()
# apply le on categorical feature columns
train_features_df[categorical_cols] = train_features_df[categorical_cols].apply(lambda col:
le.fit_transform(col))
train_features_df[categorical_cols].head(10)
# instantiate OneHotEncoder
ohe = OneHotEncoder(categories = categorical_feature_mask, sparse=False )
# categorical_features = boolean mask for categorical columns
# sparse = False output an array not sparse matrix
# apply OneHotEncoder on categorical feature columns
ohe.fit_transform(train_features_df)
我在最后一行 ohe.fit_transform(train_features_df) 处收到此错误:“ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().”
按要求附上完整的回溯(traceback)信息如下:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-12-72e45bd93f15> in <module>
23
24 # apply OneHotEncoder on categorical feature columns
---> 25 ohe.fit_transform(train_features_df)
26 #train_encoded_df=pd.DataFrame(data = ohe.fit_transform(train_features_df)) # It returns an numpy array
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
408 """
409 self._validate_keywords()
--> 410 return super().fit_transform(X, y)
411
412 def transform(self, X):
~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
688 if y is None:
689 # fit method of arity 1 (unsupervised transformation)
--> 690 return self.fit(X, **fit_params).transform(X)
691 else:
692 # fit method of arity 2 (supervised transformation)
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
383 """
384 self._validate_keywords()
--> 385 self._fit(X, handle_unknown=self.handle_unknown)
386 self.drop_idx_ = self._compute_drop_idx()
387 return self
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown)
74 X_list, n_samples, n_features = self._check_X(X)
75
---> 76 if self.categories != 'auto':
77 if len(self.categories) != n_features:
78 raise ValueError("Shape mismatch: if categories is an array,"
~\Anaconda3\lib\site-packages\pandas\core\generic.py in __nonzero__(self)
1477 def __nonzero__(self):
1478 raise ValueError(
-> 1479 f"The truth value of a {type(self).__name__} is ambiguous. "
1480 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1481 )
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
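原因很简单:
该错误是由您传给 categories 参数的值引起的,它不是编码器所期望的类型。
从回溯中可以看到,编码器内部会执行 if self.categories != 'auto':当 categories 是一个布尔掩码 Series 时,这个比较的结果仍是一个 Series,无法被当作单个布尔值使用,于是抛出上述 ValueError。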
如果你只想对分类列进行编码,可以先把这些列选取到 selection 中,然后执行以下操作:
ohe = OneHotEncoder(categories = 'auto', sparse=False )
selection = train_features_df[train_features_df.columns[categorical_feature_mask]]
encoded = ohe.fit_transform(selection)
然后将编码结果与非分类列合并
如果您想通过 categories 参数显式传递各特征的类别值,请参考 scikit-learn 官方文档中 OneHotEncoder 的示例(原文此处为链接)。
更优雅的方法是使用 pandas 的 get_dummies 函数进行独热编码:
pd.get_dummies(data=train_features_df, columns=train_features_df.columns[categorical_feature_mask])