IndexError: arrays used as indices must be of integer (or boolean) type Error during OneHotEncoding
IndexError: arrays used as indices must be of integer (or boolean) type Error during OneHotEncoding
我有一个包含分类变量的数据框,想对它应用 OneHotEncoder。在 OneHotEncoder 之前先使用 LabelEncoder 可以解决我的问题,但这在我看来并不合理,因为最新版本的 OneHotEncoder 已经可以直接接受字符串形式的分类变量。
下面是一个示例数据框,您可以用它来测试代码:
data = pd.DataFrame({'col1': {0: 'ab321', 1: 'ab568', 2: 'mkld78'},
'col2': {0: 'Red', 1: 'Blue', 2: 'Green'},
'col3': {0: 'First', 1: 'Second', 2: 'Third'},
'col4': {0: 'Wisconsin', 1: 'California', 2: 'Portland'},
'col5': {0: 'a', 1: 'f', 2: 'g'},
'col6': {0: 1, 1: 2, 2: 3},
'target': {0: 0, 1: 0, 2: 1}})
这是我尝试过的:
我分别尝试了按列索引和按列名两种方式来解决这个错误:
#Index
# OneHotEncoding
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
#Load data
train = pd.read_csv("data_train.csv")
test = pd.read_csv("data_test.csv")
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder(categorical_features = categorical_columns)
# reshape data
for index in categorical_indexes:
X.iloc[:,index] = ohe.fit_transform(X.iloc[:,index].values.reshape(-1,1))
#Column Names
# OneHotEncoding
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
train = pd.read_csv("data_train.csv")
test = pd.read_csv("data_test.csv")
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder(categorical_features = categorical_columns)
# reshape data
for column in categorical_columns:
X[column] = ohe.fit_transform(X[column].values.reshape(-1,1))
错误回溯:
IndexError Traceback (most recent call last)
<ipython-input-86-17c86bf649e2> in <module>
11 # reshape data
12 for index in categorical_indexes:
---> 13 X.iloc[:,index] = ohe.fit_transform(X.iloc[:,index].values.reshape(-1,1))
14
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
622 self._validate_keywords()
623
--> 624 self._handle_deprecations(X)
625
626 if self._legacy_mode:
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\sklearn\preprocessing\_encoders.py in _handle_deprecations(self, X)
453 n_features = X.shape[1]
454 sel = np.zeros(n_features, dtype=bool)
--> 455 sel[np.asarray(self.categorical_features)] = True
456 if sum(sel) == 0:
457 self.categories_ = []
IndexError: arrays used as indices must be of integer (or boolean) type
您误解了 OneHotEncoder 的用法:它应当在整个训练集上一次性拟合,而不是逐列拟合。
报错的直接原因是 categorical_features 参数被用作数组下标,只接受整数索引或布尔掩码,而代码传入的是列名字符串。
使用这个:
data = pd.DataFrame({'col1': {0: 'ab321', 1: 'ab568', 2: 'mkld78'},
'col2': {0: 'Red', 1: 'Blue', 2: 'Green'},
'col3': {0: 'First', 1: 'Second', 2: 'Third'},
'col4': {0: 'Wisconsin', 1: 'California', 2: 'Portland'},
'col5': {0: 'a', 1: 'f', 2: 'g'},
'col6': {0: 1, 1: 2, 2: 3},
'target': {0: 0, 1: 0, 2: 1}})
# OneHotEncoding
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
train = data.iloc[0:2,:]
test = data.iloc[2:,:]
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder()
X_ = ohe.fit_transform(X)
X_
# <2x12 sparse matrix of type '<type 'numpy.float64'>'
# with 12 stored elements in Compressed Sparse Row format>
我有一个包含分类变量的数据框,想对它应用 OneHotEncoder。在 OneHotEncoder 之前先使用 LabelEncoder 可以解决我的问题,但这在我看来并不合理,因为最新版本的 OneHotEncoder 已经可以直接接受字符串形式的分类变量。
下面是一个示例数据框,您可以用它来测试代码:
data = pd.DataFrame({'col1': {0: 'ab321', 1: 'ab568', 2: 'mkld78'},
'col2': {0: 'Red', 1: 'Blue', 2: 'Green'},
'col3': {0: 'First', 1: 'Second', 2: 'Third'},
'col4': {0: 'Wisconsin', 1: 'California', 2: 'Portland'},
'col5': {0: 'a', 1: 'f', 2: 'g'},
'col6': {0: 1, 1: 2, 2: 3},
'target': {0: 0, 1: 0, 2: 1}})
这是我尝试过的:
我分别尝试了按列索引和按列名两种方式来解决这个错误:
#Index
# OneHotEncoding
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import pandas as pd
#Load data
train = pd.read_csv("data_train.csv")
test = pd.read_csv("data_test.csv")
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder(categorical_features = categorical_columns)
# reshape data
for index in categorical_indexes:
X.iloc[:,index] = ohe.fit_transform(X.iloc[:,index].values.reshape(-1,1))
#Column Names
# OneHotEncoding
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
train = pd.read_csv("data_train.csv")
test = pd.read_csv("data_test.csv")
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder(categorical_features = categorical_columns)
# reshape data
for column in categorical_columns:
X[column] = ohe.fit_transform(X[column].values.reshape(-1,1))
错误回溯:
IndexError Traceback (most recent call last)
<ipython-input-86-17c86bf649e2> in <module>
11 # reshape data
12 for index in categorical_indexes:
---> 13 X.iloc[:,index] = ohe.fit_transform(X.iloc[:,index].values.reshape(-1,1))
14
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\sklearn\preprocessing\_encoders.py in fit_transform(self, X, y)
622 self._validate_keywords()
623
--> 624 self._handle_deprecations(X)
625
626 if self._legacy_mode:
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\sklearn\preprocessing\_encoders.py in _handle_deprecations(self, X)
453 n_features = X.shape[1]
454 sel = np.zeros(n_features, dtype=bool)
--> 455 sel[np.asarray(self.categorical_features)] = True
456 if sum(sel) == 0:
457 self.categories_ = []
IndexError: arrays used as indices must be of integer (or boolean) type
您误解了 OneHotEncoder 的用法:它应当在整个训练集上一次性拟合,而不是逐列拟合。
报错的直接原因是 categorical_features 参数被用作数组下标,只接受整数索引或布尔掩码,而代码传入的是列名字符串。
使用这个:
data = pd.DataFrame({'col1': {0: 'ab321', 1: 'ab568', 2: 'mkld78'},
'col2': {0: 'Red', 1: 'Blue', 2: 'Green'},
'col3': {0: 'First', 1: 'Second', 2: 'Third'},
'col4': {0: 'Wisconsin', 1: 'California', 2: 'Portland'},
'col5': {0: 'a', 1: 'f', 2: 'g'},
'col6': {0: 1, 1: 2, 2: 3},
'target': {0: 0, 1: 0, 2: 1}})
# OneHotEncoding
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
train = data.iloc[0:2,:]
test = data.iloc[2:,:]
X= train.drop(["target"], axis = 1)
y= train["target"]
# Filter categorical columns
categorical_columns = ["col1","col2","col3","col4","col5"]
categorical_indexes = np.where(X.dtypes == 'object')[0]
# OHE
ohe = OneHotEncoder()
X_ = ohe.fit_transform(X)
X_
# <2x12 sparse matrix of type '<type 'numpy.float64'>'
# with 12 stored elements in Compressed Sparse Row format>