如何解码 pandas DataFrame 中的 LabelEncoder 编码列?

How to decode LabelEncoder encoded column in pandas DataFrame?

我正在处理一个数据集(dataset)。我通过将分类(object 类型)列转换为数字来练习特征工程,代码如下:

import pandas as pd 
import numpy as np
from sklearn import preprocessing
# Load the training data; the 'Id' column becomes the row index.
df = pd.read_csv(r'train.csv',index_col='Id')
print(df.shape)
df.head()
# Split the columns into numeric ones and all remaining (non-numeric) ones.
colsNum = df.select_dtypes(np.number).columns
colsObj = df.columns.difference(colsNum)

# Fill numeric gaps with the floored column mean, and non-numeric gaps with
# the first row of the per-column mode (a most frequent value).
df[colsNum] = df[colsNum].fillna(df[colsNum].mean()//1)
df[colsObj] = df[colsObj].fillna(df[colsObj].mode().iloc[0])

# A single encoder instance is shared by every column in the loops below.
label_encoder = preprocessing.LabelEncoder() 
for col in colsObj:
    # fit_transform refits the shared encoder on each column, replacing the
    # classes it learned from the previous column.
    df[col] = label_encoder.fit_transform(df[col])
df.head()
for col in colsObj:
    # NOTE(review): by now the encoder only holds the fit from the last column
    # of colsObj, so the other columns cannot be decoded back to their
    # original labels here. This is the problem the question asks about.
    df[col] = label_encoder.inverse_transform(df[col])
df.head()

但是这里 inverse_transform() 并没有还原出原始数据集。请帮助我!

为了能够正确还原,需要为每一列分别使用并保存一个独立的 LabelEncoder,例如把它们存放在一个以列名为键的字典中:

# Imports needed by the whole answer snippet: `pd` is used to read the CSV and
# `np` is used further down to select the numeric columns.
import numpy as np
import pandas as pd
from sklearn import preprocessing

# Load the training data; the 'Id' column becomes the row index.
df = pd.read_csv(r'train.csv', index_col='Id')

# Preview the raw data before any imputation or encoding.
print(df.shape)
print(df.head())
    MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
Id                                                                    
1           60       RL         65.0     8450   Pave   NaN      Reg   
2           20       RL         80.0     9600   Pave   NaN      Reg   
3           60       RL         68.0    11250   Pave   NaN      IR1   
4           70       RL         60.0     9550   Pave   NaN      IR1   
5           60       RL         84.0    14260   Pave   NaN      IR1   

   LandContour Utilities LotConfig  ... PoolArea PoolQC Fence MiscFeature  \
Id                                  ...                                     
1          Lvl    AllPub    Inside  ...        0    NaN   NaN         NaN   
2          Lvl    AllPub       FR2  ...        0    NaN   NaN         NaN   
3          Lvl    AllPub    Inside  ...        0    NaN   NaN         NaN   
4          Lvl    AllPub    Corner  ...        0    NaN   NaN         NaN   
5          Lvl    AllPub       FR2  ...        0    NaN   NaN         NaN   

   MiscVal MoSold  YrSold  SaleType  SaleCondition  SalePrice  
Id                                                             
1        0      2    2008        WD         Normal     208500  
2        0      5    2007        WD         Normal     181500  
3        0      9    2008        WD         Normal     223500  
4        0      2    2006        WD        Abnorml     140000  
5        0     12    2008        WD         Normal     250000  

[5 rows x 80 columns]

# Separate the numeric columns from every other (non-numeric) column.
colsNum = df.select_dtypes(np.number).columns
colsObj = df.columns.difference(colsNum)

# Impute missing values: floored column mean for numeric columns, and the
# first row of the per-column mode for the non-numeric ones.
numeric_fill = df[colsNum].mean() // 1
df[colsNum] = df[colsNum].fillna(numeric_fill)
categorical_fill = df[colsObj].mode().iloc[0]
df[colsObj] = df[colsObj].fillna(categorical_fill)

from collections import defaultdict

# Maps column name -> LabelEncoder. A fresh encoder is created the first time
# a column name is looked up, so every column keeps its own fitted encoder.
di = defaultdict(preprocessing.LabelEncoder)

for name in colsObj:
    encoder = di[name]
    df[name] = encoder.fit_transform(df[name])

# Preview the frame with the non-numeric columns replaced by integer codes.
print(df.head())
    MSSubClass  MSZoning  LotFrontage  LotArea  Street  Alley  LotShape  \
Id                                                                        
1           60         3         65.0     8450       1      0         3   
2           20         3         80.0     9600       1      0         3   
3           60         3         68.0    11250       1      0         0   
4           70         3         60.0     9550       1      0         0   
5           60         3         84.0    14260       1      0         0   

    LandContour  Utilities  LotConfig  ...  PoolArea  PoolQC  Fence  \
Id                                     ...                            
1             3          0          4  ...         0       2      2   
2             3          0          2  ...         0       2      2   
3             3          0          4  ...         0       2      2   
4             3          0          0  ...         0       2      2   
5             3          0          2  ...         0       2      2   

    MiscFeature  MiscVal  MoSold  YrSold  SaleType  SaleCondition  SalePrice  
Id                                                                            
1             2        0       2    2008         8              4     208500  
2             2        0       5    2007         8              4     181500  
3             2        0       9    2008         8              4     223500  
4             2        0       2    2006         8              0     140000  
5             2        0      12    2008         8              4     250000  

[5 rows x 80 columns]

# Show the dictionary: one LabelEncoder instance per encoded column name.
print (di)
defaultdict(<class 'sklearn.preprocessing._label.LabelEncoder'>, {'Alley': LabelEncoder(), 'BldgType': LabelEncoder(), 'BsmtCond': LabelEncoder(), 'BsmtExposure': LabelEncoder(), 'BsmtFinType1': LabelEncoder(), 'BsmtFinType2': LabelEncoder(), 'BsmtQual': LabelEncoder(), 'CentralAir': LabelEncoder(), 'Condition1': LabelEncoder(), 'Condition2': LabelEncoder(), 'Electrical': LabelEncoder(), 'ExterCond': LabelEncoder(), 'ExterQual': LabelEncoder(), 'Exterior1st': LabelEncoder(), 'Exterior2nd': LabelEncoder(), 'Fence': LabelEncoder(), 'FireplaceQu': LabelEncoder(), 'Foundation': LabelEncoder(), 'Functional': LabelEncoder(), 'GarageCond': LabelEncoder(), 'GarageFinish': LabelEncoder(), 'GarageQual': LabelEncoder(), 'GarageType': LabelEncoder(), 'Heating': LabelEncoder(), 'HeatingQC': LabelEncoder(), 'HouseStyle': LabelEncoder(), 'KitchenQual': LabelEncoder(), 'LandContour': LabelEncoder(), 'LandSlope': LabelEncoder(), 'LotConfig': LabelEncoder(), 'LotShape': LabelEncoder(), 'MSZoning': LabelEncoder(), 'MasVnrType': LabelEncoder(), 'MiscFeature': LabelEncoder(), 'Neighborhood': LabelEncoder(), 'PavedDrive': LabelEncoder(), 'PoolQC': LabelEncoder(), 'RoofMatl': LabelEncoder(), 'RoofStyle': LabelEncoder(), 'SaleCondition': LabelEncoder(), 'SaleType': LabelEncoder(), 'Street': LabelEncoder(), 'Utilities': LabelEncoder()})

# Decode every encoded column with the encoder that was fitted on it; `di`
# holds the encoders in the same order in which the columns were encoded.
for column, encoder in di.items():
    df[column] = encoder.inverse_transform(df[column])

# Preview the frame with the original labels restored.
print(df.head())
    MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
Id                                                                    
1           60       RL         65.0     8450   Pave  Grvl      Reg   
2           20       RL         80.0     9600   Pave  Grvl      Reg   
3           60       RL         68.0    11250   Pave  Grvl      IR1   
4           70       RL         60.0     9550   Pave  Grvl      IR1   
5           60       RL         84.0    14260   Pave  Grvl      IR1   

   LandContour Utilities LotConfig  ... PoolArea PoolQC  Fence MiscFeature  \
Id                                  ...                                      
1          Lvl    AllPub    Inside  ...        0     Gd  MnPrv        Shed   
2          Lvl    AllPub       FR2  ...        0     Gd  MnPrv        Shed   
3          Lvl    AllPub    Inside  ...        0     Gd  MnPrv        Shed   
4          Lvl    AllPub    Corner  ...        0     Gd  MnPrv        Shed   
5          Lvl    AllPub       FR2  ...        0     Gd  MnPrv        Shed   

   MiscVal MoSold  YrSold  SaleType  SaleCondition  SalePrice  
Id                                                             
1        0      2    2008        WD         Normal     208500  
2        0      5    2007        WD         Normal     181500  
3        0      9    2008        WD         Normal     223500  
4        0      2    2006        WD        Abnorml     140000  
5        0     12    2008        WD         Normal     250000