如何使 Python LightGBM 代码接受列表
How to make Python LightGBM code to accept list
我正在使用以下代码:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import mean_squared_error,roc_auc_score,precision_score
pd.options.display.max_columns = 999
import lightgbm as lgb
def load_csv(filepath):
data = []
col = []
checkcol = False
with open(filepath) as f:
for val in f.readlines():
val = val.replace("\n","")
val = val.split(',')
if checkcol is False:
col = val
checkcol = True
else:
data.append(val)
df = pd.DataFrame(data=data, columns=col)
return df
heart=load_csv(r'C:\Users\PC\Documents\Essay\heart.csv')
df=heart[['chol','cp']]
Y=heart['sex']
sc=StandardScaler()
sc.fit(df)
X=pd.DataFrame(sc.fit_transform(df))
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=0)
d_train=lgb.Dataset(X_train, label=y_train)
params={}
params['learning_rate']=0.03
params['boosting_type']='gbdt' #GradientBoostingDecisionTree
params['objective']='binary' #Binary target feature
params['metric']='binary_logloss' #metric for binary classification
params['max_depth']=10
clf=lgb.train(params,d_train,100)
只得到错误信息:
ValueError: Series.dtypes must be int, float or bool
我知道这是因为我选择了 Y,但我也尝试过这里的数组和嵌套列表,但仍然失败。
使用 labelencoder 可以将您的列转换为预期格式:
from sklearn import preprocessing
encoder = preprocessing.LabelEncoder()
encoder.fit(df['sex'])
encoder.transform(df['sex'])
这将生成一个 0 和 1 列表,您可以将其输入到您的学习算法中:
array([1, 0, 0, 0, 1])
我正在使用以下代码:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import mean_squared_error,roc_auc_score,precision_score
pd.options.display.max_columns = 999
import lightgbm as lgb
def load_csv(filepath):
data = []
col = []
checkcol = False
with open(filepath) as f:
for val in f.readlines():
val = val.replace("\n","")
val = val.split(',')
if checkcol is False:
col = val
checkcol = True
else:
data.append(val)
df = pd.DataFrame(data=data, columns=col)
return df
heart=load_csv(r'C:\Users\PC\Documents\Essay\heart.csv')
df=heart[['chol','cp']]
Y=heart['sex']
sc=StandardScaler()
sc.fit(df)
X=pd.DataFrame(sc.fit_transform(df))
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=0)
d_train=lgb.Dataset(X_train, label=y_train)
params={}
params['learning_rate']=0.03
params['boosting_type']='gbdt' #GradientBoostingDecisionTree
params['objective']='binary' #Binary target feature
params['metric']='binary_logloss' #metric for binary classification
params['max_depth']=10
clf=lgb.train(params,d_train,100)
只得到错误信息:
ValueError: Series.dtypes must be int, float or bool
我知道这是因为我选择了 Y,但我也尝试过这里的数组和嵌套列表,但仍然失败。
使用 labelencoder 可以将您的列转换为预期格式:
from sklearn import preprocessing
encoder = preprocessing.LabelEncoder()
encoder.fit(df['sex'])
encoder.transform(df['sex'])
这将生成一个 0 和 1 列表,您可以将其输入到您的学习算法中:
array([1, 0, 0, 0, 1])