如何在训练 xgboost 时设置评估集(验证集)?
How do I train xgboost with an evaluation set?
在使用 sklearn wrapper 时,这对我来说很容易做到:
import xgboost as xgb
# Scikit-learn wrapper: hyper-parameters, the evaluation metric and the
# early-stopping patience are configured on the estimator itself, and the
# evaluation set is handed to fit() -- not to xgb.train(), which belongs to
# the native API and expects a params dict plus DMatrix objects.
# NOTE(review): "rmse" is a regression metric on a classifier; kept from the
# original snippet -- confirm it is what you want to monitor.
# NOTE(review): `early_stopping_rounds` as a constructor argument needs
# xgboost >= 1.6; on older releases pass it to fit() instead.
clf = xgb.XGBClassifier(
    n_estimators=1500,
    learning_rate=0.015,
    gamma=0.3,
    min_child_weight=3,
    nthread=15,
    max_depth=150,
    subsample=0.9,
    colsample_bytree=0.8,
    seed=2100,
    eval_metric="rmse",
    early_stopping_rounds=50,
)

VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere
    # (presumably sklearn.model_selection.train_test_split).
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23
    )
    # eval_set is the wrapper's way of supplying validation data; verbose=50
    # prints the metric every 50 boosting rounds.
    model = clf.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        verbose=50,
    )
但是在使用 xgboost 的原生 API(xgb.train)时,我无法完成同样的设置:
# Attempt to train through the native xgboost API (xgb.train). This snippet is
# kept exactly as posted because it reproduces the error quoted on its last line.
# NOTE(review): the indentation under `if` appears to have been lost in this paste.
params = {
'objective' : 'gpu:reg:linear',
'learning_rate': 0.02,
'gamma' : 0.3,
'min_child_weight' : 3,
'nthread' : 15,
'max_depth' : 30,
'subsample' : 0.9,
'colsample_bytree' : 0.8,
'seed':2100,
'eval_metric' : "rmse",
# NOTE(review): presumably meant as the number of boosting rounds; verify that
# xgb.train() reads it from this dict rather than from its own argument.
'num_boost_round' : 300
}
VALID = True
if VALID == True:
# Split X / y into training and validation parts (19% held out, fixed seed).
X_train, X_valid, y_train, y_valid = train_test_split(
X, y, test_size = 0.19, random_state=23)
# The raw training arrays are passed first and the validation pair is passed
# as `evallist`; the error below shows that train() has no such keyword.
model = xgb.train(X_train, y_train, params,
evallist = [(X_valid, y_valid)],
verbose_eval = 50,
early_stopping_rounds=50)
#error TypeError: train() got an unexpected keyword argument 'evallist'
只需要正确指定参数即可:
# Native-API training with an evaluation set and early stopping.
#
# The dict holds booster hyper-parameters only. The number of boosting rounds
# is an argument of xgb.train(), so the former 'num_boost_round' and
# 'n_estimators' entries (which the booster did not use) are removed and the
# round count is passed explicitly below.
params = {
    # The 'gpu:reg:linear' objective from the question is left out; GPU
    # training is selected through tree_method instead.
    # NOTE(review): xgboost >= 2.0 prefers tree_method='hist' with
    # device='cuda' over 'gpu_hist' -- confirm against the installed version.
    'tree_method': 'gpu_hist',
    'learning_rate': 0.02,
    'gamma': 0.3,
    'min_child_weight': 3,
    'nthread': 15,
    'max_depth': 30,
    'subsample': 0.9,
    'colsample_bytree': 0.8,
    'seed': 2100,
    'eval_metric': "rmse",
    'max_leaves': 300,
}

VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23
    )
    # xgb.train() consumes DMatrix objects rather than raw arrays.
    tr_data = xgb.DMatrix(X_train, label=y_train)
    va_data = xgb.DMatrix(X_valid, label=y_valid)
    # Optional, to free memory once the DMatrix objects exist:
    #   del X_train, X_valid, y_train, y_valid; gc.collect()

    # Early stopping watches the last entry of `evals`, so the validation
    # set is listed after the training set.
    watchlist = [(tr_data, 'train'), (va_data, 'valid')]
    model = xgb.train(
        params,
        tr_data,
        num_boost_round=300,
        evals=watchlist,
        maximize=False,
        early_stopping_rounds=30,
        verbose_eval=50,
    )
在使用 sklearn wrapper 时,这对我来说很容易做到:
import xgboost as xgb
# Scikit-learn wrapper: hyper-parameters, the evaluation metric and the
# early-stopping patience are configured on the estimator itself, and the
# evaluation set is handed to fit() -- not to xgb.train(), which belongs to
# the native API and expects a params dict plus DMatrix objects.
# NOTE(review): "rmse" is a regression metric on a classifier; kept from the
# original snippet -- confirm it is what you want to monitor.
# NOTE(review): `early_stopping_rounds` as a constructor argument needs
# xgboost >= 1.6; on older releases pass it to fit() instead.
clf = xgb.XGBClassifier(
    n_estimators=1500,
    learning_rate=0.015,
    gamma=0.3,
    min_child_weight=3,
    nthread=15,
    max_depth=150,
    subsample=0.9,
    colsample_bytree=0.8,
    seed=2100,
    eval_metric="rmse",
    early_stopping_rounds=50,
)

VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere
    # (presumably sklearn.model_selection.train_test_split).
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23
    )
    # eval_set is the wrapper's way of supplying validation data; verbose=50
    # prints the metric every 50 boosting rounds.
    model = clf.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        verbose=50,
    )
但是在使用 xgboost 的原生 API(xgb.train)时,我无法完成同样的设置:
# Attempt to train through the native xgboost API (xgb.train). This snippet is
# kept exactly as posted because it reproduces the error quoted on its last line.
# NOTE(review): the indentation under `if` appears to have been lost in this paste.
params = {
'objective' : 'gpu:reg:linear',
'learning_rate': 0.02,
'gamma' : 0.3,
'min_child_weight' : 3,
'nthread' : 15,
'max_depth' : 30,
'subsample' : 0.9,
'colsample_bytree' : 0.8,
'seed':2100,
'eval_metric' : "rmse",
# NOTE(review): presumably meant as the number of boosting rounds; verify that
# xgb.train() reads it from this dict rather than from its own argument.
'num_boost_round' : 300
}
VALID = True
if VALID == True:
# Split X / y into training and validation parts (19% held out, fixed seed).
X_train, X_valid, y_train, y_valid = train_test_split(
X, y, test_size = 0.19, random_state=23)
# The raw training arrays are passed first and the validation pair is passed
# as `evallist`; the error below shows that train() has no such keyword.
model = xgb.train(X_train, y_train, params,
evallist = [(X_valid, y_valid)],
verbose_eval = 50,
early_stopping_rounds=50)
#error TypeError: train() got an unexpected keyword argument 'evallist'
只需要正确指定参数即可:
# Native-API training with an evaluation set and early stopping.
#
# The dict holds booster hyper-parameters only. The number of boosting rounds
# is an argument of xgb.train(), so the former 'num_boost_round' and
# 'n_estimators' entries (which the booster did not use) are removed and the
# round count is passed explicitly below.
params = {
    # The 'gpu:reg:linear' objective from the question is left out; GPU
    # training is selected through tree_method instead.
    # NOTE(review): xgboost >= 2.0 prefers tree_method='hist' with
    # device='cuda' over 'gpu_hist' -- confirm against the installed version.
    'tree_method': 'gpu_hist',
    'learning_rate': 0.02,
    'gamma': 0.3,
    'min_child_weight': 3,
    'nthread': 15,
    'max_depth': 30,
    'subsample': 0.9,
    'colsample_bytree': 0.8,
    'seed': 2100,
    'eval_metric': "rmse",
    'max_leaves': 300,
}

VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23
    )
    # xgb.train() consumes DMatrix objects rather than raw arrays.
    tr_data = xgb.DMatrix(X_train, label=y_train)
    va_data = xgb.DMatrix(X_valid, label=y_valid)
    # Optional, to free memory once the DMatrix objects exist:
    #   del X_train, X_valid, y_train, y_valid; gc.collect()

    # Early stopping watches the last entry of `evals`, so the validation
    # set is listed after the training set.
    watchlist = [(tr_data, 'train'), (va_data, 'valid')]
    model = xgb.train(
        params,
        tr_data,
        num_boost_round=300,
        evals=watchlist,
        maximize=False,
        early_stopping_rounds=30,
        verbose_eval=50,
    )