如何在训练 xgboost 时设置评估集?

How do I set up xgboost training with an evaluation set?

在使用 sklearn wrapper 时,这对我来说很容易做到:

import xgboost as xgb
# scikit-learn style estimator: hyper-parameters and the evaluation metric
# are configured on the estimator itself.
clf = xgb.XGBClassifier(
    n_estimators=1500,
    learning_rate=0.015,
    gamma=0.3,
    min_child_weight=3,
    nthread=15,
    max_depth=150,
    subsample=0.9,
    colsample_bytree=0.8,
    seed=2100,
    eval_metric="rmse",
)

# Toggle for training with a held-out validation split.
VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere
    # (train_test_split is presumably sklearn.model_selection.train_test_split).
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23)
    # With the sklearn wrapper the evaluation set is handed to fit() through
    # `eval_set`, and the logging interval is `verbose` (not `verbose_eval`).
    # fit() returns the fitted estimator, so `model` is the trained `clf`.
    # NOTE(review): recent xgboost releases expect early_stopping_rounds on
    # the constructor instead of fit() -- confirm against the installed version.
    model = clf.fit(
        X_train,
        y_train,
        eval_set=[(X_valid, y_valid)],
        early_stopping_rounds=50,
        verbose=50,
    )

但是使用 xgboost 的原生接口(xgb.train)时,我无法完成同样的设置:

# Parameter dict handed to xgb.train below (the question's failing attempt).
params =   {
    'objective' : 'gpu:reg:linear',
    'learning_rate': 0.02, 
    'gamma' : 0.3, 
    'min_child_weight' : 3,
    'nthread' : 15,
    'max_depth' : 30,
    'subsample' : 0.9, 
    'colsample_bytree' : 0.8, 
    'seed':2100, 
    'eval_metric' : "rmse",
    'num_boost_round' : 300
}

# Toggle for training with a held-out validation split.
VALID = True
if VALID == True:
    # Split X / y into train and validation parts; random_state is fixed so
    # the split is repeatable. test_size = 0.19 is presumably the validation
    # fraction (scikit-learn's train_test_split) -- confirm.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size = 0.19, random_state=23)
    # This is the call that fails: xgb.train is given an `evallist` keyword,
    # which it rejects (see the error quoted below).
    # NOTE(review): the raw X_train / y_train are also passed positionally
    # ahead of `params`; check the expected argument order of xgb.train.
    model = xgb.train(X_train, y_train,  params,
                      evallist = [(X_valid, y_valid)], 
                      verbose_eval = 50, 
            early_stopping_rounds=50)

# Resulting error: TypeError: train() got an unexpected keyword argument 'evallist'

只需要正确指定参数即可:xgb.train 接收 DMatrix 形式的数据,评估集要通过 evals 参数(即下面的 watchlist)传入,而不是 evallist:

# Booster parameters for the native xgboost API.
# Only booster settings belong in this dict: the number of boosting rounds is
# an argument of xgb.train itself, so the former 'num_boost_round' and
# 'n_estimators' entries (which disagreed with each other and with the value
# actually passed to xgb.train) have been dropped.
params = {
    # The 'gpu:reg:linear' objective used in the question is left out here;
    # GPU training is selected through the tree method instead.
    'tree_method': 'gpu_hist',
    'learning_rate': 0.02,
    'gamma': 0.3,
    'min_child_weight': 3,
    'nthread': 15,
    'max_depth': 30,
    'subsample': 0.9,
    'colsample_bytree': 0.8,
    'seed': 2100,
    'eval_metric': "rmse",
    'max_leaves': 300,
}

# Toggle for training with a held-out validation split.
VALID = True
if VALID:
    # X, y and train_test_split are expected to be defined/imported elsewhere
    # (train_test_split is presumably sklearn.model_selection.train_test_split).
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.19, random_state=23)

    # The native API trains on DMatrix objects rather than raw arrays.
    tr_data = xgb.DMatrix(X_train, label=y_train)
    va_data = xgb.DMatrix(X_valid, label=y_valid)

    # Optional: if memory is tight, the raw splits can be released at this
    # point with `del X_train, X_valid, y_train, y_valid` and `gc.collect()`.

    # (DMatrix, name) pairs evaluated during training; the names label the
    # metric lines in the training log.
    watchlist = [(tr_data, 'train'), (va_data, 'valid')]

    # The evaluation sets go in through `evals` (there is no `evallist`
    # keyword). maximize=False because a lower rmse is better.
    model = xgb.train(
        params,
        tr_data,
        num_boost_round=300,
        evals=watchlist,
        maximize=False,
        early_stopping_rounds=30,
        verbose_eval=50,
    )