将 XGBoost xgb.train 的输出保存为带有 python 日志记录的日志文件
Save the output of xgb.train of XGBoost as a log file with python logging
我试图通过 logging
将 XGBoost xgb.train
的输出保存为日志文件，但无法记录输出。我怎样才能记录下来？我试图参考现有的 Stack Overflow 问题，但没有找到可行的办法。希望你能用一个具体的例子来展示。
import sys
import logging
# ---------------------------------------------- #
# Some logging settings
# ---------------------------------------------- #
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_digits

# Fixed seed so the KFold shuffle (and therefore the reported metrics) is reproducible.
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
# NOTE: pass n_class as a keyword — positional use was removed in newer scikit-learn.
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    param = {'max_depth': 2, 'eta': 0.3, 'silent': 1, 'objective': 'binary:logistic'}
    dtrain = xgb.DMatrix(X[train_index], y[train_index])
    dtest = xgb.DMatrix(X[test_index], y[test_index])
    # specify validations set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)
# I want to record this output.
# Zeros and Ones from the Digits dataset: binary classification
# [0] eval-error:0.011111 train-error:0.011111
# [1] eval-error:0.011111 train-error:0.005556
# [0] eval-error:0.016667 train-error:0.005556
# [1] eval-error:0.005556 train-error:0
import sys
%logstart -o "test.log"
sys.stdout = open('test.log', 'a')
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_digits
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
param = {'max_depth':2, 'eta':0.3, 'silent':1, 'objective':'binary:logistic' }
dtrain = xgb.DMatrix(X[train_index], y[train_index])
dtest = xgb.DMatrix(X[test_index], y[test_index])
# specify validations set to watch performance
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)
这将开始把所有内容（输入和输出）保存到文件 test.log 中。
xgboost 将其日志直接打印到标准输出，您无法更改该行为。
但是 xgb.train
的 callbacks
参数可以在内部打印的同一时机记录结果。
以下代码是使用回调将 xgboost 日志写入 logger 的示例。
log_evaluation()
返回一个由 xgboost 内部调用的回调函数，您可以将该回调函数添加到 callbacks 参数中。
from logging import getLogger, basicConfig, INFO
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
# Some logging settings
# Configure the root logger at INFO so the callback's logger.info() calls are emitted.
basicConfig(level=INFO)
logger = getLogger(__name__)  # module-level logger used by log_evaluation's callback
def log_evaluation(period=1, show_stdv=True):
    """Create a callback that logs evaluation result with logger.

    Parameters
    ----------
    period : int
        The period to log the evaluation results
    show_stdv : bool, optional
        Whether show stdv if provided

    Returns
    -------
    callback : function
        A callback that logs evaluation every period iterations into logger.
    """
    def _fmt_metric(value, show_stdv=True):
        """format metric string"""
        # value is ('name', score) or ('name', score, stdv) from xgboost.
        if len(value) == 2:
            return '%s:%g' % (value[0], value[1])
        elif len(value) == 3:
            if show_stdv:
                return '%s:%g+%g' % (value[0], value[1], value[2])
            else:
                return '%s:%g' % (value[0], value[1])
        else:
            raise ValueError("wrong metric value")

    def callback(env):
        # Skip on non-zero ranks (distributed training), when there is nothing
        # to log, or when logging is disabled. `not period` also guards
        # period=0, which would otherwise raise ZeroDivisionError on
        # `i % period` below (the original `period is False` check missed it).
        if env.rank != 0 or len(env.evaluation_result_list) == 0 or not period:
            return
        i = env.iteration
        # Always log the first and last iterations, plus every `period`-th one.
        if i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration:
            msg = '\t'.join([_fmt_metric(x, show_stdv) for x in env.evaluation_result_list])
            logger.info('[%d]\t%s\n' % (i, msg))
    return callback
# Same CV loop as the question, but with the logging callback attached.
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
# NOTE: pass n_class as a keyword — positional use was removed in newer scikit-learn.
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    param = {'max_depth': 2, 'eta': 0.3, 'silent': 1, 'objective': 'binary:logistic'}
    dtrain = xgb.DMatrix(X[train_index], y[train_index])
    dtest = xgb.DMatrix(X[test_index], y[test_index])
    # specify validations set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    # add logger: the callback mirrors xgboost's own per-iteration printout into `logger`
    callbacks = [log_evaluation(1, True)]
    bst = xgb.train(param, dtrain, num_round, watchlist, callbacks=callbacks)
我试图通过 logging
将 XGBoost xgb.train
的输出保存为日志文件，但无法记录输出。我怎样才能记录下来？我试图参考现有的 Stack Overflow 问题，但没有找到可行的办法。希望你能用一个具体的例子来展示。
import sys
import logging
# ---------------------------------------------- #
# Some logging settings
# ---------------------------------------------- #
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_digits

# Fixed seed so the KFold shuffle (and therefore the reported metrics) is reproducible.
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
# NOTE: pass n_class as a keyword — positional use was removed in newer scikit-learn.
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    param = {'max_depth': 2, 'eta': 0.3, 'silent': 1, 'objective': 'binary:logistic'}
    dtrain = xgb.DMatrix(X[train_index], y[train_index])
    dtest = xgb.DMatrix(X[test_index], y[test_index])
    # specify validations set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    bst = xgb.train(param, dtrain, num_round, watchlist)
# I want to record this output.
# Zeros and Ones from the Digits dataset: binary classification
# [0] eval-error:0.011111 train-error:0.011111
# [1] eval-error:0.011111 train-error:0.005556
# [0] eval-error:0.016667 train-error:0.005556
# [1] eval-error:0.005556 train-error:0
import sys
%logstart -o "test.log"
sys.stdout = open('test.log', 'a')
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold
from sklearn.datasets import load_digits
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
param = {'max_depth':2, 'eta':0.3, 'silent':1, 'objective':'binary:logistic' }
dtrain = xgb.DMatrix(X[train_index], y[train_index])
dtest = xgb.DMatrix(X[test_index], y[test_index])
# specify validations set to watch performance
watchlist = [(dtest,'eval'), (dtrain,'train')]
num_round = 2
bst = xgb.train(param, dtrain, num_round, watchlist)
这将开始把所有内容（输入和输出）保存到文件 test.log 中。
xgboost 将其日志直接打印到标准输出，您无法更改该行为。
但是 xgb.train
的 callbacks
参数可以在内部打印的同一时机记录结果。
以下代码是使用回调将 xgboost 日志写入 logger 的示例。
log_evaluation()
返回一个由 xgboost 内部调用的回调函数，您可以将该回调函数添加到 callbacks 参数中。
from logging import getLogger, basicConfig, INFO
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold
# Some logging settings
# Configure the root logger at INFO so the callback's logger.info() calls are emitted.
basicConfig(level=INFO)
logger = getLogger(__name__)  # module-level logger used by log_evaluation's callback
def log_evaluation(period=1, show_stdv=True):
    """Create a callback that logs evaluation result with logger.

    Parameters
    ----------
    period : int
        The period to log the evaluation results
    show_stdv : bool, optional
        Whether show stdv if provided

    Returns
    -------
    callback : function
        A callback that logs evaluation every period iterations into logger.
    """
    def _fmt_metric(value, show_stdv=True):
        """format metric string"""
        # value is ('name', score) or ('name', score, stdv) from xgboost.
        if len(value) == 2:
            return '%s:%g' % (value[0], value[1])
        elif len(value) == 3:
            if show_stdv:
                return '%s:%g+%g' % (value[0], value[1], value[2])
            else:
                return '%s:%g' % (value[0], value[1])
        else:
            raise ValueError("wrong metric value")

    def callback(env):
        # Skip on non-zero ranks (distributed training), when there is nothing
        # to log, or when logging is disabled. `not period` also guards
        # period=0, which would otherwise raise ZeroDivisionError on
        # `i % period` below (the original `period is False` check missed it).
        if env.rank != 0 or len(env.evaluation_result_list) == 0 or not period:
            return
        i = env.iteration
        # Always log the first and last iterations, plus every `period`-th one.
        if i % period == 0 or i + 1 == env.begin_iteration or i + 1 == env.end_iteration:
            msg = '\t'.join([_fmt_metric(x, show_stdv) for x in env.evaluation_result_list])
            logger.info('[%d]\t%s\n' % (i, msg))
    return callback
# Same CV loop as the question, but with the logging callback attached.
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
# NOTE: pass n_class as a keyword — positional use was removed in newer scikit-learn.
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    param = {'max_depth': 2, 'eta': 0.3, 'silent': 1, 'objective': 'binary:logistic'}
    dtrain = xgb.DMatrix(X[train_index], y[train_index])
    dtest = xgb.DMatrix(X[test_index], y[test_index])
    # specify validations set to watch performance
    watchlist = [(dtest, 'eval'), (dtrain, 'train')]
    num_round = 2
    # add logger: the callback mirrors xgboost's own per-iteration printout into `logger`
    callbacks = [log_evaluation(1, True)]
    bst = xgb.train(param, dtrain, num_round, watchlist, callbacks=callbacks)