BayesianOptimization fails due to float error
I want to tune the hyperparameters of my lightgbm model, and I am using a Bayesian optimization procedure to do so. Unfortunately, the algorithm fails to converge.
MRE
import warnings
import pandas as pd
import numpy as np
warnings.filterwarnings("ignore")
import lightgbm as lgb
from bayes_opt import BayesianOptimization
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()
train = pd.DataFrame(housing['data'], columns=housing['feature_names'])
train_y = train.pop('MedInc')

train_data = lgb.Dataset(train, train_y, free_raw_data=False)

def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
    params = {
        "objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1,
        "min_child_samples": 20, "reg_alpha": 1, "reg_lambda": 1, "boosting": "gbdt",
        "learning_rate": 0.01, "subsample": 0.8, "colsample_bytree": 0.8,
        "verbosity": -1, "metric": 'rmse'
    }
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['max_depth'] = int(round(max_depth))
    params['num_leaves'] = int(round(num_leaves))
    params['min_gain_to_split'] = float(min_gain_to_split)
    params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))
    cv_result = lgb.cv(params, train_data, nfold=5, seed=0, verbose_eval=200, stratified=False)
    return (np.array(cv_result['rmse-mean'])).max()

lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
                                        'max_depth': (5, 9),
                                        'num_leaves': (1, 300),
                                        'min_gain_to_split': (0.001, 0.1),
                                        'min_data_in_leaf': (5, 50)}, random_state=0)
lgbBO.maximize(init_points=5, n_iter=5, acq='ei')
def bayes_parameter_opt_lgb(train, train_y, init_round=15, opt_round=25, n_folds=5, random_seed=0,
                            n_estimators=10000, learning_rate=0.05, output_process=False):
    # prepare data
    train_data = lgb.Dataset(train, train_y, free_raw_data=False)
    # parameters
    def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
        params = {
            "objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1,
            "min_child_samples": 20, "reg_alpha": 1, "reg_lambda": 1, "boosting": "gbdt",
            "learning_rate": 0.01, "subsample": 0.8, "colsample_bytree": 0.8,
            "verbosity": -1, "metric": 'rmse'
        }
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['num_leaves'] = int(round(num_leaves))
        params['min_gain_to_split'] = float(min_gain_to_split)
        params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))
        cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=random_seed, verbose_eval=200, stratified=False)
        return (np.array(cv_result['rmse-mean'])).max()
    # range
    lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
                                            'max_depth': (5, 9),
                                            'num_leaves': (200, 300),
                                            'min_gain_to_split': (0.001, 0.1),
                                            'min_data_in_leaf': (5, 50)}, random_state=0)
    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round, acq='ei')
    # output optimization process
    lgbBO.points_to_csv("bayes_opt_result.csv")
    # return best parameters
    return lgbBO.res['max']['max_params']

opt_params = bayes_parameter_opt_lgb(train, train_y, init_round=200, opt_round=20, n_folds=5,
                                     random_seed=0, n_estimators=1000, learning_rate=0.01)
This produces the following stack trace:
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:179, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
178 try:
--> 179 x_probe = next(self._queue)
180 except StopIteration:
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:25, in Queue.__next__(self)
24 if self.empty:
---> 25 raise StopIteration("Queue is empty, no more objects to retrieve.")
26 obj = self._queue[0]
StopIteration: Queue is empty, no more objects to retrieve.
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in <cell line: 35>()
32 # return best parameters
33 return lgbBO.res['max']['max_params']
---> 35 opt_params = bayes_parameter_opt_lgb(train, train_y, init_round=20, opt_round=20, n_folds=5, random_seed=0, n_estimators=1000, learning_rate=0.01)
..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in bayes_parameter_opt_lgb(train, train_y, init_round, opt_round, n_folds, random_seed, n_estimators, learning_rate, output_process)
21 lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
22 'max_depth': (5, 9),
23 'num_leaves' : (200,300),
24 'min_gain_to_split': (0.001, 0.1),
25 'min_data_in_leaf': (5, 50)}, random_state=0)
26 # optimize
---> 27 lgbBO.maximize(init_points=init_round, n_iter=opt_round,acq='ei')
29 # output optimization process
30 lgbBO.points_to_csv("bayes_opt_result.csv")
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:182, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
180 except StopIteration:
181 util.update_params()
--> 182 x_probe = self.suggest(util)
183 iteration += 1
185 self.probe(x_probe, lazy=False)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:131, in BayesianOptimization.suggest(self, utility_function)
128 self._gp.fit(self._space.params, self._space.target)
130 # Finding argmax of the acquisition function.
--> 131 suggestion = acq_max(
132 ac=utility_function.utility,
133 gp=self._gp,
134 y_max=self._space.target.max(),
135 bounds=self._space.bounds,
136 random_state=self._random_state
137 )
139 return self._space.array_to_params(suggestion)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\util.py:65, in acq_max(ac, gp, y_max, bounds, random_state, n_warmup, n_iter)
62 continue
64 # Store it if better than previous minimum(maximum).
---> 65 if max_acq is None or -res.fun[0] >= max_acq:
66 x_max = res.x
67 max_acq = -res.fun[0]
TypeError: 'float' object is not subscriptable
Edit: The MRE above the stack trace should reproduce the error that follows. As the stack trace suggests, -res.fun[0] is apparently expected to be a list, and therefore subscriptable (line 65, at the end of the stack trace), but it is not, and I don't understand why. This value is assigned to max_acq inside the maximization function acq_max() (line 131 of the stack trace), and the Gaussian process itself is part of the BayesianOptimization function (line 27 of the stack trace).

Why do I get TypeError: 'float' object is not subscriptable, and how can I fix it?
This is related to a change in scipy 1.8.0: the code should use -np.squeeze(res.fun) instead of -res.fun[0]. See https://github.com/fmfn/BayesianOptimization/issues/300.
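For reference, the fix amounts to a small change in bayes_opt/util.py's acq_max(), around the lines visible at the end of the stack trace (a sketch based on the issue discussion; the exact context may differ between versions):

    # Store it if better than previous minimum(maximum).
    # scipy 1.8.0 returns res.fun as a plain float instead of a 1-element
    # array, so res.fun[0] raises TypeError; np.squeeze accepts both.
    if max_acq is None or -np.squeeze(res.fun) >= max_acq:
        x_max = res.x
        max_acq = -np.squeeze(res.fun)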
Comments on that bug report indicate that reverting to scipy 1.7.0 resolves the problem, and a fix has been proposed for the BayesianOptimization package itself:
https://github.com/fmfn/BayesianOptimization/pull/303
It has not been merged and released yet, though, so you can:
- revert to scipy 1.7.0
- use the forked GitHub version of BayesianOptimization that includes the patch (https://github.com/samFarrellDay/BayesianOptimization)
- apply the patch from #303 manually on your system
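If you want to see the underlying scipy change in isolation, here is a minimal sketch (the exact return type of res.fun depends on your scipy version and solver, so treat this as an illustration rather than a spec):

    import numpy as np
    from scipy.optimize import minimize

    # Minimize a trivial quadratic. Some solvers on scipy 1.7.x returned
    # res.fun as a 1-element array; scipy 1.8.0 returns a plain float, which
    # is why the res.fun[0] indexing inside acq_max() now raises TypeError.
    res = minimize(lambda x: float(np.sum(x ** 2)), x0=np.array([1.0]), method="L-BFGS-B")

    # res.fun[0]                 # TypeError: 'float' object is not subscriptable
    print(-np.squeeze(res.fun))  # works for both a float and a 1-element array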