BayesianOptimization fails due to float error

I want to tune the hyperparameters of my lightgbm model (HPO). I am using a Bayesian optimization procedure to do this. Unfortunately, my algorithm fails to converge.

MRE

import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")
import lightgbm as lgb
from bayes_opt import BayesianOptimization
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
train = pd.DataFrame(housing['data'], columns=housing['feature_names'])
train_y = train.pop('MedInc')

train_data = lgb.Dataset(train, train_y, free_raw_data=False)


def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
    params = {
        "objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1,
        "min_child_samples": 20, "reg_alpha": 1, "reg_lambda": 1, "boosting": "gbdt",
        "learning_rate": 0.01, "subsample": 0.8, "colsample_bytree": 0.8, "verbosity": -1, "metric": 'rmse'
    }
    # Cast the continuous suggestions from the optimizer into valid lightgbm parameters.
    params['feature_fraction'] = max(min(feature_fraction, 1), 0)
    params['max_depth'] = int(round(max_depth))
    params['num_leaves'] = int(round(num_leaves))
    params['min_gain_to_split'] = float(min_gain_to_split)
    params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))
    cv_result = lgb.cv(params, train_data, nfold=5, seed=0, verbose_eval=200, stratified=False)
    return np.array(cv_result['rmse-mean']).max()


lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
                                        'max_depth': (5, 9),
                                        'num_leaves': (1, 300),
                                        'min_gain_to_split': (0.001, 0.1),
                                        'min_data_in_leaf': (5, 50)}, random_state=0)

lgbBO.maximize(init_points=5, n_iter=5, acq='ei')


def bayes_parameter_opt_lgb(train, train_y, init_round=15, opt_round=25, n_folds=5, random_seed=0, n_estimators=10000, learning_rate=0.05, output_process=False):
    # prepare data
    train_data = lgb.Dataset(train, train_y, free_raw_data=False)
    # parameters

    def lgb_eval(num_leaves, feature_fraction, max_depth, min_gain_to_split, min_data_in_leaf):
        params = {
            "objective": "regression", "bagging_fraction": 0.8, "bagging_freq": 1,
            "min_child_samples": 20, "reg_alpha": 1, "reg_lambda": 1, "boosting": "gbdt",
            "learning_rate": 0.01, "subsample": 0.8, "colsample_bytree": 0.8, "verbosity": -1, "metric": 'rmse'
        }
        params['feature_fraction'] = max(min(feature_fraction, 1), 0)
        params['max_depth'] = int(round(max_depth))
        params['num_leaves'] = int(round(num_leaves))
        params['min_gain_to_split'] = float(min_gain_to_split)
        params['min_data_in_leaf'] = int(np.round(min_data_in_leaf))
        cv_result = lgb.cv(params, train_data, nfold=n_folds, seed=random_seed, verbose_eval=200, stratified=False)
        return np.array(cv_result['rmse-mean']).max()
    
    # range
    lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
                                            'max_depth': (5, 9),
                                            'num_leaves': (200, 300),
                                            'min_gain_to_split': (0.001, 0.1),
                                            'min_data_in_leaf': (5, 50)}, random_state=0)
    # optimize
    lgbBO.maximize(init_points=init_round, n_iter=opt_round, acq='ei')

    # output optimization process
    lgbBO.points_to_csv("bayes_opt_result.csv")

    # return best parameters
    return lgbBO.res['max']['max_params']

opt_params = bayes_parameter_opt_lgb(train, train_y, init_round=200, opt_round=20, n_folds=5, random_seed=0, n_estimators=1000, learning_rate=0.01)

This results in the following stack trace:

---------------------------------------------------------------------------
StopIteration                             Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:179, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
    178 try:
--> 179     x_probe = next(self._queue)
    180 except StopIteration:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:25, in Queue.__next__(self)
     24 if self.empty:
---> 25     raise StopIteration("Queue is empty, no more objects to retrieve.")
     26 obj = self._queue[0]

StopIteration: Queue is empty, no more objects to retrieve.

During handling of the above exception, another exception occurred:

TypeError                                 Traceback (most recent call last)
..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in <cell line: 35>()
     32         # return best parameters
     33     return lgbBO.res['max']['max_params']
---> 35 opt_params = bayes_parameter_opt_lgb(train, train_y, init_round=20, opt_round=20, n_folds=5, random_seed=0, n_estimators=1000, learning_rate=0.01)

..\GitHub\Meister2\src\lgb_new.ipynb Cell 13' in bayes_parameter_opt_lgb(train, train_y, init_round, opt_round, n_folds, random_seed, n_estimators, learning_rate, output_process)
     21 lgbBO = BayesianOptimization(lgb_eval, {'feature_fraction': (0.1, 0.9),
     22                                         'max_depth': (5, 9),
     23                                         'num_leaves' : (200,300),
     24                                         'min_gain_to_split': (0.001, 0.1),
     25                                         'min_data_in_leaf': (5, 50)}, random_state=0)
     26     # optimize
---> 27 lgbBO.maximize(init_points=init_round, n_iter=opt_round,acq='ei')
     29     # output optimization process
     30 lgbBO.points_to_csv("bayes_opt_result.csv")

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:182, in BayesianOptimization.maximize(self, init_points, n_iter, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)
    180 except StopIteration:
    181     util.update_params()
--> 182     x_probe = self.suggest(util)
    183     iteration += 1
    185 self.probe(x_probe, lazy=False)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\bayesian_optimization.py:131, in BayesianOptimization.suggest(self, utility_function)
    128     self._gp.fit(self._space.params, self._space.target)
    130 # Finding argmax of the acquisition function.
--> 131 suggestion = acq_max(
    132     ac=utility_function.utility,
    133     gp=self._gp,
    134     y_max=self._space.target.max(),
    135     bounds=self._space.bounds,
    136     random_state=self._random_state
    137 )
    139 return self._space.array_to_params(suggestion)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\bayes_opt\util.py:65, in acq_max(ac, gp, y_max, bounds, random_state, n_warmup, n_iter)
     62     continue
     64 # Store it if better than previous minimum(maximum).
---> 65 if max_acq is None or -res.fun[0] >= max_acq:
     66     x_max = res.x
     67     max_acq = -res.fun[0]

TypeError: 'float' object is not subscriptable

Edit: the MRE above the stack trace should reproduce the subsequent programming error. As the stack trace suggests, -res.fun[0] looks like it should be a list and therefore subscriptable (line 65, at the end of the stack trace), but it is not, and I don't understand why. This value is assigned to max_acq, which is part of the maximization function acq_max() (line 131 of the stack trace); the Gaussian process itself is part of the BayesianOptimization function (line 27 of the stack trace).

Why am I getting TypeError: 'float' object is not subscriptable, and how can I fix it?

This is related to a change in scipy 1.8.0: -np.squeeze(res.fun) should be used instead of -res.fun[0].
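A minimal sketch of the corresponding change in bayes_opt/util.py, inside acq_max() (these are the same lines shown at the bottom of the stack trace above):

# Store it if better than previous minimum(maximum).
# On scipy >= 1.8.0 res.fun is a scalar, so squeeze it instead of indexing:
if max_acq is None or -np.squeeze(res.fun) >= max_acq:
    x_max = res.x
    max_acq = -np.squeeze(res.fun)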

https://github.com/fmfn/BayesianOptimization/issues/300
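For context, you can observe the underlying scipy change directly. A minimal sketch, assuming scipy >= 1.8.0 (bayes_opt maximizes the acquisition function via scipy.optimize.minimize with method="L-BFGS-B" inside util.py):

import numpy as np
from scipy.optimize import minimize

# Minimize a simple quadratic with the same method bayes_opt uses internally.
res = minimize(lambda x: (x[0] - 1.0) ** 2, x0=np.array([0.0]), method="L-BFGS-B")

print(res.fun)               # scalar on scipy >= 1.8.0, length-1 array on 1.7.x
print(-np.squeeze(res.fun))  # works on both versions
# print(-res.fun[0])         # TypeError: 'float' object is not subscriptable on >= 1.8.0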

Comments in the bug report indicate that reverting to scipy 1.7.0 resolves this issue.

It seems a fix has been proposed in the BayesianOptimization package: https://github.com/fmfn/BayesianOptimization/pull/303

but this has not been merged and released yet, so you can either pin scipy to a pre-1.8 version (for example pip install "scipy==1.7.3") until the fix ships, or apply the one-line patch shown above to your local copy of bayes_opt/util.py.