TypeError: __init__() got an unexpected keyword argument 'early_stopping_rounds' for CatBoost in Python
TypeError: __init__() got an unexpected keyword argument 'early_stopping_rounds' for CatBoost in Python
当我尝试运行 CatBoost 算法时出现此错误。early_stopping_rounds 明明是参数之一,所以我不明白为什么会出现此错误。当我注释掉 early_stopping_rounds 时,
cat_features 参数也会得到相同的错误。
from catboost import CatBoostClassifier
# Drop the identifier column BEFORE locating the categorical columns.
# cat_features holds positional indexes, so removing a column afterwards
# shifts the index of every column that followed it by one; CatBoost then
# treats a string column as numeric and fails with
# "Cannot convert 'b'Triangle'' to float".
X.drop(columns=["id"], inplace=True)
categorical_indexes = np.where(X.dtypes == "object")[0]

params = {
    "loss_function": "Logloss",
    "eval_metric": "AUC",
    "verbose": 200,
    # NOTE(review): the installed catboost rejected this keyword in __init__
    # until the package was upgraded via pip -- keep catboost up to date.
    "early_stopping_rounds": 200,
    "cat_features": categorical_indexes,
    "random_seed": 17,
}
cat = CatBoostClassifier(**params)
cat.fit(X, y)
编辑:
通过 pip 升级了 catboost 包,现在我收到另一个错误
但是,在我的代码中,我过滤分类值然后将它们分配给 cat_features 参数
TypeError Traceback (most recent call last)
_catboost.pyx in _catboost.get_float_feature()
_catboost.pyx in _catboost._FloatOrNan()
_catboost.pyx in _catboost._FloatOrNanFromString()
TypeError: Cannot convert 'b'Triangle'' to float
During handling of the above exception, another exception occurred:
CatBoostError Traceback (most recent call last)
<ipython-input-13-1190699fd260> in <module>
74
75 cat = CatBoostClassifier(loss_function='Logloss', eval_metric='AUC', verbose=200, early_stopping_rounds=200, cat_features=np.array([ 6, 7, 8, 9, 10, 11, 12, 13, 14, 18, 19, 20]), random_seed=17)
---> 76 cat.fit(X,y)
77
78
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in fit(self, X, y, cat_features, text_features, sample_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
3786 self._fit(X, y, cat_features, text_features, None, sample_weight, None, None, None, None, baseline, use_best_model,
3787 eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period,
-> 3788 silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
3789 return self
3790
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in _fit(self, X, y, cat_features, text_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
1670 use_best_model, eval_set, verbose, logging_level, plot,
1671 column_description, verbose_eval, metric_period, silent, early_stopping_rounds,
-> 1672 save_snapshot, snapshot_file, snapshot_interval, init_model
1673 )
1674 params = train_params["params"]
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in _prepare_train_params(self, X, y, cat_features, text_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, use_best_model, eval_set, verbose, logging_level, plot, column_description, verbose_eval, metric_period, silent, early_stopping_rounds, save_snapshot, snapshot_file, snapshot_interval, init_model)
1558
1559 train_pool = _build_train_pool(X, y, cat_features, text_features, pairs, sample_weight, group_id,
-> 1560 group_weight, subgroup_id, pairs_weight, baseline, column_description)
1561 if train_pool.is_empty_:
1562 raise CatBoostError("X is empty.")
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in _build_train_pool(X, y, cat_features, text_features, pairs, sample_weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, column_description)
968 raise CatBoostError("y has not initialized in fit(): X is not catboost.Pool object, y must be not None in fit().")
969 train_pool = Pool(X, y, cat_features=cat_features, text_features=text_features, pairs=pairs, weight=sample_weight, group_id=group_id,
--> 970 group_weight=group_weight, subgroup_id=subgroup_id, pairs_weight=pairs_weight, baseline=baseline)
971 return train_pool
972
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in __init__(self, data, label, cat_features, text_features, column_description, pairs, delimiter, has_header, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names, thread_count)
369 )
370
--> 371 self._init(data, label, cat_features, text_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names)
372 super(Pool, self).__init__()
373
c:\users\m\appdata\local\programs\python\python37\lib\site-packages\catboost\core.py in _init(self, data, label, cat_features, text_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names)
949 baseline = np.reshape(baseline, (samples_count, -1))
950 self._check_baseline_shape(baseline, samples_count)
--> 951 self._init_pool(data, label, cat_features, text_features, pairs, weight, group_id, group_weight, subgroup_id, pairs_weight, baseline, feature_names)
952
953
_catboost.pyx in _catboost._PoolBase._init_pool()
_catboost.pyx in _catboost._PoolBase._init_pool()
_catboost.pyx in _catboost._PoolBase._init_features_order_layout_pool()
_catboost.pyx in _catboost._set_features_order_data_pd_data_frame()
_catboost.pyx in _catboost.create_num_factor_data()
_catboost.pyx in _catboost.get_float_feature()
CatBoostError: Bad value for num_feature[non_default_doc_idx=0,feature_idx=5]="Triangle": Cannot convert 'b'Triangle'' to float
您似乎没有为 "feature_idx=5" 处、值为 "Triangle" 的分类特征指定特征索引或特征名称。CatBoost 尝试将该字节字符串值转换为 float,
因此抛出了 CatBoostError。
您可以在构造 CatBoostClassifier 时,
把该分类特征的索引或名称加入您的变量 categorical_indexes,
或者通过 fit() 的 cat_features 参数
提供分类特征索引或名称的列表。
我刚刚尝试检查官方 catboost 算法页面中是否存在此参数 ('sampling_frequency'),现在似乎已将其删除 https://catboost.ai/docs/concepts/python-reference_catboost_fit.html#python-reference_catboost_fit__parameters
我参考了这个回答(this answer):先找出所有 object 类型的变量,以避免在使用 catboost 进行开发时出现此错误。
# Names of every object-dtype column in X_train; these are handed to
# CatBoost as the categorical features.
object_mask = X_train.dtypes == "O"
cat_vars = X_train.columns[object_mask].tolist()

# Hyper-parameters for the classifier, kept together in one mapping.
# NOTE(review): RMSE is normally a regression metric -- confirm that it is
# the intended eval_metric for a classifier.
model_options = {
    "cat_features": cat_vars,
    "eval_metric": "RMSE",
    "learning_rate": 0.13,
    "iterations": 500,
}
rf = CatBoostClassifier(**model_options)

# Train with the held-out split as the evaluation set, then predict on it.
rf.fit(X_train, y_train, plot=True, eval_set=(X_test, y_test))
pred_rf = rf.predict(X_test)